1 /* 2 * Copyright (c) 2004 Poul-Henning Kamp 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: head/sys/kern/subr_unit.c 255057 2013-08-30 07:37:45Z kib $ 27 */ 28 /* 29 * This file and its contents are supplied under the terms of the 30 * Common Development and Distribution License ("CDDL"), version 1.0. 31 * You may only use this file in accordance with the terms of version 32 * 1.0 of the CDDL. 33 * 34 * A full copy of the text of the CDDL should have accompanied this 35 * source. A copy of the CDDL is also available via the Internet at 36 * http://www.illumos.org/license/CDDL. 37 * 38 * Copyright 2014 Pluribus Networks Inc. 39 * Copyright 2019 Joyent, Inc. 40 * Copyright 2020 Oxide Computer Company 41 */ 42 43 #include <sys/types.h> 44 #include <sys/archsystm.h> 45 #include <sys/cpuset.h> 46 #include <sys/fp.h> 47 #include <sys/kmem.h> 48 #include <sys/queue.h> 49 #include <sys/spl.h> 50 #include <sys/systm.h> 51 #include <sys/ddidmareq.h> 52 #include <sys/id_space.h> 53 #include <sys/psm_defs.h> 54 #include <sys/smp_impldefs.h> 55 #include <sys/modhash.h> 56 #include <sys/hma.h> 57 58 #include <sys/x86_archext.h> 59 60 #include <machine/cpufunc.h> 61 #include <machine/md_var.h> 62 #include <machine/specialreg.h> 63 #include <machine/vmm.h> 64 #include <machine/vmparam.h> 65 #include <sys/vmm_impl.h> 66 #include <sys/kernel.h> 67 68 #include <vm/as.h> 69 #include <vm/seg_kmem.h> 70 71 72 static void vmm_tsc_init(void); 73 74 SET_DECLARE(sysinit_set, struct sysinit); 75 76 void 77 sysinit(void) 78 { 79 struct sysinit **si; 80 81 SET_FOREACH(si, sysinit_set) 82 (*si)->func((*si)->data); 83 } 84 85 void 86 invalidate_cache_all(void) 87 { 88 cpuset_t cpuset; 89 90 kpreempt_disable(); 91 cpuset_all_but(&cpuset, CPU->cpu_id); 92 xc_call((xc_arg_t)NULL, (xc_arg_t)NULL, (xc_arg_t)NULL, 93 CPUSET2BV(cpuset), (xc_func_t)invalidate_cache); 94 invalidate_cache(); 95 kpreempt_enable(); 96 } 97 98 vm_paddr_t 99 vtophys(void *va) 100 { 101 pfn_t pfn; 102 103 /* 104 * Since hat_getpfnum() may block on an htable mutex, this is not at 105 * all safe to run from a critical_enter/kpreempt_disable context. 106 * The FreeBSD analog does not have the same locking constraints, so 107 * close attention must be paid wherever this is called. 108 */ 109 ASSERT(curthread->t_preempt == 0); 110 111 pfn = hat_getpfnum(kas.a_hat, (caddr_t)va); 112 ASSERT(pfn != PFN_INVALID); 113 return (pfn << PAGE_SHIFT) | ((uintptr_t)va & PAGE_MASK); 114 } 115 116 int 117 cpusetobj_ffs(const cpuset_t *set) 118 { 119 uint_t large, small; 120 121 /* 122 * Rather than reaching into the cpuset_t ourselves, leave that task to 123 * cpuset_bounds(). The simplicity is worth the extra wasted work to 124 * find the upper bound. 125 */ 126 cpuset_bounds(set, &small, &large); 127 128 if (small == CPUSET_NOTINSET) { 129 /* The FreeBSD version returns 0 if it find nothing */ 130 return (0); 131 } 132 133 ASSERT3U(small, <=, INT_MAX); 134 135 /* Least significant bit index starts at 1 for valid results */ 136 return (small + 1); 137 } 138 139 struct vmm_ptp_item { 140 void *vpi_vaddr; 141 }; 142 static kmutex_t vmm_ptp_lock; 143 144 static mod_hash_t *vmm_ptp_hash; 145 uint_t vmm_ptp_hash_nchains = 16381; 146 uint_t vmm_ptp_hash_size = PAGESIZE; 147 148 static void 149 vmm_ptp_hash_valdtor(mod_hash_val_t val) 150 { 151 struct vmm_ptp_item *i = (struct vmm_ptp_item *)val; 152 153 kmem_free(i->vpi_vaddr, PAGE_SIZE); 154 kmem_free(i, sizeof (*i)); 155 } 156 157 static void 158 vmm_ptp_init(void) 159 { 160 vmm_ptp_hash = mod_hash_create_ptrhash("vmm_ptp_hash", 161 vmm_ptp_hash_nchains, vmm_ptp_hash_valdtor, vmm_ptp_hash_size); 162 163 VERIFY(vmm_ptp_hash != NULL); 164 } 165 166 static uint_t 167 vmm_ptp_check(mod_hash_key_t key, mod_hash_val_t *val, void *unused) 168 { 169 struct vmm_ptp_item *i = (struct vmm_ptp_item *)val; 170 171 cmn_err(CE_PANIC, "!vmm_ptp_check: hash not empty: %p", i->vpi_vaddr); 172 173 return (MH_WALK_TERMINATE); 174 } 175 176 static void 177 vmm_ptp_cleanup(void) 178 { 179 mod_hash_walk(vmm_ptp_hash, vmm_ptp_check, NULL); 180 mod_hash_destroy_ptrhash(vmm_ptp_hash); 181 } 182 183 /* 184 * The logic in VT-d uses both kernel-virtual and direct-mapped addresses when 185 * freeing PTP pages. Until the consuming code is improved to better track the 186 * pages it allocates, we keep the kernel-virtual addresses to those pages in a 187 * hash table for when they are freed. 188 */ 189 void * 190 vmm_ptp_alloc(void) 191 { 192 void *p; 193 struct vmm_ptp_item *i; 194 195 p = kmem_zalloc(PAGE_SIZE, KM_SLEEP); 196 i = kmem_alloc(sizeof (struct vmm_ptp_item), KM_SLEEP); 197 i->vpi_vaddr = p; 198 199 mutex_enter(&vmm_ptp_lock); 200 VERIFY(mod_hash_insert(vmm_ptp_hash, 201 (mod_hash_key_t)PHYS_TO_DMAP(vtophys(p)), (mod_hash_val_t)i) == 0); 202 mutex_exit(&vmm_ptp_lock); 203 204 return (p); 205 } 206 207 void 208 vmm_ptp_free(void *addr) 209 { 210 mutex_enter(&vmm_ptp_lock); 211 VERIFY(mod_hash_destroy(vmm_ptp_hash, 212 (mod_hash_key_t)PHYS_TO_DMAP(vtophys(addr))) == 0); 213 mutex_exit(&vmm_ptp_lock); 214 } 215 216 /* Reach into i86pc/os/ddi_impl.c for these */ 217 extern void *contig_alloc(size_t, ddi_dma_attr_t *, uintptr_t, int); 218 extern void contig_free(void *, size_t); 219 220 void * 221 vmm_contig_alloc(size_t size) 222 { 223 ddi_dma_attr_t attr = { 224 /* Using fastboot_dma_attr as a guide... */ 225 .dma_attr_version = DMA_ATTR_V0, 226 .dma_attr_addr_lo = 0, 227 .dma_attr_addr_hi = ~0UL, 228 .dma_attr_count_max = 0x00000000FFFFFFFFULL, 229 .dma_attr_align = PAGE_SIZE, 230 .dma_attr_burstsizes = 1, 231 .dma_attr_minxfer = 1, 232 .dma_attr_maxxfer = 0x00000000FFFFFFFFULL, 233 .dma_attr_seg = 0x00000000FFFFFFFFULL, /* any */ 234 .dma_attr_sgllen = 1, 235 .dma_attr_granular = PAGE_SIZE, 236 .dma_attr_flags = 0, 237 }; 238 void *res; 239 240 res = contig_alloc(size, &attr, PAGE_SIZE, 1); 241 if (res != NULL) { 242 bzero(res, size); 243 } 244 245 return (res); 246 } 247 248 void 249 vmm_contig_free(void *addr, size_t size) 250 { 251 contig_free(addr, size); 252 } 253 254 void 255 critical_enter(void) 256 { 257 kpreempt_disable(); 258 } 259 260 void 261 critical_exit(void) 262 { 263 kpreempt_enable(); 264 } 265 266 267 static void 268 vmm_glue_callout_handler(void *arg) 269 { 270 struct callout *c = arg; 271 272 if (callout_active(c)) { 273 /* 274 * Record the handler fire time so that callout_pending() is 275 * able to detect if the callout becomes rescheduled during the 276 * course of the handler. 277 */ 278 c->c_fired = gethrtime(); 279 (c->c_func)(c->c_arg); 280 } 281 } 282 283 void 284 vmm_glue_callout_init(struct callout *c, int mpsafe) 285 { 286 cyc_handler_t hdlr; 287 cyc_time_t when; 288 289 hdlr.cyh_level = CY_LOW_LEVEL; 290 hdlr.cyh_func = vmm_glue_callout_handler; 291 hdlr.cyh_arg = c; 292 when.cyt_when = CY_INFINITY; 293 when.cyt_interval = CY_INFINITY; 294 bzero(c, sizeof (*c)); 295 296 mutex_enter(&cpu_lock); 297 c->c_cyc_id = cyclic_add(&hdlr, &when); 298 mutex_exit(&cpu_lock); 299 } 300 301 void 302 callout_reset_hrtime(struct callout *c, hrtime_t target, void (*func)(void *), 303 void *arg, int flags) 304 { 305 ASSERT(c->c_cyc_id != CYCLIC_NONE); 306 307 if ((flags & C_ABSOLUTE) == 0) { 308 target += gethrtime(); 309 } 310 311 c->c_func = func; 312 c->c_arg = arg; 313 c->c_target = target; 314 (void) cyclic_reprogram(c->c_cyc_id, target); 315 } 316 317 void 318 vmm_glue_callout_stop(struct callout *c) 319 { 320 ASSERT(c->c_cyc_id != CYCLIC_NONE); 321 322 c->c_target = 0; 323 (void) cyclic_reprogram(c->c_cyc_id, CY_INFINITY); 324 } 325 326 void 327 vmm_glue_callout_drain(struct callout *c) 328 { 329 ASSERT(c->c_cyc_id != CYCLIC_NONE); 330 331 c->c_target = 0; 332 mutex_enter(&cpu_lock); 333 cyclic_remove(c->c_cyc_id); 334 c->c_cyc_id = CYCLIC_NONE; 335 mutex_exit(&cpu_lock); 336 } 337 338 void 339 vmm_glue_callout_localize(struct callout *c) 340 { 341 mutex_enter(&cpu_lock); 342 cyclic_move_here(c->c_cyc_id); 343 mutex_exit(&cpu_lock); 344 } 345 346 /* 347 * Given an interval (in ns) and a frequency (in hz), calculate the number of 348 * "ticks" at that frequency which cover the interval. 349 */ 350 uint64_t 351 hrt_freq_count(hrtime_t interval, uint32_t freq) 352 { 353 ASSERT3S(interval, >=, 0); 354 const uint64_t sec = interval / NANOSEC; 355 const uint64_t nsec = interval % NANOSEC; 356 357 return ((sec * freq) + ((nsec * freq) / NANOSEC)); 358 } 359 360 /* 361 * Given a frequency (in hz) and number of "ticks", calculate the interval 362 * (in ns) which would be covered by those ticks. 363 */ 364 hrtime_t 365 hrt_freq_interval(uint32_t freq, uint64_t count) 366 { 367 const uint64_t sec = count / freq; 368 const uint64_t frac = count % freq; 369 370 return ((NANOSEC * sec) + ((frac * NANOSEC) / freq)); 371 } 372 373 374 uint_t cpu_high; /* Highest arg to CPUID */ 375 uint_t cpu_exthigh; /* Highest arg to extended CPUID */ 376 uint_t cpu_id; /* Stepping ID */ 377 char cpu_vendor[20]; /* CPU Origin code */ 378 379 static void 380 vmm_cpuid_init(void) 381 { 382 uint_t regs[4]; 383 384 do_cpuid(0, regs); 385 cpu_high = regs[0]; 386 ((uint_t *)&cpu_vendor)[0] = regs[1]; 387 ((uint_t *)&cpu_vendor)[1] = regs[3]; 388 ((uint_t *)&cpu_vendor)[2] = regs[2]; 389 cpu_vendor[12] = '\0'; 390 391 do_cpuid(1, regs); 392 cpu_id = regs[0]; 393 394 do_cpuid(0x80000000, regs); 395 cpu_exthigh = regs[0]; 396 } 397 398 void 399 vmm_sol_glue_init(void) 400 { 401 vmm_ptp_init(); 402 vmm_cpuid_init(); 403 vmm_tsc_init(); 404 } 405 406 void 407 vmm_sol_glue_cleanup(void) 408 { 409 vmm_ptp_cleanup(); 410 } 411 412 413 /* From FreeBSD's sys/kern/subr_clock.c */ 414 415 /*- 416 * Copyright (c) 1988 University of Utah. 417 * Copyright (c) 1982, 1990, 1993 418 * The Regents of the University of California. All rights reserved. 419 * 420 * This code is derived from software contributed to Berkeley by 421 * the Systems Programming Group of the University of Utah Computer 422 * Science Department. 423 * 424 * Redistribution and use in source and binary forms, with or without 425 * modification, are permitted provided that the following conditions 426 * are met: 427 * 1. Redistributions of source code must retain the above copyright 428 * notice, this list of conditions and the following disclaimer. 429 * 2. Redistributions in binary form must reproduce the above copyright 430 * notice, this list of conditions and the following disclaimer in the 431 * documentation and/or other materials provided with the distribution. 432 * 4. Neither the name of the University nor the names of its contributors 433 * may be used to endorse or promote products derived from this software 434 * without specific prior written permission. 435 * 436 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 437 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 438 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 439 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 440 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 441 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 442 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 443 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 444 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 445 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 446 * SUCH DAMAGE. 447 * 448 * from: Utah $Hdr: clock.c 1.18 91/01/21$ 449 * from: @(#)clock.c 8.2 (Berkeley) 1/12/94 450 * from: NetBSD: clock_subr.c,v 1.6 2001/07/07 17:04:02 thorpej Exp 451 * and 452 * from: src/sys/i386/isa/clock.c,v 1.176 2001/09/04 453 */ 454 455 #include <sys/clock.h> 456 457 /* 458 * Generic routines to convert between a POSIX date 459 * (seconds since 1/1/1970) and yr/mo/day/hr/min/sec 460 * Derived from NetBSD arch/hp300/hp300/clock.c 461 */ 462 463 #define FEBRUARY 2 464 #define days_in_year(y) (leapyear(y) ? 366 : 365) 465 #define days_in_month(y, m) \ 466 (month_days[(m) - 1] + (m == FEBRUARY ? leapyear(y) : 0)) 467 /* Day of week. Days are counted from 1/1/1970, which was a Thursday */ 468 #define day_of_week(days) (((days) + 4) % 7) 469 470 static const int month_days[12] = { 471 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 472 }; 473 474 475 /* 476 * This inline avoids some unnecessary modulo operations 477 * as compared with the usual macro: 478 * ( ((year % 4) == 0 && 479 * (year % 100) != 0) || 480 * ((year % 400) == 0) ) 481 * It is otherwise equivalent. 482 */ 483 static int 484 leapyear(int year) 485 { 486 int rv = 0; 487 488 if ((year & 3) == 0) { 489 rv = 1; 490 if ((year % 100) == 0) { 491 rv = 0; 492 if ((year % 400) == 0) 493 rv = 1; 494 } 495 } 496 return (rv); 497 } 498 499 int 500 clock_ct_to_ts(struct clocktime *ct, struct timespec *ts) 501 { 502 int i, year, days; 503 504 year = ct->year; 505 506 #ifdef __FreeBSD__ 507 if (ct_debug) { 508 printf("ct_to_ts("); 509 print_ct(ct); 510 printf(")"); 511 } 512 #endif 513 514 /* Sanity checks. */ 515 if (ct->mon < 1 || ct->mon > 12 || ct->day < 1 || 516 ct->day > days_in_month(year, ct->mon) || 517 ct->hour > 23 || ct->min > 59 || ct->sec > 59 || 518 (sizeof (time_t) == 4 && year > 2037)) { /* time_t overflow */ 519 #ifdef __FreeBSD__ 520 if (ct_debug) 521 printf(" = EINVAL\n"); 522 #endif 523 return (EINVAL); 524 } 525 526 /* 527 * Compute days since start of time 528 * First from years, then from months. 529 */ 530 days = 0; 531 for (i = POSIX_BASE_YEAR; i < year; i++) 532 days += days_in_year(i); 533 534 /* Months */ 535 for (i = 1; i < ct->mon; i++) 536 days += days_in_month(year, i); 537 days += (ct->day - 1); 538 539 ts->tv_sec = (((time_t)days * 24 + ct->hour) * 60 + ct->min) * 60 + 540 ct->sec; 541 ts->tv_nsec = ct->nsec; 542 543 #ifdef __FreeBSD__ 544 if (ct_debug) 545 printf(" = %ld.%09ld\n", (long)ts->tv_sec, (long)ts->tv_nsec); 546 #endif 547 return (0); 548 } 549 550 void 551 clock_ts_to_ct(struct timespec *ts, struct clocktime *ct) 552 { 553 int i, year, days; 554 time_t rsec; /* remainder seconds */ 555 time_t secs; 556 557 secs = ts->tv_sec; 558 days = secs / SECDAY; 559 rsec = secs % SECDAY; 560 561 ct->dow = day_of_week(days); 562 563 /* Subtract out whole years, counting them in i. */ 564 for (year = POSIX_BASE_YEAR; days >= days_in_year(year); year++) 565 days -= days_in_year(year); 566 ct->year = year; 567 568 /* Subtract out whole months, counting them in i. */ 569 for (i = 1; days >= days_in_month(year, i); i++) 570 days -= days_in_month(year, i); 571 ct->mon = i; 572 573 /* Days are what is left over (+1) from all that. */ 574 ct->day = days + 1; 575 576 /* Hours, minutes, seconds are easy */ 577 ct->hour = rsec / 3600; 578 rsec = rsec % 3600; 579 ct->min = rsec / 60; 580 rsec = rsec % 60; 581 ct->sec = rsec; 582 ct->nsec = ts->tv_nsec; 583 #ifdef __FreeBSD__ 584 if (ct_debug) { 585 printf("ts_to_ct(%ld.%09ld) = ", 586 (long)ts->tv_sec, (long)ts->tv_nsec); 587 print_ct(ct); 588 printf("\n"); 589 } 590 #endif 591 } 592 593 /* Do the host CPU TSCs require offsets be applied for proper sync? */ 594 static bool vmm_host_tsc_offset; 595 596 static void 597 vmm_tsc_init(void) 598 { 599 /* 600 * The timestamp logic will decide if a delta need be applied to the 601 * unscaled hrtime reading (effectively rdtsc), but we do require it be 602 * backed by the TSC itself. 603 */ 604 extern hrtime_t (*gethrtimeunscaledf)(void); 605 extern hrtime_t tsc_gethrtimeunscaled(void); 606 extern hrtime_t tsc_gethrtimeunscaled_delta(void); 607 608 VERIFY(*gethrtimeunscaledf == tsc_gethrtimeunscaled || 609 *gethrtimeunscaledf == tsc_gethrtimeunscaled_delta); 610 611 /* 612 * If a delta is being applied to the TSC on a per-host-CPU basis, 613 * expose that delta via vmm_host_tsc_delta(). 614 */ 615 vmm_host_tsc_offset = 616 (*gethrtimeunscaledf == tsc_gethrtimeunscaled_delta); 617 618 } 619 620 /* Equivalent to the FreeBSD rdtsc(), but with any necessary per-cpu offset */ 621 uint64_t 622 rdtsc_offset(void) 623 { 624 return ((uint64_t)gethrtimeunscaledf()); 625 } 626 627 /* 628 * The delta (if any) which needs to be applied to the TSC of this host CPU to 629 * bring it in sync with the others. 630 */ 631 uint64_t 632 vmm_host_tsc_delta(void) 633 { 634 if (vmm_host_tsc_offset) { 635 extern hrtime_t tsc_gethrtime_tick_delta(void); 636 return (tsc_gethrtime_tick_delta()); 637 } else { 638 return (0); 639 } 640 } 641