1 /* 2 * Copyright (c) 2004 Poul-Henning Kamp 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: head/sys/kern/subr_unit.c 255057 2013-08-30 07:37:45Z kib $ 27 */ 28 /* 29 * This file and its contents are supplied under the terms of the 30 * Common Development and Distribution License ("CDDL"), version 1.0. 31 * You may only use this file in accordance with the terms of version 32 * 1.0 of the CDDL. 33 * 34 * A full copy of the text of the CDDL should have accompanied this 35 * source. A copy of the CDDL is also available via the Internet at 36 * http://www.illumos.org/license/CDDL. 37 * 38 * Copyright 2014 Pluribus Networks Inc. 39 * Copyright 2019 Joyent, Inc. 40 * Copyright 2020 Oxide Computer Company 41 */ 42 43 #include <sys/types.h> 44 #include <sys/archsystm.h> 45 #include <sys/cpuset.h> 46 #include <sys/fp.h> 47 #include <sys/kmem.h> 48 #include <sys/queue.h> 49 #include <sys/spl.h> 50 #include <sys/systm.h> 51 #include <sys/ddidmareq.h> 52 #include <sys/id_space.h> 53 #include <sys/psm_defs.h> 54 #include <sys/smp_impldefs.h> 55 #include <sys/modhash.h> 56 #include <sys/hma.h> 57 58 #include <sys/x86_archext.h> 59 60 #include <machine/cpufunc.h> 61 #include <machine/md_var.h> 62 #include <machine/specialreg.h> 63 #include <machine/vmm.h> 64 #include <machine/vmparam.h> 65 #include <sys/vmm_impl.h> 66 #include <sys/kernel.h> 67 68 #include <vm/as.h> 69 #include <vm/seg_kmem.h> 70 71 SET_DECLARE(sysinit_set, struct sysinit); 72 73 void 74 sysinit(void) 75 { 76 struct sysinit **si; 77 78 SET_FOREACH(si, sysinit_set) 79 (*si)->func((*si)->data); 80 } 81 82 void 83 invalidate_cache_all(void) 84 { 85 cpuset_t cpuset; 86 87 kpreempt_disable(); 88 cpuset_all_but(&cpuset, CPU->cpu_id); 89 xc_call((xc_arg_t)NULL, (xc_arg_t)NULL, (xc_arg_t)NULL, 90 CPUSET2BV(cpuset), (xc_func_t)invalidate_cache); 91 invalidate_cache(); 92 kpreempt_enable(); 93 } 94 95 vm_paddr_t 96 vtophys(void *va) 97 { 98 pfn_t pfn; 99 100 /* 101 * Since hat_getpfnum() may block on an htable mutex, this is not at 102 * all safe to run from a critical_enter/kpreempt_disable context. 103 * The FreeBSD analog does not have the same locking constraints, so 104 * close attention must be paid wherever this is called. 105 */ 106 ASSERT(curthread->t_preempt == 0); 107 108 pfn = hat_getpfnum(kas.a_hat, (caddr_t)va); 109 ASSERT(pfn != PFN_INVALID); 110 return (pfn << PAGE_SHIFT) | ((uintptr_t)va & PAGE_MASK); 111 } 112 113 int 114 cpusetobj_ffs(const cpuset_t *set) 115 { 116 uint_t large, small; 117 118 /* 119 * Rather than reaching into the cpuset_t ourselves, leave that task to 120 * cpuset_bounds(). The simplicity is worth the extra wasted work to 121 * find the upper bound. 122 */ 123 cpuset_bounds(set, &small, &large); 124 125 if (small == CPUSET_NOTINSET) { 126 /* The FreeBSD version returns 0 if it find nothing */ 127 return (0); 128 } 129 130 ASSERT3U(small, <=, INT_MAX); 131 132 /* Least significant bit index starts at 1 for valid results */ 133 return (small + 1); 134 } 135 136 struct vmm_ptp_item { 137 void *vpi_vaddr; 138 }; 139 static kmutex_t vmm_ptp_lock; 140 141 static mod_hash_t *vmm_ptp_hash; 142 uint_t vmm_ptp_hash_nchains = 16381; 143 uint_t vmm_ptp_hash_size = PAGESIZE; 144 145 static void 146 vmm_ptp_hash_valdtor(mod_hash_val_t val) 147 { 148 struct vmm_ptp_item *i = (struct vmm_ptp_item *)val; 149 150 kmem_free(i->vpi_vaddr, PAGE_SIZE); 151 kmem_free(i, sizeof (*i)); 152 } 153 154 static void 155 vmm_ptp_init(void) 156 { 157 vmm_ptp_hash = mod_hash_create_ptrhash("vmm_ptp_hash", 158 vmm_ptp_hash_nchains, vmm_ptp_hash_valdtor, vmm_ptp_hash_size); 159 160 VERIFY(vmm_ptp_hash != NULL); 161 } 162 163 static uint_t 164 vmm_ptp_check(mod_hash_key_t key, mod_hash_val_t *val, void *unused) 165 { 166 struct vmm_ptp_item *i = (struct vmm_ptp_item *)val; 167 168 cmn_err(CE_PANIC, "!vmm_ptp_check: hash not empty: %p", i->vpi_vaddr); 169 170 return (MH_WALK_TERMINATE); 171 } 172 173 static void 174 vmm_ptp_cleanup(void) 175 { 176 mod_hash_walk(vmm_ptp_hash, vmm_ptp_check, NULL); 177 mod_hash_destroy_ptrhash(vmm_ptp_hash); 178 } 179 180 /* 181 * The logic in VT-d uses both kernel-virtual and direct-mapped addresses when 182 * freeing PTP pages. Until the consuming code is improved to better track the 183 * pages it allocates, we keep the kernel-virtual addresses to those pages in a 184 * hash table for when they are freed. 185 */ 186 void * 187 vmm_ptp_alloc(void) 188 { 189 void *p; 190 struct vmm_ptp_item *i; 191 192 p = kmem_zalloc(PAGE_SIZE, KM_SLEEP); 193 i = kmem_alloc(sizeof (struct vmm_ptp_item), KM_SLEEP); 194 i->vpi_vaddr = p; 195 196 mutex_enter(&vmm_ptp_lock); 197 VERIFY(mod_hash_insert(vmm_ptp_hash, 198 (mod_hash_key_t)PHYS_TO_DMAP(vtophys(p)), (mod_hash_val_t)i) == 0); 199 mutex_exit(&vmm_ptp_lock); 200 201 return (p); 202 } 203 204 void 205 vmm_ptp_free(void *addr) 206 { 207 mutex_enter(&vmm_ptp_lock); 208 VERIFY(mod_hash_destroy(vmm_ptp_hash, 209 (mod_hash_key_t)PHYS_TO_DMAP(vtophys(addr))) == 0); 210 mutex_exit(&vmm_ptp_lock); 211 } 212 213 /* Reach into i86pc/os/ddi_impl.c for these */ 214 extern void *contig_alloc(size_t, ddi_dma_attr_t *, uintptr_t, int); 215 extern void contig_free(void *, size_t); 216 217 void * 218 vmm_contig_alloc(size_t size) 219 { 220 ddi_dma_attr_t attr = { 221 /* Using fastboot_dma_attr as a guide... */ 222 .dma_attr_version = DMA_ATTR_V0, 223 .dma_attr_addr_lo = 0, 224 .dma_attr_addr_hi = ~0UL, 225 .dma_attr_count_max = 0x00000000FFFFFFFFULL, 226 .dma_attr_align = PAGE_SIZE, 227 .dma_attr_burstsizes = 1, 228 .dma_attr_minxfer = 1, 229 .dma_attr_maxxfer = 0x00000000FFFFFFFFULL, 230 .dma_attr_seg = 0x00000000FFFFFFFFULL, /* any */ 231 .dma_attr_sgllen = 1, 232 .dma_attr_granular = PAGE_SIZE, 233 .dma_attr_flags = 0, 234 }; 235 void *res; 236 237 res = contig_alloc(size, &attr, PAGE_SIZE, 1); 238 if (res != NULL) { 239 bzero(res, size); 240 } 241 242 return (res); 243 } 244 245 void 246 vmm_contig_free(void *addr, size_t size) 247 { 248 contig_free(addr, size); 249 } 250 251 void 252 critical_enter(void) 253 { 254 kpreempt_disable(); 255 } 256 257 void 258 critical_exit(void) 259 { 260 kpreempt_enable(); 261 } 262 263 264 static void 265 vmm_glue_callout_handler(void *arg) 266 { 267 struct callout *c = arg; 268 269 if (callout_active(c)) { 270 /* 271 * Record the handler fire time so that callout_pending() is 272 * able to detect if the callout becomes rescheduled during the 273 * course of the handler. 274 */ 275 c->c_fired = gethrtime(); 276 (c->c_func)(c->c_arg); 277 } 278 } 279 280 void 281 vmm_glue_callout_init(struct callout *c, int mpsafe) 282 { 283 cyc_handler_t hdlr; 284 cyc_time_t when; 285 286 hdlr.cyh_level = CY_LOW_LEVEL; 287 hdlr.cyh_func = vmm_glue_callout_handler; 288 hdlr.cyh_arg = c; 289 when.cyt_when = CY_INFINITY; 290 when.cyt_interval = CY_INFINITY; 291 bzero(c, sizeof (*c)); 292 293 mutex_enter(&cpu_lock); 294 c->c_cyc_id = cyclic_add(&hdlr, &when); 295 mutex_exit(&cpu_lock); 296 } 297 298 void 299 callout_reset_hrtime(struct callout *c, hrtime_t target, void (*func)(void *), 300 void *arg, int flags) 301 { 302 ASSERT(c->c_cyc_id != CYCLIC_NONE); 303 304 if ((flags & C_ABSOLUTE) == 0) { 305 target += gethrtime(); 306 } 307 308 c->c_func = func; 309 c->c_arg = arg; 310 c->c_target = target; 311 (void) cyclic_reprogram(c->c_cyc_id, target); 312 } 313 314 void 315 vmm_glue_callout_stop(struct callout *c) 316 { 317 ASSERT(c->c_cyc_id != CYCLIC_NONE); 318 319 c->c_target = 0; 320 (void) cyclic_reprogram(c->c_cyc_id, CY_INFINITY); 321 } 322 323 void 324 vmm_glue_callout_drain(struct callout *c) 325 { 326 ASSERT(c->c_cyc_id != CYCLIC_NONE); 327 328 c->c_target = 0; 329 mutex_enter(&cpu_lock); 330 cyclic_remove(c->c_cyc_id); 331 c->c_cyc_id = CYCLIC_NONE; 332 mutex_exit(&cpu_lock); 333 } 334 335 void 336 vmm_glue_callout_localize(struct callout *c) 337 { 338 mutex_enter(&cpu_lock); 339 cyclic_move_here(c->c_cyc_id); 340 mutex_exit(&cpu_lock); 341 } 342 343 /* 344 * Given an interval (in ns) and a frequency (in hz), calculate the number of 345 * "ticks" at that frequency which cover the interval. 346 */ 347 uint64_t 348 hrt_freq_count(hrtime_t interval, uint32_t freq) 349 { 350 ASSERT3S(interval, >=, 0); 351 const uint64_t sec = interval / NANOSEC; 352 const uint64_t nsec = interval % NANOSEC; 353 354 return ((sec * freq) + ((nsec * freq) / NANOSEC)); 355 } 356 357 /* 358 * Given a frequency (in hz) and number of "ticks", calculate the interval 359 * (in ns) which would be covered by those ticks. 360 */ 361 hrtime_t 362 hrt_freq_interval(uint32_t freq, uint64_t count) 363 { 364 const uint64_t sec = count / freq; 365 const uint64_t frac = count % freq; 366 367 return ((NANOSEC * sec) + ((frac * NANOSEC) / freq)); 368 } 369 370 371 uint_t cpu_high; /* Highest arg to CPUID */ 372 uint_t cpu_exthigh; /* Highest arg to extended CPUID */ 373 uint_t cpu_id; /* Stepping ID */ 374 char cpu_vendor[20]; /* CPU Origin code */ 375 376 static void 377 vmm_cpuid_init(void) 378 { 379 uint_t regs[4]; 380 381 do_cpuid(0, regs); 382 cpu_high = regs[0]; 383 ((uint_t *)&cpu_vendor)[0] = regs[1]; 384 ((uint_t *)&cpu_vendor)[1] = regs[3]; 385 ((uint_t *)&cpu_vendor)[2] = regs[2]; 386 cpu_vendor[12] = '\0'; 387 388 do_cpuid(1, regs); 389 cpu_id = regs[0]; 390 391 do_cpuid(0x80000000, regs); 392 cpu_exthigh = regs[0]; 393 } 394 395 void 396 vmm_sol_glue_init(void) 397 { 398 vmm_ptp_init(); 399 vmm_cpuid_init(); 400 } 401 402 void 403 vmm_sol_glue_cleanup(void) 404 { 405 vmm_ptp_cleanup(); 406 } 407 408 409 /* From FreeBSD's sys/kern/subr_clock.c */ 410 411 /*- 412 * Copyright (c) 1988 University of Utah. 413 * Copyright (c) 1982, 1990, 1993 414 * The Regents of the University of California. All rights reserved. 415 * 416 * This code is derived from software contributed to Berkeley by 417 * the Systems Programming Group of the University of Utah Computer 418 * Science Department. 419 * 420 * Redistribution and use in source and binary forms, with or without 421 * modification, are permitted provided that the following conditions 422 * are met: 423 * 1. Redistributions of source code must retain the above copyright 424 * notice, this list of conditions and the following disclaimer. 425 * 2. Redistributions in binary form must reproduce the above copyright 426 * notice, this list of conditions and the following disclaimer in the 427 * documentation and/or other materials provided with the distribution. 428 * 4. Neither the name of the University nor the names of its contributors 429 * may be used to endorse or promote products derived from this software 430 * without specific prior written permission. 431 * 432 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 433 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 434 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 435 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 436 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 437 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 438 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 439 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 440 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 441 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 442 * SUCH DAMAGE. 443 * 444 * from: Utah $Hdr: clock.c 1.18 91/01/21$ 445 * from: @(#)clock.c 8.2 (Berkeley) 1/12/94 446 * from: NetBSD: clock_subr.c,v 1.6 2001/07/07 17:04:02 thorpej Exp 447 * and 448 * from: src/sys/i386/isa/clock.c,v 1.176 2001/09/04 449 */ 450 451 #include <sys/clock.h> 452 453 /* 454 * Generic routines to convert between a POSIX date 455 * (seconds since 1/1/1970) and yr/mo/day/hr/min/sec 456 * Derived from NetBSD arch/hp300/hp300/clock.c 457 */ 458 459 #define FEBRUARY 2 460 #define days_in_year(y) (leapyear(y) ? 366 : 365) 461 #define days_in_month(y, m) \ 462 (month_days[(m) - 1] + (m == FEBRUARY ? leapyear(y) : 0)) 463 /* Day of week. Days are counted from 1/1/1970, which was a Thursday */ 464 #define day_of_week(days) (((days) + 4) % 7) 465 466 static const int month_days[12] = { 467 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 468 }; 469 470 471 /* 472 * This inline avoids some unnecessary modulo operations 473 * as compared with the usual macro: 474 * ( ((year % 4) == 0 && 475 * (year % 100) != 0) || 476 * ((year % 400) == 0) ) 477 * It is otherwise equivalent. 478 */ 479 static int 480 leapyear(int year) 481 { 482 int rv = 0; 483 484 if ((year & 3) == 0) { 485 rv = 1; 486 if ((year % 100) == 0) { 487 rv = 0; 488 if ((year % 400) == 0) 489 rv = 1; 490 } 491 } 492 return (rv); 493 } 494 495 int 496 clock_ct_to_ts(struct clocktime *ct, struct timespec *ts) 497 { 498 int i, year, days; 499 500 year = ct->year; 501 502 #ifdef __FreeBSD__ 503 if (ct_debug) { 504 printf("ct_to_ts("); 505 print_ct(ct); 506 printf(")"); 507 } 508 #endif 509 510 /* Sanity checks. */ 511 if (ct->mon < 1 || ct->mon > 12 || ct->day < 1 || 512 ct->day > days_in_month(year, ct->mon) || 513 ct->hour > 23 || ct->min > 59 || ct->sec > 59 || 514 (sizeof (time_t) == 4 && year > 2037)) { /* time_t overflow */ 515 #ifdef __FreeBSD__ 516 if (ct_debug) 517 printf(" = EINVAL\n"); 518 #endif 519 return (EINVAL); 520 } 521 522 /* 523 * Compute days since start of time 524 * First from years, then from months. 525 */ 526 days = 0; 527 for (i = POSIX_BASE_YEAR; i < year; i++) 528 days += days_in_year(i); 529 530 /* Months */ 531 for (i = 1; i < ct->mon; i++) 532 days += days_in_month(year, i); 533 days += (ct->day - 1); 534 535 ts->tv_sec = (((time_t)days * 24 + ct->hour) * 60 + ct->min) * 60 + 536 ct->sec; 537 ts->tv_nsec = ct->nsec; 538 539 #ifdef __FreeBSD__ 540 if (ct_debug) 541 printf(" = %ld.%09ld\n", (long)ts->tv_sec, (long)ts->tv_nsec); 542 #endif 543 return (0); 544 } 545 546 void 547 clock_ts_to_ct(struct timespec *ts, struct clocktime *ct) 548 { 549 int i, year, days; 550 time_t rsec; /* remainder seconds */ 551 time_t secs; 552 553 secs = ts->tv_sec; 554 days = secs / SECDAY; 555 rsec = secs % SECDAY; 556 557 ct->dow = day_of_week(days); 558 559 /* Subtract out whole years, counting them in i. */ 560 for (year = POSIX_BASE_YEAR; days >= days_in_year(year); year++) 561 days -= days_in_year(year); 562 ct->year = year; 563 564 /* Subtract out whole months, counting them in i. */ 565 for (i = 1; days >= days_in_month(year, i); i++) 566 days -= days_in_month(year, i); 567 ct->mon = i; 568 569 /* Days are what is left over (+1) from all that. */ 570 ct->day = days + 1; 571 572 /* Hours, minutes, seconds are easy */ 573 ct->hour = rsec / 3600; 574 rsec = rsec % 3600; 575 ct->min = rsec / 60; 576 rsec = rsec % 60; 577 ct->sec = rsec; 578 ct->nsec = ts->tv_nsec; 579 #ifdef __FreeBSD__ 580 if (ct_debug) { 581 printf("ts_to_ct(%ld.%09ld) = ", 582 (long)ts->tv_sec, (long)ts->tv_nsec); 583 print_ct(ct); 584 printf("\n"); 585 } 586 #endif 587 } 588 589 /* Equivalent to the FreeBSD rdtsc(), but with any necessary per-cpu offset */ 590 uint64_t 591 rdtsc_offset(void) 592 { 593 /* 594 * The timestamp logic will decide if a delta need be applied to the 595 * unscaled hrtime reading (effectively rdtsc), but we do require it be 596 * backed by the TSC itself. 597 */ 598 extern hrtime_t (*gethrtimeunscaledf)(void); 599 extern hrtime_t tsc_gethrtimeunscaled(void); 600 extern hrtime_t tsc_gethrtimeunscaled_delta(void); 601 602 ASSERT(*gethrtimeunscaledf == tsc_gethrtimeunscaled || 603 *gethrtimeunscaledf == tsc_gethrtimeunscaled_delta); 604 return ((uint64_t)gethrtimeunscaledf()); 605 } 606