1 /* 2 * Copyright (c) 2004 Poul-Henning Kamp 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: head/sys/kern/subr_unit.c 255057 2013-08-30 07:37:45Z kib $ 27 */ 28 /* 29 * This file and its contents are supplied under the terms of the 30 * Common Development and Distribution License ("CDDL"), version 1.0. 31 * You may only use this file in accordance with the terms of version 32 * 1.0 of the CDDL. 33 * 34 * A full copy of the text of the CDDL should have accompanied this 35 * source. A copy of the CDDL is also available via the Internet at 36 * http://www.illumos.org/license/CDDL. 37 * 38 * Copyright 2014 Pluribus Networks Inc. 39 * Copyright 2019 Joyent, Inc. 40 * Copyright 2020 Oxide Computer Company 41 */ 42 43 #include <sys/types.h> 44 #include <sys/archsystm.h> 45 #include <sys/cpuset.h> 46 #include <sys/fp.h> 47 #include <sys/malloc.h> 48 #include <sys/queue.h> 49 #include <sys/spl.h> 50 #include <sys/systm.h> 51 #include <sys/ddidmareq.h> 52 #include <sys/id_space.h> 53 #include <sys/psm_defs.h> 54 #include <sys/smp_impldefs.h> 55 #include <sys/modhash.h> 56 #include <sys/hma.h> 57 58 #include <sys/x86_archext.h> 59 60 #include <machine/cpufunc.h> 61 #include <machine/md_var.h> 62 #include <machine/specialreg.h> 63 #include <machine/vmm.h> 64 #include <machine/vmparam.h> 65 #include <sys/vmm_impl.h> 66 #include <sys/kernel.h> 67 68 #include <vm/as.h> 69 #include <vm/seg_kmem.h> 70 71 SET_DECLARE(sysinit_set, struct sysinit); 72 73 void 74 sysinit(void) 75 { 76 struct sysinit **si; 77 78 SET_FOREACH(si, sysinit_set) 79 (*si)->func((*si)->data); 80 } 81 82 uint8_t const bin2bcd_data[] = { 83 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 84 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 85 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 86 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 87 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 88 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 89 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 90 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 91 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 92 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99 93 }; 94 95 void 96 invalidate_cache_all(void) 97 { 98 cpuset_t cpuset; 99 100 kpreempt_disable(); 101 cpuset_all_but(&cpuset, CPU->cpu_id); 102 xc_call((xc_arg_t)NULL, (xc_arg_t)NULL, (xc_arg_t)NULL, 103 CPUSET2BV(cpuset), (xc_func_t)invalidate_cache); 104 invalidate_cache(); 105 kpreempt_enable(); 106 } 107 108 vm_paddr_t 109 vtophys(void *va) 110 { 111 pfn_t pfn; 112 113 /* 114 * Since hat_getpfnum() may block on an htable mutex, this is not at 115 * all safe to run from a critical_enter/kpreempt_disable context. 116 * The FreeBSD analog does not have the same locking constraints, so 117 * close attention must be paid wherever this is called. 118 */ 119 ASSERT(curthread->t_preempt == 0); 120 121 pfn = hat_getpfnum(kas.a_hat, (caddr_t)va); 122 ASSERT(pfn != PFN_INVALID); 123 return (pfn << PAGE_SHIFT) | ((uintptr_t)va & PAGE_MASK); 124 } 125 126 int 127 cpusetobj_ffs(const cpuset_t *set) 128 { 129 uint_t large, small; 130 131 /* 132 * Rather than reaching into the cpuset_t ourselves, leave that task to 133 * cpuset_bounds(). The simplicity is worth the extra wasted work to 134 * find the upper bound. 135 */ 136 cpuset_bounds(set, &small, &large); 137 138 if (small == CPUSET_NOTINSET) { 139 /* The FreeBSD version returns 0 if it find nothing */ 140 return (0); 141 } 142 143 ASSERT3U(small, <=, INT_MAX); 144 145 /* Least significant bit index starts at 1 for valid results */ 146 return (small + 1); 147 } 148 149 struct kmem_item { 150 void *addr; 151 size_t size; 152 }; 153 static kmutex_t kmem_items_lock; 154 155 static mod_hash_t *vmm_alloc_hash; 156 uint_t vmm_alloc_hash_nchains = 16381; 157 uint_t vmm_alloc_hash_size = PAGESIZE; 158 159 static void 160 vmm_alloc_hash_valdtor(mod_hash_val_t val) 161 { 162 struct kmem_item *i = (struct kmem_item *)val; 163 164 kmem_free(i->addr, i->size); 165 kmem_free(i, sizeof (struct kmem_item)); 166 } 167 168 static void 169 vmm_alloc_init(void) 170 { 171 vmm_alloc_hash = mod_hash_create_ptrhash("vmm_alloc_hash", 172 vmm_alloc_hash_nchains, vmm_alloc_hash_valdtor, 173 vmm_alloc_hash_size); 174 175 VERIFY(vmm_alloc_hash != NULL); 176 } 177 178 static uint_t 179 vmm_alloc_check(mod_hash_key_t key, mod_hash_val_t *val, void *unused) 180 { 181 struct kmem_item *i = (struct kmem_item *)val; 182 183 cmn_err(CE_PANIC, "!vmm_alloc_check: hash not empty: %p, %lu", i->addr, 184 i->size); 185 186 return (MH_WALK_TERMINATE); 187 } 188 189 static void 190 vmm_alloc_cleanup(void) 191 { 192 mod_hash_walk(vmm_alloc_hash, vmm_alloc_check, NULL); 193 mod_hash_destroy_ptrhash(vmm_alloc_hash); 194 } 195 196 void * 197 malloc(unsigned long size, struct malloc_type *mtp, int flags) 198 { 199 void *p; 200 struct kmem_item *i; 201 int kmem_flag = KM_SLEEP; 202 203 if (flags & M_NOWAIT) 204 kmem_flag = KM_NOSLEEP; 205 206 if (flags & M_ZERO) { 207 p = kmem_zalloc(size, kmem_flag); 208 } else { 209 p = kmem_alloc(size, kmem_flag); 210 } 211 212 if (p == NULL) 213 return (NULL); 214 215 i = kmem_zalloc(sizeof (struct kmem_item), kmem_flag); 216 217 if (i == NULL) { 218 kmem_free(p, size); 219 return (NULL); 220 } 221 222 mutex_enter(&kmem_items_lock); 223 i->addr = p; 224 i->size = size; 225 226 VERIFY(mod_hash_insert(vmm_alloc_hash, 227 (mod_hash_key_t)PHYS_TO_DMAP(vtophys(p)), (mod_hash_val_t)i) == 0); 228 229 mutex_exit(&kmem_items_lock); 230 231 return (p); 232 } 233 234 void 235 free(void *addr, struct malloc_type *mtp) 236 { 237 mutex_enter(&kmem_items_lock); 238 VERIFY(mod_hash_destroy(vmm_alloc_hash, 239 (mod_hash_key_t)PHYS_TO_DMAP(vtophys(addr))) == 0); 240 mutex_exit(&kmem_items_lock); 241 } 242 243 extern void *contig_alloc(size_t, ddi_dma_attr_t *, uintptr_t, int); 244 extern void contig_free(void *, size_t); 245 246 void * 247 contigmalloc(unsigned long size, struct malloc_type *type, int flags, 248 vm_paddr_t low, vm_paddr_t high, unsigned long alignment, 249 vm_paddr_t boundary) 250 { 251 ddi_dma_attr_t attr = { 252 /* Using fastboot_dma_attr as a guide... */ 253 DMA_ATTR_V0, 254 low, /* dma_attr_addr_lo */ 255 high, /* dma_attr_addr_hi */ 256 0x00000000FFFFFFFFULL, /* dma_attr_count_max */ 257 alignment, /* dma_attr_align */ 258 1, /* dma_attr_burstsize */ 259 1, /* dma_attr_minxfer */ 260 0x00000000FFFFFFFFULL, /* dma_attr_maxxfer */ 261 0x00000000FFFFFFFFULL, /* dma_attr_seg: any */ 262 1, /* dma_attr_sgllen */ 263 alignment, /* dma_attr_granular */ 264 0, /* dma_attr_flags */ 265 }; 266 int cansleep = (flags & M_WAITOK); 267 void *result; 268 269 ASSERT(alignment == PAGESIZE); 270 271 result = contig_alloc((size_t)size, &attr, alignment, cansleep); 272 273 if (result != NULL && (flags & M_ZERO) != 0) { 274 bzero(result, size); 275 } 276 return (result); 277 } 278 279 void 280 contigfree(void *addr, unsigned long size, struct malloc_type *type) 281 { 282 contig_free(addr, size); 283 } 284 285 void 286 critical_enter(void) 287 { 288 kpreempt_disable(); 289 } 290 291 void 292 critical_exit(void) 293 { 294 kpreempt_enable(); 295 } 296 297 298 static void 299 vmm_glue_callout_handler(void *arg) 300 { 301 struct callout *c = arg; 302 303 if (callout_active(c)) { 304 /* 305 * Record the handler fire time so that callout_pending() is 306 * able to detect if the callout becomes rescheduled during the 307 * course of the handler. 308 */ 309 c->c_fired = gethrtime(); 310 (c->c_func)(c->c_arg); 311 } 312 } 313 314 void 315 vmm_glue_callout_init(struct callout *c, int mpsafe) 316 { 317 cyc_handler_t hdlr; 318 cyc_time_t when; 319 320 hdlr.cyh_level = CY_LOW_LEVEL; 321 hdlr.cyh_func = vmm_glue_callout_handler; 322 hdlr.cyh_arg = c; 323 when.cyt_when = CY_INFINITY; 324 when.cyt_interval = CY_INFINITY; 325 bzero(c, sizeof (*c)); 326 327 mutex_enter(&cpu_lock); 328 c->c_cyc_id = cyclic_add(&hdlr, &when); 329 mutex_exit(&cpu_lock); 330 } 331 332 void 333 callout_reset_hrtime(struct callout *c, hrtime_t target, void (*func)(void *), 334 void *arg, int flags) 335 { 336 ASSERT(c->c_cyc_id != CYCLIC_NONE); 337 338 if ((flags & C_ABSOLUTE) == 0) { 339 target += gethrtime(); 340 } 341 342 c->c_func = func; 343 c->c_arg = arg; 344 c->c_target = target; 345 cyclic_reprogram(c->c_cyc_id, target); 346 } 347 348 int 349 vmm_glue_callout_stop(struct callout *c) 350 { 351 ASSERT(c->c_cyc_id != CYCLIC_NONE); 352 353 c->c_target = 0; 354 cyclic_reprogram(c->c_cyc_id, CY_INFINITY); 355 356 return (0); 357 } 358 359 int 360 vmm_glue_callout_drain(struct callout *c) 361 { 362 ASSERT(c->c_cyc_id != CYCLIC_NONE); 363 364 c->c_target = 0; 365 mutex_enter(&cpu_lock); 366 cyclic_remove(c->c_cyc_id); 367 c->c_cyc_id = CYCLIC_NONE; 368 mutex_exit(&cpu_lock); 369 370 return (0); 371 } 372 373 void 374 vmm_glue_callout_localize(struct callout *c) 375 { 376 mutex_enter(&cpu_lock); 377 cyclic_move_here(c->c_cyc_id); 378 mutex_exit(&cpu_lock); 379 } 380 381 /* 382 * Given an interval (in ns) and a frequency (in hz), calculate the number of 383 * "ticks" at that frequency which cover the interval. 384 */ 385 uint64_t 386 hrt_freq_count(hrtime_t interval, uint32_t freq) 387 { 388 ASSERT3S(interval, >=, 0); 389 const uint64_t sec = interval / NANOSEC; 390 const uint64_t nsec = interval % NANOSEC; 391 392 return ((sec * freq) + ((nsec * freq) / NANOSEC)); 393 } 394 395 /* 396 * Given a frequency (in hz) and number of "ticks", calculate the interval 397 * (in ns) which would be covered by those ticks. 398 */ 399 hrtime_t 400 hrt_freq_interval(uint32_t freq, uint64_t count) 401 { 402 const uint64_t sec = count / freq; 403 const uint64_t frac = count % freq; 404 405 return ((NANOSEC * sec) + ((frac * NANOSEC) / freq)); 406 } 407 408 409 uint_t cpu_high; /* Highest arg to CPUID */ 410 uint_t cpu_exthigh; /* Highest arg to extended CPUID */ 411 uint_t cpu_id; /* Stepping ID */ 412 char cpu_vendor[20]; /* CPU Origin code */ 413 414 static void 415 vmm_cpuid_init(void) 416 { 417 uint_t regs[4]; 418 419 do_cpuid(0, regs); 420 cpu_high = regs[0]; 421 ((uint_t *)&cpu_vendor)[0] = regs[1]; 422 ((uint_t *)&cpu_vendor)[1] = regs[3]; 423 ((uint_t *)&cpu_vendor)[2] = regs[2]; 424 cpu_vendor[12] = '\0'; 425 426 do_cpuid(1, regs); 427 cpu_id = regs[0]; 428 429 do_cpuid(0x80000000, regs); 430 cpu_exthigh = regs[0]; 431 } 432 433 void 434 vmm_sol_glue_init(void) 435 { 436 vmm_alloc_init(); 437 vmm_cpuid_init(); 438 } 439 440 void 441 vmm_sol_glue_cleanup(void) 442 { 443 vmm_alloc_cleanup(); 444 } 445 446 447 /* From FreeBSD's sys/kern/subr_clock.c */ 448 449 /*- 450 * Copyright (c) 1988 University of Utah. 451 * Copyright (c) 1982, 1990, 1993 452 * The Regents of the University of California. All rights reserved. 453 * 454 * This code is derived from software contributed to Berkeley by 455 * the Systems Programming Group of the University of Utah Computer 456 * Science Department. 457 * 458 * Redistribution and use in source and binary forms, with or without 459 * modification, are permitted provided that the following conditions 460 * are met: 461 * 1. Redistributions of source code must retain the above copyright 462 * notice, this list of conditions and the following disclaimer. 463 * 2. Redistributions in binary form must reproduce the above copyright 464 * notice, this list of conditions and the following disclaimer in the 465 * documentation and/or other materials provided with the distribution. 466 * 4. Neither the name of the University nor the names of its contributors 467 * may be used to endorse or promote products derived from this software 468 * without specific prior written permission. 469 * 470 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 471 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 472 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 473 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 474 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 475 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 476 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 477 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 478 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 479 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 480 * SUCH DAMAGE. 481 * 482 * from: Utah $Hdr: clock.c 1.18 91/01/21$ 483 * from: @(#)clock.c 8.2 (Berkeley) 1/12/94 484 * from: NetBSD: clock_subr.c,v 1.6 2001/07/07 17:04:02 thorpej Exp 485 * and 486 * from: src/sys/i386/isa/clock.c,v 1.176 2001/09/04 487 */ 488 489 #include <sys/clock.h> 490 491 /* 492 * Generic routines to convert between a POSIX date 493 * (seconds since 1/1/1970) and yr/mo/day/hr/min/sec 494 * Derived from NetBSD arch/hp300/hp300/clock.c 495 */ 496 497 #define FEBRUARY 2 498 #define days_in_year(y) (leapyear(y) ? 366 : 365) 499 #define days_in_month(y, m) \ 500 (month_days[(m) - 1] + (m == FEBRUARY ? leapyear(y) : 0)) 501 /* Day of week. Days are counted from 1/1/1970, which was a Thursday */ 502 #define day_of_week(days) (((days) + 4) % 7) 503 504 static const int month_days[12] = { 505 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 506 }; 507 508 509 /* 510 * This inline avoids some unnecessary modulo operations 511 * as compared with the usual macro: 512 * ( ((year % 4) == 0 && 513 * (year % 100) != 0) || 514 * ((year % 400) == 0) ) 515 * It is otherwise equivalent. 516 */ 517 static int 518 leapyear(int year) 519 { 520 int rv = 0; 521 522 if ((year & 3) == 0) { 523 rv = 1; 524 if ((year % 100) == 0) { 525 rv = 0; 526 if ((year % 400) == 0) 527 rv = 1; 528 } 529 } 530 return (rv); 531 } 532 533 int 534 clock_ct_to_ts(struct clocktime *ct, struct timespec *ts) 535 { 536 int i, year, days; 537 538 year = ct->year; 539 540 #ifdef __FreeBSD__ 541 if (ct_debug) { 542 printf("ct_to_ts("); 543 print_ct(ct); 544 printf(")"); 545 } 546 #endif 547 548 /* Sanity checks. */ 549 if (ct->mon < 1 || ct->mon > 12 || ct->day < 1 || 550 ct->day > days_in_month(year, ct->mon) || 551 ct->hour > 23 || ct->min > 59 || ct->sec > 59 || 552 (sizeof (time_t) == 4 && year > 2037)) { /* time_t overflow */ 553 #ifdef __FreeBSD__ 554 if (ct_debug) 555 printf(" = EINVAL\n"); 556 #endif 557 return (EINVAL); 558 } 559 560 /* 561 * Compute days since start of time 562 * First from years, then from months. 563 */ 564 days = 0; 565 for (i = POSIX_BASE_YEAR; i < year; i++) 566 days += days_in_year(i); 567 568 /* Months */ 569 for (i = 1; i < ct->mon; i++) 570 days += days_in_month(year, i); 571 days += (ct->day - 1); 572 573 ts->tv_sec = (((time_t)days * 24 + ct->hour) * 60 + ct->min) * 60 + 574 ct->sec; 575 ts->tv_nsec = ct->nsec; 576 577 #ifdef __FreeBSD__ 578 if (ct_debug) 579 printf(" = %ld.%09ld\n", (long)ts->tv_sec, (long)ts->tv_nsec); 580 #endif 581 return (0); 582 } 583 584 void 585 clock_ts_to_ct(struct timespec *ts, struct clocktime *ct) 586 { 587 int i, year, days; 588 time_t rsec; /* remainder seconds */ 589 time_t secs; 590 591 secs = ts->tv_sec; 592 days = secs / SECDAY; 593 rsec = secs % SECDAY; 594 595 ct->dow = day_of_week(days); 596 597 /* Subtract out whole years, counting them in i. */ 598 for (year = POSIX_BASE_YEAR; days >= days_in_year(year); year++) 599 days -= days_in_year(year); 600 ct->year = year; 601 602 /* Subtract out whole months, counting them in i. */ 603 for (i = 1; days >= days_in_month(year, i); i++) 604 days -= days_in_month(year, i); 605 ct->mon = i; 606 607 /* Days are what is left over (+1) from all that. */ 608 ct->day = days + 1; 609 610 /* Hours, minutes, seconds are easy */ 611 ct->hour = rsec / 3600; 612 rsec = rsec % 3600; 613 ct->min = rsec / 60; 614 rsec = rsec % 60; 615 ct->sec = rsec; 616 ct->nsec = ts->tv_nsec; 617 #ifdef __FreeBSD__ 618 if (ct_debug) { 619 printf("ts_to_ct(%ld.%09ld) = ", 620 (long)ts->tv_sec, (long)ts->tv_nsec); 621 print_ct(ct); 622 printf("\n"); 623 } 624 #endif 625 } 626 627 /* Equivalent to the FreeBSD rdtsc(), but with any necessary per-cpu offset */ 628 uint64_t 629 rdtsc_offset(void) 630 { 631 /* 632 * The timestamp logic will decide if a delta need be applied to the 633 * unscaled hrtime reading (effectively rdtsc), but we do require it be 634 * backed by the TSC itself. 635 */ 636 extern hrtime_t (*gethrtimeunscaledf)(void); 637 extern hrtime_t tsc_gethrtimeunscaled(void); 638 extern hrtime_t tsc_gethrtimeunscaled_delta(void); 639 640 ASSERT(*gethrtimeunscaledf == tsc_gethrtimeunscaled || 641 *gethrtimeunscaledf == tsc_gethrtimeunscaled_delta); 642 return ((uint64_t)gethrtimeunscaledf()); 643 } 644