xref: /illumos-gate/usr/src/uts/intel/io/vmm/vmm_sol_glue.c (revision 6446bd46ed1b4e9f69da153665f82181ccaedad5)
1 /*
2  * Copyright (c) 2004 Poul-Henning Kamp
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD: head/sys/kern/subr_unit.c 255057 2013-08-30 07:37:45Z kib $
27  */
28 /*
29  * This file and its contents are supplied under the terms of the
30  * Common Development and Distribution License ("CDDL"), version 1.0.
31  * You may only use this file in accordance with the terms of version
32  * 1.0 of the CDDL.
33  *
34  * A full copy of the text of the CDDL should have accompanied this
35  * source.  A copy of the CDDL is also available via the Internet at
36  * http://www.illumos.org/license/CDDL.
37  *
38  * Copyright 2014 Pluribus Networks Inc.
39  * Copyright 2019 Joyent, Inc.
40  * Copyright 2020 Oxide Computer Company
41  */
42 
43 #include <sys/types.h>
44 #include <sys/archsystm.h>
45 #include <sys/cpuset.h>
46 #include <sys/fp.h>
47 #include <sys/kmem.h>
48 #include <sys/queue.h>
49 #include <sys/spl.h>
50 #include <sys/systm.h>
51 #include <sys/ddidmareq.h>
52 #include <sys/id_space.h>
53 #include <sys/psm_defs.h>
54 #include <sys/smp_impldefs.h>
55 #include <sys/modhash.h>
56 #include <sys/hma.h>
57 
58 #include <sys/x86_archext.h>
59 
60 #include <machine/cpufunc.h>
61 #include <machine/md_var.h>
62 #include <machine/specialreg.h>
63 #include <machine/vmm.h>
64 #include <machine/vmparam.h>
65 #include <sys/vmm_impl.h>
66 #include <sys/kernel.h>
67 
68 #include <vm/as.h>
69 #include <vm/seg_kmem.h>
70 
71 SET_DECLARE(sysinit_set, struct sysinit);
72 
73 void
74 sysinit(void)
75 {
76 	struct sysinit **si;
77 
78 	SET_FOREACH(si, sysinit_set)
79 		(*si)->func((*si)->data);
80 }
81 
/*
 * Table mapping a binary value in the range 0-99 to its packed BCD
 * representation: the tens digit occupies the high nibble and the ones digit
 * the low nibble.  Each BCD_DECADE() row expands to the ten entries which
 * share a tens digit.
 */
#define	BCD_DECADE(tens)						\
	(tens) | 0x0, (tens) | 0x1, (tens) | 0x2, (tens) | 0x3, (tens) | 0x4, \
	(tens) | 0x5, (tens) | 0x6, (tens) | 0x7, (tens) | 0x8, (tens) | 0x9

uint8_t const bin2bcd_data[] = {
	BCD_DECADE(0x00),
	BCD_DECADE(0x10),
	BCD_DECADE(0x20),
	BCD_DECADE(0x30),
	BCD_DECADE(0x40),
	BCD_DECADE(0x50),
	BCD_DECADE(0x60),
	BCD_DECADE(0x70),
	BCD_DECADE(0x80),
	BCD_DECADE(0x90)
};

#undef	BCD_DECADE
94 
95 void
96 invalidate_cache_all(void)
97 {
98 	cpuset_t cpuset;
99 
100 	kpreempt_disable();
101 	cpuset_all_but(&cpuset, CPU->cpu_id);
102 	xc_call((xc_arg_t)NULL, (xc_arg_t)NULL, (xc_arg_t)NULL,
103 	    CPUSET2BV(cpuset), (xc_func_t)invalidate_cache);
104 	invalidate_cache();
105 	kpreempt_enable();
106 }
107 
108 vm_paddr_t
109 vtophys(void *va)
110 {
111 	pfn_t	pfn;
112 
113 	/*
114 	 * Since hat_getpfnum() may block on an htable mutex, this is not at
115 	 * all safe to run from a critical_enter/kpreempt_disable context.
116 	 * The FreeBSD analog does not have the same locking constraints, so
117 	 * close attention must be paid wherever this is called.
118 	 */
119 	ASSERT(curthread->t_preempt == 0);
120 
121 	pfn = hat_getpfnum(kas.a_hat, (caddr_t)va);
122 	ASSERT(pfn != PFN_INVALID);
123 	return (pfn << PAGE_SHIFT) | ((uintptr_t)va & PAGE_MASK);
124 }
125 
126 int
127 cpusetobj_ffs(const cpuset_t *set)
128 {
129 	uint_t large, small;
130 
131 	/*
132 	 * Rather than reaching into the cpuset_t ourselves, leave that task to
133 	 * cpuset_bounds().  The simplicity is worth the extra wasted work to
134 	 * find the upper bound.
135 	 */
136 	cpuset_bounds(set, &small, &large);
137 
138 	if (small == CPUSET_NOTINSET) {
139 		/* The FreeBSD version returns 0 if it find nothing */
140 		return (0);
141 	}
142 
143 	ASSERT3U(small, <=, INT_MAX);
144 
145 	/* Least significant bit index starts at 1 for valid results */
146 	return (small + 1);
147 }
148 
149 struct vmm_ptp_item {
150 	void *vpi_vaddr;
151 };
152 static kmutex_t vmm_ptp_lock;
153 
154 static mod_hash_t *vmm_ptp_hash;
155 uint_t vmm_ptp_hash_nchains = 16381;
156 uint_t vmm_ptp_hash_size = PAGESIZE;
157 
158 static void
159 vmm_ptp_hash_valdtor(mod_hash_val_t val)
160 {
161 	struct vmm_ptp_item *i = (struct vmm_ptp_item *)val;
162 
163 	kmem_free(i->vpi_vaddr, PAGE_SIZE);
164 	kmem_free(i, sizeof (*i));
165 }
166 
167 static void
168 vmm_ptp_init(void)
169 {
170 	vmm_ptp_hash = mod_hash_create_ptrhash("vmm_ptp_hash",
171 	    vmm_ptp_hash_nchains, vmm_ptp_hash_valdtor, vmm_ptp_hash_size);
172 
173 	VERIFY(vmm_ptp_hash != NULL);
174 }
175 
176 static uint_t
177 vmm_ptp_check(mod_hash_key_t key, mod_hash_val_t *val, void *unused)
178 {
179 	struct vmm_ptp_item *i = (struct vmm_ptp_item *)val;
180 
181 	cmn_err(CE_PANIC, "!vmm_ptp_check: hash not empty: %p", i->vpi_vaddr);
182 
183 	return (MH_WALK_TERMINATE);
184 }
185 
186 static void
187 vmm_ptp_cleanup(void)
188 {
189 	mod_hash_walk(vmm_ptp_hash, vmm_ptp_check, NULL);
190 	mod_hash_destroy_ptrhash(vmm_ptp_hash);
191 }
192 
193 /*
194  * The logic in VT-d uses both kernel-virtual and direct-mapped addresses when
195  * freeing PTP pages.  Until the consuming code is improved to better track the
196  * pages it allocates, we keep the kernel-virtual addresses to those pages in a
197  * hash table for when they are freed.
198  */
199 void *
200 vmm_ptp_alloc(void)
201 {
202 	void *p;
203 	struct vmm_ptp_item *i;
204 
205 	p = kmem_zalloc(PAGE_SIZE, KM_SLEEP);
206 	i = kmem_alloc(sizeof (struct vmm_ptp_item), KM_SLEEP);
207 	i->vpi_vaddr = p;
208 
209 	mutex_enter(&vmm_ptp_lock);
210 	VERIFY(mod_hash_insert(vmm_ptp_hash,
211 	    (mod_hash_key_t)PHYS_TO_DMAP(vtophys(p)), (mod_hash_val_t)i) == 0);
212 	mutex_exit(&vmm_ptp_lock);
213 
214 	return (p);
215 }
216 
217 void
218 vmm_ptp_free(void *addr)
219 {
220 	mutex_enter(&vmm_ptp_lock);
221 	VERIFY(mod_hash_destroy(vmm_ptp_hash,
222 	    (mod_hash_key_t)PHYS_TO_DMAP(vtophys(addr))) == 0);
223 	mutex_exit(&vmm_ptp_lock);
224 }
225 
226 /* Reach into i86pc/os/ddi_impl.c for these */
227 extern void *contig_alloc(size_t, ddi_dma_attr_t *, uintptr_t, int);
228 extern void contig_free(void *, size_t);
229 
230 void *
231 vmm_contig_alloc(size_t size)
232 {
233 	ddi_dma_attr_t attr = {
234 		/* Using fastboot_dma_attr as a guide... */
235 		.dma_attr_version	= DMA_ATTR_V0,
236 		.dma_attr_addr_lo	= 0,
237 		.dma_attr_addr_hi	= ~0UL,
238 		.dma_attr_count_max	= 0x00000000FFFFFFFFULL,
239 		.dma_attr_align		= PAGE_SIZE,
240 		.dma_attr_burstsizes	= 1,
241 		.dma_attr_minxfer	= 1,
242 		.dma_attr_maxxfer	= 0x00000000FFFFFFFFULL,
243 		.dma_attr_seg		= 0x00000000FFFFFFFFULL, /* any */
244 		.dma_attr_sgllen	= 1,
245 		.dma_attr_granular	= PAGE_SIZE,
246 		.dma_attr_flags		= 0,
247 	};
248 	void *res;
249 
250 	res = contig_alloc(size, &attr, PAGE_SIZE, 1);
251 	if (res != NULL) {
252 		bzero(res, size);
253 	}
254 
255 	return (res);
256 }
257 
/*
 * Counterpart to vmm_contig_alloc(): hands 'addr' and 'size' straight to
 * contig_free().
 */
void
vmm_contig_free(void *addr, size_t size)
{
	contig_free(addr, size);
}
263 
/*
 * Glue for the FreeBSD-style critical_enter(): implemented here by disabling
 * kernel preemption.
 */
void
critical_enter(void)
{
	kpreempt_disable();
}
269 
/*
 * Glue for the FreeBSD-style critical_exit(): implemented here by re-enabling
 * kernel preemption.
 */
void
critical_exit(void)
{
	kpreempt_enable();
}
275 
276 
277 static void
278 vmm_glue_callout_handler(void *arg)
279 {
280 	struct callout *c = arg;
281 
282 	if (callout_active(c)) {
283 		/*
284 		 * Record the handler fire time so that callout_pending() is
285 		 * able to detect if the callout becomes rescheduled during the
286 		 * course of the handler.
287 		 */
288 		c->c_fired = gethrtime();
289 		(c->c_func)(c->c_arg);
290 	}
291 }
292 
293 void
294 vmm_glue_callout_init(struct callout *c, int mpsafe)
295 {
296 	cyc_handler_t	hdlr;
297 	cyc_time_t	when;
298 
299 	hdlr.cyh_level = CY_LOW_LEVEL;
300 	hdlr.cyh_func = vmm_glue_callout_handler;
301 	hdlr.cyh_arg = c;
302 	when.cyt_when = CY_INFINITY;
303 	when.cyt_interval = CY_INFINITY;
304 	bzero(c, sizeof (*c));
305 
306 	mutex_enter(&cpu_lock);
307 	c->c_cyc_id = cyclic_add(&hdlr, &when);
308 	mutex_exit(&cpu_lock);
309 }
310 
311 void
312 callout_reset_hrtime(struct callout *c, hrtime_t target, void (*func)(void *),
313     void *arg, int flags)
314 {
315 	ASSERT(c->c_cyc_id != CYCLIC_NONE);
316 
317 	if ((flags & C_ABSOLUTE) == 0) {
318 		target += gethrtime();
319 	}
320 
321 	c->c_func = func;
322 	c->c_arg = arg;
323 	c->c_target = target;
324 	(void) cyclic_reprogram(c->c_cyc_id, target);
325 }
326 
327 void
328 vmm_glue_callout_stop(struct callout *c)
329 {
330 	ASSERT(c->c_cyc_id != CYCLIC_NONE);
331 
332 	c->c_target = 0;
333 	(void) cyclic_reprogram(c->c_cyc_id, CY_INFINITY);
334 }
335 
336 void
337 vmm_glue_callout_drain(struct callout *c)
338 {
339 	ASSERT(c->c_cyc_id != CYCLIC_NONE);
340 
341 	c->c_target = 0;
342 	mutex_enter(&cpu_lock);
343 	cyclic_remove(c->c_cyc_id);
344 	c->c_cyc_id = CYCLIC_NONE;
345 	mutex_exit(&cpu_lock);
346 }
347 
348 void
349 vmm_glue_callout_localize(struct callout *c)
350 {
351 	mutex_enter(&cpu_lock);
352 	cyclic_move_here(c->c_cyc_id);
353 	mutex_exit(&cpu_lock);
354 }
355 
356 /*
357  * Given an interval (in ns) and a frequency (in hz), calculate the number of
358  * "ticks" at that frequency which cover the interval.
359  */
360 uint64_t
361 hrt_freq_count(hrtime_t interval, uint32_t freq)
362 {
363 	ASSERT3S(interval, >=, 0);
364 	const uint64_t sec = interval / NANOSEC;
365 	const uint64_t nsec = interval % NANOSEC;
366 
367 	return ((sec * freq) + ((nsec * freq) / NANOSEC));
368 }
369 
370 /*
371  * Given a frequency (in hz) and number of "ticks", calculate the interval
372  * (in ns) which would be covered by those ticks.
373  */
374 hrtime_t
375 hrt_freq_interval(uint32_t freq, uint64_t count)
376 {
377 	const uint64_t sec = count / freq;
378 	const uint64_t frac = count % freq;
379 
380 	return ((NANOSEC * sec) + ((frac * NANOSEC) / freq));
381 }
382 
383 
384 uint_t	cpu_high;		/* Highest arg to CPUID */
385 uint_t	cpu_exthigh;		/* Highest arg to extended CPUID */
386 uint_t	cpu_id;			/* Stepping ID */
387 char	cpu_vendor[20];		/* CPU Origin code */
388 
389 static void
390 vmm_cpuid_init(void)
391 {
392 	uint_t regs[4];
393 
394 	do_cpuid(0, regs);
395 	cpu_high = regs[0];
396 	((uint_t *)&cpu_vendor)[0] = regs[1];
397 	((uint_t *)&cpu_vendor)[1] = regs[3];
398 	((uint_t *)&cpu_vendor)[2] = regs[2];
399 	cpu_vendor[12] = '\0';
400 
401 	do_cpuid(1, regs);
402 	cpu_id = regs[0];
403 
404 	do_cpuid(0x80000000, regs);
405 	cpu_exthigh = regs[0];
406 }
407 
/*
 * One-time setup for the glue layer: creates the PTP tracking hash and then
 * caches the CPUID-derived globals.
 */
void
vmm_sol_glue_init(void)
{
	vmm_ptp_init();
	vmm_cpuid_init();
}
414 
/*
 * Undo vmm_sol_glue_init().  Only the PTP tracking hash requires teardown.
 */
void
vmm_sol_glue_cleanup(void)
{
	vmm_ptp_cleanup();
}
420 
421 
422 /* From FreeBSD's sys/kern/subr_clock.c */
423 
424 /*-
425  * Copyright (c) 1988 University of Utah.
426  * Copyright (c) 1982, 1990, 1993
427  *	The Regents of the University of California.  All rights reserved.
428  *
429  * This code is derived from software contributed to Berkeley by
430  * the Systems Programming Group of the University of Utah Computer
431  * Science Department.
432  *
433  * Redistribution and use in source and binary forms, with or without
434  * modification, are permitted provided that the following conditions
435  * are met:
436  * 1. Redistributions of source code must retain the above copyright
437  *    notice, this list of conditions and the following disclaimer.
438  * 2. Redistributions in binary form must reproduce the above copyright
439  *    notice, this list of conditions and the following disclaimer in the
440  *    documentation and/or other materials provided with the distribution.
441  * 4. Neither the name of the University nor the names of its contributors
442  *    may be used to endorse or promote products derived from this software
443  *    without specific prior written permission.
444  *
445  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
446  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
447  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
448  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
449  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
450  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
451  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
452  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
453  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
454  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
455  * SUCH DAMAGE.
456  *
457  *	from: Utah $Hdr: clock.c 1.18 91/01/21$
458  *	from: @(#)clock.c	8.2 (Berkeley) 1/12/94
459  *	from: NetBSD: clock_subr.c,v 1.6 2001/07/07 17:04:02 thorpej Exp
460  *	and
461  *	from: src/sys/i386/isa/clock.c,v 1.176 2001/09/04
462  */
463 
464 #include <sys/clock.h>
465 
466 /*
467  * Generic routines to convert between a POSIX date
468  * (seconds since 1/1/1970) and yr/mo/day/hr/min/sec
469  * Derived from NetBSD arch/hp300/hp300/clock.c
470  */
471 
#define	FEBRUARY	2
/* Number of days in year 'y'. */
#define	days_in_year(y)		(leapyear(y) ? 366 : 365)
/*
 * Number of days in month 'm' (1-12) of year 'y'.  Both uses of 'm' are
 * parenthesized so that an expression argument is compared against FEBRUARY
 * as a whole.
 */
#define	days_in_month(y, m) \
	(month_days[(m) - 1] + ((m) == FEBRUARY ? leapyear(y) : 0))
/* Day of week. Days are counted from 1/1/1970, which was a Thursday */
#define	day_of_week(days)	(((days) + 4) % 7)

/* Days per month in a non-leap year, indexed by month - 1. */
static const int month_days[12] = {
	31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
};


/*
 * Returns 1 if 'year' is a leap year and 0 otherwise.
 *
 * This avoids some unnecessary modulo operations as compared with the usual
 * macro:
 *   ( ((year % 4) == 0 &&
 *      (year % 100) != 0) ||
 *     ((year % 400) == 0) )
 * It is otherwise equivalent.
 */
static int
leapyear(int year)
{
	/* Not divisible by 4: never a leap year. */
	if ((year & 3) != 0)
		return (0);

	/* Divisible by 4 but not by 100: always a leap year. */
	if ((year % 100) != 0)
		return (1);

	/* Century years are leap years only when divisible by 400. */
	return ((year % 400) == 0);
}
507 
508 int
509 clock_ct_to_ts(struct clocktime *ct, struct timespec *ts)
510 {
511 	int i, year, days;
512 
513 	year = ct->year;
514 
515 #ifdef __FreeBSD__
516 	if (ct_debug) {
517 		printf("ct_to_ts(");
518 		print_ct(ct);
519 		printf(")");
520 	}
521 #endif
522 
523 	/* Sanity checks. */
524 	if (ct->mon < 1 || ct->mon > 12 || ct->day < 1 ||
525 	    ct->day > days_in_month(year, ct->mon) ||
526 	    ct->hour > 23 || ct->min > 59 || ct->sec > 59 ||
527 	    (sizeof (time_t) == 4 && year > 2037)) {	/* time_t overflow */
528 #ifdef __FreeBSD__
529 		if (ct_debug)
530 			printf(" = EINVAL\n");
531 #endif
532 		return (EINVAL);
533 	}
534 
535 	/*
536 	 * Compute days since start of time
537 	 * First from years, then from months.
538 	 */
539 	days = 0;
540 	for (i = POSIX_BASE_YEAR; i < year; i++)
541 		days += days_in_year(i);
542 
543 	/* Months */
544 	for (i = 1; i < ct->mon; i++)
545 		days += days_in_month(year, i);
546 	days += (ct->day - 1);
547 
548 	ts->tv_sec = (((time_t)days * 24 + ct->hour) * 60 + ct->min) * 60 +
549 	    ct->sec;
550 	ts->tv_nsec = ct->nsec;
551 
552 #ifdef __FreeBSD__
553 	if (ct_debug)
554 		printf(" = %ld.%09ld\n", (long)ts->tv_sec, (long)ts->tv_nsec);
555 #endif
556 	return (0);
557 }
558 
559 void
560 clock_ts_to_ct(struct timespec *ts, struct clocktime *ct)
561 {
562 	int i, year, days;
563 	time_t rsec;	/* remainder seconds */
564 	time_t secs;
565 
566 	secs = ts->tv_sec;
567 	days = secs / SECDAY;
568 	rsec = secs % SECDAY;
569 
570 	ct->dow = day_of_week(days);
571 
572 	/* Subtract out whole years, counting them in i. */
573 	for (year = POSIX_BASE_YEAR; days >= days_in_year(year); year++)
574 		days -= days_in_year(year);
575 	ct->year = year;
576 
577 	/* Subtract out whole months, counting them in i. */
578 	for (i = 1; days >= days_in_month(year, i); i++)
579 		days -= days_in_month(year, i);
580 	ct->mon = i;
581 
582 	/* Days are what is left over (+1) from all that. */
583 	ct->day = days + 1;
584 
585 	/* Hours, minutes, seconds are easy */
586 	ct->hour = rsec / 3600;
587 	rsec = rsec % 3600;
588 	ct->min  = rsec / 60;
589 	rsec = rsec % 60;
590 	ct->sec  = rsec;
591 	ct->nsec = ts->tv_nsec;
592 #ifdef __FreeBSD__
593 	if (ct_debug) {
594 		printf("ts_to_ct(%ld.%09ld) = ",
595 		    (long)ts->tv_sec, (long)ts->tv_nsec);
596 		print_ct(ct);
597 		printf("\n");
598 	}
599 #endif
600 }
601 
602 /* Equivalent to the FreeBSD rdtsc(), but with any necessary per-cpu offset */
603 uint64_t
604 rdtsc_offset(void)
605 {
606 	/*
607 	 * The timestamp logic will decide if a delta need be applied to the
608 	 * unscaled hrtime reading (effectively rdtsc), but we do require it be
609 	 * backed by the TSC itself.
610 	 */
611 	extern hrtime_t (*gethrtimeunscaledf)(void);
612 	extern hrtime_t tsc_gethrtimeunscaled(void);
613 	extern hrtime_t tsc_gethrtimeunscaled_delta(void);
614 
615 	ASSERT(*gethrtimeunscaledf == tsc_gethrtimeunscaled ||
616 	    *gethrtimeunscaledf == tsc_gethrtimeunscaled_delta);
617 	return ((uint64_t)gethrtimeunscaledf());
618 }
619