xref: /illumos-gate/usr/src/uts/intel/io/vmm/vmm_sol_glue.c (revision 9164a50bf932130cbb5097a16f6986873ce0e6e5)
1 /*
2  * Copyright (c) 2004 Poul-Henning Kamp
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD: head/sys/kern/subr_unit.c 255057 2013-08-30 07:37:45Z kib $
27  */
28 /*
29  * This file and its contents are supplied under the terms of the
30  * Common Development and Distribution License ("CDDL"), version 1.0.
31  * You may only use this file in accordance with the terms of version
32  * 1.0 of the CDDL.
33  *
34  * A full copy of the text of the CDDL should have accompanied this
35  * source.  A copy of the CDDL is also available via the Internet at
36  * http://www.illumos.org/license/CDDL.
37  *
38  * Copyright 2014 Pluribus Networks Inc.
39  * Copyright 2019 Joyent, Inc.
40  * Copyright 2020 Oxide Computer Company
41  */
42 
43 #include <sys/types.h>
44 #include <sys/archsystm.h>
45 #include <sys/cpuset.h>
46 #include <sys/fp.h>
47 #include <sys/kmem.h>
48 #include <sys/queue.h>
49 #include <sys/spl.h>
50 #include <sys/systm.h>
51 #include <sys/ddidmareq.h>
52 #include <sys/id_space.h>
53 #include <sys/psm_defs.h>
54 #include <sys/smp_impldefs.h>
55 #include <sys/modhash.h>
56 #include <sys/hma.h>
57 
58 #include <sys/x86_archext.h>
59 
60 #include <machine/cpufunc.h>
61 #include <machine/md_var.h>
62 #include <machine/specialreg.h>
63 #include <machine/vmm.h>
64 #include <machine/vmparam.h>
65 #include <sys/vmm_impl.h>
66 #include <sys/kernel.h>
67 
68 #include <vm/as.h>
69 #include <vm/seg_kmem.h>
70 
71 
72 static void vmm_tsc_init(void);
73 
74 SET_DECLARE(sysinit_set, struct sysinit);
75 
76 void
77 sysinit(void)
78 {
79 	struct sysinit **si;
80 
81 	SET_FOREACH(si, sysinit_set)
82 		(*si)->func((*si)->data);
83 }
84 
85 void
86 invalidate_cache_all(void)
87 {
88 	cpuset_t cpuset;
89 
90 	kpreempt_disable();
91 	cpuset_all_but(&cpuset, CPU->cpu_id);
92 	xc_call((xc_arg_t)NULL, (xc_arg_t)NULL, (xc_arg_t)NULL,
93 	    CPUSET2BV(cpuset), (xc_func_t)invalidate_cache);
94 	invalidate_cache();
95 	kpreempt_enable();
96 }
97 
98 vm_paddr_t
99 vtophys(void *va)
100 {
101 	pfn_t	pfn;
102 
103 	/*
104 	 * Since hat_getpfnum() may block on an htable mutex, this is not at
105 	 * all safe to run from a critical_enter/kpreempt_disable context.
106 	 * The FreeBSD analog does not have the same locking constraints, so
107 	 * close attention must be paid wherever this is called.
108 	 */
109 	ASSERT(curthread->t_preempt == 0);
110 
111 	pfn = hat_getpfnum(kas.a_hat, (caddr_t)va);
112 	ASSERT(pfn != PFN_INVALID);
113 	return (pfn << PAGE_SHIFT) | ((uintptr_t)va & PAGE_MASK);
114 }
115 
116 int
117 cpusetobj_ffs(const cpuset_t *set)
118 {
119 	uint_t large, small;
120 
121 	/*
122 	 * Rather than reaching into the cpuset_t ourselves, leave that task to
123 	 * cpuset_bounds().  The simplicity is worth the extra wasted work to
124 	 * find the upper bound.
125 	 */
126 	cpuset_bounds(set, &small, &large);
127 
128 	if (small == CPUSET_NOTINSET) {
129 		/* The FreeBSD version returns 0 if it find nothing */
130 		return (0);
131 	}
132 
133 	ASSERT3U(small, <=, INT_MAX);
134 
135 	/* Least significant bit index starts at 1 for valid results */
136 	return (small + 1);
137 }
138 
/*
 * Value stored in vmm_ptp_hash for each page handed out by vmm_ptp_alloc().
 * It records the kernel-virtual address that was originally allocated so that
 * vmm_ptp_hash_valdtor() can free it.
 */
struct vmm_ptp_item {
	void *vpi_vaddr;
};
/* Held around the insertions into, and removals from, vmm_ptp_hash. */
static kmutex_t vmm_ptp_lock;

/* Created by vmm_ptp_init() and torn down by vmm_ptp_cleanup(). */
static mod_hash_t *vmm_ptp_hash;
/*
 * Handed to mod_hash_create_ptrhash() by vmm_ptp_init() as its second and
 * fourth arguments, respectively.
 */
uint_t vmm_ptp_hash_nchains = 16381;
uint_t vmm_ptp_hash_size = PAGESIZE;
147 
148 static void
149 vmm_ptp_hash_valdtor(mod_hash_val_t val)
150 {
151 	struct vmm_ptp_item *i = (struct vmm_ptp_item *)val;
152 
153 	kmem_free(i->vpi_vaddr, PAGE_SIZE);
154 	kmem_free(i, sizeof (*i));
155 }
156 
157 static void
158 vmm_ptp_init(void)
159 {
160 	vmm_ptp_hash = mod_hash_create_ptrhash("vmm_ptp_hash",
161 	    vmm_ptp_hash_nchains, vmm_ptp_hash_valdtor, vmm_ptp_hash_size);
162 
163 	VERIFY(vmm_ptp_hash != NULL);
164 }
165 
166 static uint_t
167 vmm_ptp_check(mod_hash_key_t key, mod_hash_val_t *val, void *unused)
168 {
169 	struct vmm_ptp_item *i = (struct vmm_ptp_item *)val;
170 
171 	cmn_err(CE_PANIC, "!vmm_ptp_check: hash not empty: %p", i->vpi_vaddr);
172 
173 	return (MH_WALK_TERMINATE);
174 }
175 
176 static void
177 vmm_ptp_cleanup(void)
178 {
179 	mod_hash_walk(vmm_ptp_hash, vmm_ptp_check, NULL);
180 	mod_hash_destroy_ptrhash(vmm_ptp_hash);
181 }
182 
183 /*
184  * The logic in VT-d uses both kernel-virtual and direct-mapped addresses when
185  * freeing PTP pages.  Until the consuming code is improved to better track the
186  * pages it allocates, we keep the kernel-virtual addresses to those pages in a
187  * hash table for when they are freed.
188  */
189 void *
190 vmm_ptp_alloc(void)
191 {
192 	void *p;
193 	struct vmm_ptp_item *i;
194 
195 	p = kmem_zalloc(PAGE_SIZE, KM_SLEEP);
196 	i = kmem_alloc(sizeof (struct vmm_ptp_item), KM_SLEEP);
197 	i->vpi_vaddr = p;
198 
199 	mutex_enter(&vmm_ptp_lock);
200 	VERIFY(mod_hash_insert(vmm_ptp_hash,
201 	    (mod_hash_key_t)PHYS_TO_DMAP(vtophys(p)), (mod_hash_val_t)i) == 0);
202 	mutex_exit(&vmm_ptp_lock);
203 
204 	return (p);
205 }
206 
207 void
208 vmm_ptp_free(void *addr)
209 {
210 	mutex_enter(&vmm_ptp_lock);
211 	VERIFY(mod_hash_destroy(vmm_ptp_hash,
212 	    (mod_hash_key_t)PHYS_TO_DMAP(vtophys(addr))) == 0);
213 	mutex_exit(&vmm_ptp_lock);
214 }
215 
216 /* Reach into i86pc/os/ddi_impl.c for these */
217 extern void *contig_alloc(size_t, ddi_dma_attr_t *, uintptr_t, int);
218 extern void contig_free(void *, size_t);
219 
220 void *
221 vmm_contig_alloc(size_t size)
222 {
223 	ddi_dma_attr_t attr = {
224 		/* Using fastboot_dma_attr as a guide... */
225 		.dma_attr_version	= DMA_ATTR_V0,
226 		.dma_attr_addr_lo	= 0,
227 		.dma_attr_addr_hi	= ~0UL,
228 		.dma_attr_count_max	= 0x00000000FFFFFFFFULL,
229 		.dma_attr_align		= PAGE_SIZE,
230 		.dma_attr_burstsizes	= 1,
231 		.dma_attr_minxfer	= 1,
232 		.dma_attr_maxxfer	= 0x00000000FFFFFFFFULL,
233 		.dma_attr_seg		= 0x00000000FFFFFFFFULL, /* any */
234 		.dma_attr_sgllen	= 1,
235 		.dma_attr_granular	= PAGE_SIZE,
236 		.dma_attr_flags		= 0,
237 	};
238 	void *res;
239 
240 	res = contig_alloc(size, &attr, PAGE_SIZE, 1);
241 	if (res != NULL) {
242 		bzero(res, size);
243 	}
244 
245 	return (res);
246 }
247 
/*
 * Counterpart to vmm_contig_alloc(): pass the buffer and its size straight
 * through to contig_free().
 */
void
vmm_contig_free(void *addr, size_t size)
{
	contig_free(addr, size);
}
253 
/* Begin a critical section; implemented here by disabling preemption. */
void
critical_enter(void)
{
	kpreempt_disable();
}
259 
/* End a critical section begun by critical_enter(): re-enable preemption. */
void
critical_exit(void)
{
	kpreempt_enable();
}
265 
266 
267 static void
268 vmm_glue_callout_handler(void *arg)
269 {
270 	struct callout *c = arg;
271 
272 	if (callout_active(c)) {
273 		/*
274 		 * Record the handler fire time so that callout_pending() is
275 		 * able to detect if the callout becomes rescheduled during the
276 		 * course of the handler.
277 		 */
278 		c->c_fired = gethrtime();
279 		(c->c_func)(c->c_arg);
280 	}
281 }
282 
283 void
284 vmm_glue_callout_init(struct callout *c, int mpsafe)
285 {
286 	cyc_handler_t	hdlr;
287 	cyc_time_t	when;
288 
289 	hdlr.cyh_level = CY_LOW_LEVEL;
290 	hdlr.cyh_func = vmm_glue_callout_handler;
291 	hdlr.cyh_arg = c;
292 	when.cyt_when = CY_INFINITY;
293 	when.cyt_interval = CY_INFINITY;
294 	bzero(c, sizeof (*c));
295 
296 	mutex_enter(&cpu_lock);
297 	c->c_cyc_id = cyclic_add(&hdlr, &when);
298 	mutex_exit(&cpu_lock);
299 }
300 
301 void
302 callout_reset_hrtime(struct callout *c, hrtime_t target, void (*func)(void *),
303     void *arg, int flags)
304 {
305 	ASSERT(c->c_cyc_id != CYCLIC_NONE);
306 
307 	if ((flags & C_ABSOLUTE) == 0) {
308 		target += gethrtime();
309 	}
310 
311 	c->c_func = func;
312 	c->c_arg = arg;
313 	c->c_target = target;
314 	(void) cyclic_reprogram(c->c_cyc_id, target);
315 }
316 
317 void
318 vmm_glue_callout_stop(struct callout *c)
319 {
320 	ASSERT(c->c_cyc_id != CYCLIC_NONE);
321 
322 	c->c_target = 0;
323 	(void) cyclic_reprogram(c->c_cyc_id, CY_INFINITY);
324 }
325 
326 void
327 vmm_glue_callout_drain(struct callout *c)
328 {
329 	ASSERT(c->c_cyc_id != CYCLIC_NONE);
330 
331 	c->c_target = 0;
332 	mutex_enter(&cpu_lock);
333 	cyclic_remove(c->c_cyc_id);
334 	c->c_cyc_id = CYCLIC_NONE;
335 	mutex_exit(&cpu_lock);
336 }
337 
338 void
339 vmm_glue_callout_localize(struct callout *c)
340 {
341 	mutex_enter(&cpu_lock);
342 	cyclic_move_here(c->c_cyc_id);
343 	mutex_exit(&cpu_lock);
344 }
345 
346 /*
347  * Given an interval (in ns) and a frequency (in hz), calculate the number of
348  * "ticks" at that frequency which cover the interval.
349  */
350 uint64_t
351 hrt_freq_count(hrtime_t interval, uint32_t freq)
352 {
353 	ASSERT3S(interval, >=, 0);
354 	const uint64_t sec = interval / NANOSEC;
355 	const uint64_t nsec = interval % NANOSEC;
356 
357 	return ((sec * freq) + ((nsec * freq) / NANOSEC));
358 }
359 
360 /*
361  * Given a frequency (in hz) and number of "ticks", calculate the interval
362  * (in ns) which would be covered by those ticks.
363  */
364 hrtime_t
365 hrt_freq_interval(uint32_t freq, uint64_t count)
366 {
367 	const uint64_t sec = count / freq;
368 	const uint64_t frac = count % freq;
369 
370 	return ((NANOSEC * sec) + ((frac * NANOSEC) / freq));
371 }
372 
373 
/* CPUID-derived identification values, filled in by vmm_cpuid_init(). */
uint_t	cpu_high;		/* Highest arg to CPUID */
uint_t	cpu_exthigh;		/* Highest arg to extended CPUID */
uint_t	cpu_id;			/* Stepping ID */
char	cpu_vendor[20];		/* CPU Origin code */
378 
379 static void
380 vmm_cpuid_init(void)
381 {
382 	uint_t regs[4];
383 
384 	do_cpuid(0, regs);
385 	cpu_high = regs[0];
386 	((uint_t *)&cpu_vendor)[0] = regs[1];
387 	((uint_t *)&cpu_vendor)[1] = regs[3];
388 	((uint_t *)&cpu_vendor)[2] = regs[2];
389 	cpu_vendor[12] = '\0';
390 
391 	do_cpuid(1, regs);
392 	cpu_id = regs[0];
393 
394 	do_cpuid(0x80000000, regs);
395 	cpu_exthigh = regs[0];
396 }
397 
/*
 * Set up the state kept by this file, in order: the PTP tracking hash, the
 * cached CPUID values, and the host TSC offset detection.
 */
void
vmm_sol_glue_init(void)
{
	vmm_ptp_init();		/* PTP page tracking hash */
	vmm_cpuid_init();	/* cpu_high, cpu_vendor, cpu_id, cpu_exthigh */
	vmm_tsc_init();		/* vmm_host_tsc_offset */
}
405 
/*
 * Undo vmm_sol_glue_init().  Only the PTP tracking hash is torn down here.
 */
void
vmm_sol_glue_cleanup(void)
{
	vmm_ptp_cleanup();
}
411 
412 
413 /* From FreeBSD's sys/kern/subr_clock.c */
414 
415 /*-
416  * Copyright (c) 1988 University of Utah.
417  * Copyright (c) 1982, 1990, 1993
418  *	The Regents of the University of California.  All rights reserved.
419  *
420  * This code is derived from software contributed to Berkeley by
421  * the Systems Programming Group of the University of Utah Computer
422  * Science Department.
423  *
424  * Redistribution and use in source and binary forms, with or without
425  * modification, are permitted provided that the following conditions
426  * are met:
427  * 1. Redistributions of source code must retain the above copyright
428  *    notice, this list of conditions and the following disclaimer.
429  * 2. Redistributions in binary form must reproduce the above copyright
430  *    notice, this list of conditions and the following disclaimer in the
431  *    documentation and/or other materials provided with the distribution.
432  * 4. Neither the name of the University nor the names of its contributors
433  *    may be used to endorse or promote products derived from this software
434  *    without specific prior written permission.
435  *
436  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
437  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
438  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
439  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
440  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
441  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
442  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
443  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
444  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
445  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
446  * SUCH DAMAGE.
447  *
448  *	from: Utah $Hdr: clock.c 1.18 91/01/21$
449  *	from: @(#)clock.c	8.2 (Berkeley) 1/12/94
450  *	from: NetBSD: clock_subr.c,v 1.6 2001/07/07 17:04:02 thorpej Exp
451  *	and
452  *	from: src/sys/i386/isa/clock.c,v 1.176 2001/09/04
453  */
454 
455 #include <sys/clock.h>
456 
457 /*
458  * Generic routines to convert between a POSIX date
459  * (seconds since 1/1/1970) and yr/mo/day/hr/min/sec
460  * Derived from NetBSD arch/hp300/hp300/clock.c
461  */
462 
#define	FEBRUARY	2
/* Number of days in year 'y' */
#define	days_in_year(y)		(leapyear(y) ? 366 : 365)
/*
 * Number of days in month 'm' (1-12) of year 'y'.  Every use of an argument
 * is parenthesized so that an expression argument is not re-associated by the
 * expansion.  Note that 'm' is still evaluated twice.
 */
#define	days_in_month(y, m) \
	(month_days[(m) - 1] + ((m) == FEBRUARY ? leapyear(y) : 0))
/* Day of week. Days are counted from 1/1/1970, which was a Thursday */
#define	day_of_week(days)	(((days) + 4) % 7)

/* Days per month in a non-leap year, indexed by (month - 1) */
static const int month_days[12] = {
	31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
};
473 
474 
/*
 * Returns 1 if 'year' is a leap year and 0 if it is not.
 *
 * This is equivalent to the usual macro:
 *   ( ((year % 4) == 0 &&
 *      (year % 100) != 0) ||
 *     ((year % 400) == 0) )
 * but it skips the later modulo operations whenever an earlier test has
 * already settled the answer.
 */
static int
leapyear(int year)
{
	/* Not a multiple of 4: never a leap year */
	if ((year & 3) != 0)
		return (0);

	/* A multiple of 4 which is not a century year: always a leap year */
	if ((year % 100) != 0)
		return (1);

	/* Century years are leap years only when divisible by 400 */
	return (((year % 400) == 0) ? 1 : 0);
}
498 
499 int
500 clock_ct_to_ts(struct clocktime *ct, struct timespec *ts)
501 {
502 	int i, year, days;
503 
504 	year = ct->year;
505 
506 #ifdef __FreeBSD__
507 	if (ct_debug) {
508 		printf("ct_to_ts(");
509 		print_ct(ct);
510 		printf(")");
511 	}
512 #endif
513 
514 	/* Sanity checks. */
515 	if (ct->mon < 1 || ct->mon > 12 || ct->day < 1 ||
516 	    ct->day > days_in_month(year, ct->mon) ||
517 	    ct->hour > 23 || ct->min > 59 || ct->sec > 59 ||
518 	    (sizeof (time_t) == 4 && year > 2037)) {	/* time_t overflow */
519 #ifdef __FreeBSD__
520 		if (ct_debug)
521 			printf(" = EINVAL\n");
522 #endif
523 		return (EINVAL);
524 	}
525 
526 	/*
527 	 * Compute days since start of time
528 	 * First from years, then from months.
529 	 */
530 	days = 0;
531 	for (i = POSIX_BASE_YEAR; i < year; i++)
532 		days += days_in_year(i);
533 
534 	/* Months */
535 	for (i = 1; i < ct->mon; i++)
536 		days += days_in_month(year, i);
537 	days += (ct->day - 1);
538 
539 	ts->tv_sec = (((time_t)days * 24 + ct->hour) * 60 + ct->min) * 60 +
540 	    ct->sec;
541 	ts->tv_nsec = ct->nsec;
542 
543 #ifdef __FreeBSD__
544 	if (ct_debug)
545 		printf(" = %ld.%09ld\n", (long)ts->tv_sec, (long)ts->tv_nsec);
546 #endif
547 	return (0);
548 }
549 
550 void
551 clock_ts_to_ct(struct timespec *ts, struct clocktime *ct)
552 {
553 	int i, year, days;
554 	time_t rsec;	/* remainder seconds */
555 	time_t secs;
556 
557 	secs = ts->tv_sec;
558 	days = secs / SECDAY;
559 	rsec = secs % SECDAY;
560 
561 	ct->dow = day_of_week(days);
562 
563 	/* Subtract out whole years, counting them in i. */
564 	for (year = POSIX_BASE_YEAR; days >= days_in_year(year); year++)
565 		days -= days_in_year(year);
566 	ct->year = year;
567 
568 	/* Subtract out whole months, counting them in i. */
569 	for (i = 1; days >= days_in_month(year, i); i++)
570 		days -= days_in_month(year, i);
571 	ct->mon = i;
572 
573 	/* Days are what is left over (+1) from all that. */
574 	ct->day = days + 1;
575 
576 	/* Hours, minutes, seconds are easy */
577 	ct->hour = rsec / 3600;
578 	rsec = rsec % 3600;
579 	ct->min  = rsec / 60;
580 	rsec = rsec % 60;
581 	ct->sec  = rsec;
582 	ct->nsec = ts->tv_nsec;
583 #ifdef __FreeBSD__
584 	if (ct_debug) {
585 		printf("ts_to_ct(%ld.%09ld) = ",
586 		    (long)ts->tv_sec, (long)ts->tv_nsec);
587 		print_ct(ct);
588 		printf("\n");
589 	}
590 #endif
591 }
592 
593 /* Do the host CPU TSCs require offsets be applied for proper sync? */
594 static bool vmm_host_tsc_offset;
595 
596 static void
597 vmm_tsc_init(void)
598 {
599 	/*
600 	 * The timestamp logic will decide if a delta need be applied to the
601 	 * unscaled hrtime reading (effectively rdtsc), but we do require it be
602 	 * backed by the TSC itself.
603 	 */
604 	extern hrtime_t (*gethrtimeunscaledf)(void);
605 	extern hrtime_t tsc_gethrtimeunscaled(void);
606 	extern hrtime_t tsc_gethrtimeunscaled_delta(void);
607 
608 	VERIFY(*gethrtimeunscaledf == tsc_gethrtimeunscaled ||
609 	    *gethrtimeunscaledf == tsc_gethrtimeunscaled_delta);
610 
611 	/*
612 	 * If a delta is being applied to the TSC on a per-host-CPU basis,
613 	 * expose that delta via vmm_host_tsc_delta().
614 	 */
615 	vmm_host_tsc_offset =
616 	    (*gethrtimeunscaledf == tsc_gethrtimeunscaled_delta);
617 
618 }
619 
620 /* Equivalent to the FreeBSD rdtsc(), but with any necessary per-cpu offset */
621 uint64_t
622 rdtsc_offset(void)
623 {
624 	return ((uint64_t)gethrtimeunscaledf());
625 }
626 
627 /*
628  * The delta (if any) which needs to be applied to the TSC of this host CPU to
629  * bring it in sync with the others.
630  */
631 uint64_t
632 vmm_host_tsc_delta(void)
633 {
634 	if (vmm_host_tsc_offset) {
635 		extern hrtime_t tsc_gethrtime_tick_delta(void);
636 		return (tsc_gethrtime_tick_delta());
637 	} else {
638 		return (0);
639 	}
640 }
641