xref: /illumos-gate/usr/src/uts/intel/io/vmm/amd/amdvi_hw.c (revision 7a6d80f1660abd4755c68cbd094d4a914681d26e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2016, Anish Gupta (anish@freebsd.org)
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/kernel.h>
36 #include <sys/module.h>
37 #include <sys/malloc.h>
38 #include <sys/pcpu.h>
39 #include <sys/rman.h>
40 #include <sys/sysctl.h>
41 
42 #include <dev/pci/pcivar.h>
43 #include <dev/pci/pcireg.h>
44 
45 #include <machine/resource.h>
46 #include <machine/vmm.h>
47 #include <machine/vmparam.h>
48 #include <machine/pci_cfgreg.h>
49 
50 #include "ivhd_if.h"
51 #include "pcib_if.h"
52 
53 #include "io/iommu.h"
54 #include "amdvi_priv.h"
55 
/*
 * Declare the "amdvi" sysctl node under _hw_vmm; the SYSCTL_INT/SYSCTL_UINT
 * entries in this file attach to it as _hw_vmm_amdvi.
 */
SYSCTL_DECL(_hw_vmm);
SYSCTL_NODE(_hw_vmm, OID_AUTO, amdvi, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    NULL);
59 
/*
 * Ring-buffer offset arithmetic.  'a' is a byte offset into the ring, 's'
 * is the size of one entry and 'm' is the number of entries, so offsets
 * wrap at (m * s).
 *
 * MOD_DEC steps back one entry by adding (m - 1) entries rather than
 * subtracting one, so the intermediate value never goes below zero.  The
 * result is therefore correct for signed operands and for entry counts
 * that are not a power of two.
 */
#define MOD_INC(a, s, m) (((a) + (s)) % ((m) * (s)))
#define MOD_DEC(a, s, m) (((a) + ((m) - 1) * (s)) % ((m) * (s)))

/* Expand a RID or device ID into the bus, slot, function printf arguments. */
#define RID2PCI_STR(d) PCI_RID2BUS(d), PCI_RID2SLOT(d), PCI_RID2FUNC(d)
65 
/* Forward declarations of helpers that are defined further down. */
static void amdvi_dump_cmds(struct amdvi_softc *softc, int count);
static void amdvi_print_dev_cap(struct amdvi_softc *softc);

/* Malloc type passed to every malloc()/free() call in this file. */
MALLOC_DEFINE(M_AMDVI, "amdvi", "amdvi");

/* Array of IVHD devices, indexed 0 .. ivhd_count - 1 by the code below. */
extern device_t *ivhd_devs;

/* Number of entries in ivhd_devs, exported read-only as "count". */
extern int ivhd_count;
SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, count, CTLFLAG_RDTUN, &ivhd_count,
    0, NULL);

/* User opt-in: amdvi_init() returns EINVAL while this is zero. */
static int amdvi_enable_user = 0;
SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, enable, CTLFLAG_RDTUN,
    &amdvi_enable_user, 0, NULL);
TUNABLE_INT("hw.vmm.amdvi_enable", &amdvi_enable_user);

#ifdef AMDVI_ATS_ENABLE
/* XXX: ATS is not tested. */
/* When zero, amdvi_hw_enable_iotlb() reports "IOTLB disabled by user". */
static int amdvi_enable_iotlb = 1;
SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, iotlb_enabled, CTLFLAG_RDTUN,
    &amdvi_enable_iotlb, 0, NULL);
TUNABLE_INT("hw.vmm.enable_iotlb", &amdvi_enable_iotlb);
#endif

static int amdvi_host_ptp = 1;	/* Use page tables for host. */
SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, host_ptp, CTLFLAG_RDTUN,
    &amdvi_host_ptp, 0, NULL);
TUNABLE_INT("hw.vmm.amdvi.host_ptp", &amdvi_host_ptp);

/* Page table level used <= supported by h/w[v1=7]. */
/* Copied into each domain's ptp_level by amdvi_create_domain(). */
int amdvi_ptp_level = 4;
SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, ptp_level, CTLFLAG_RDTUN,
    &amdvi_ptp_level, 0, NULL);
TUNABLE_INT("hw.vmm.amdvi.ptp_level", &amdvi_ptp_level);

/* Disable fault event reporting. */
static int amdvi_disable_io_fault = 0;
SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, disable_io_fault, CTLFLAG_RDTUN,
    &amdvi_disable_io_fault, 0, NULL);
TUNABLE_INT("hw.vmm.amdvi.disable_io_fault", &amdvi_disable_io_fault);

/* Next domain ID handed out by amdvi_domainId(). */
static uint32_t amdvi_dom_id = 0;	/* 0 is reserved for host. */
SYSCTL_UINT(_hw_vmm_amdvi, OID_AUTO, domain_id, CTLFLAG_RD,
    &amdvi_dom_id, 0, NULL);
/*
 * Device table entry.
 * Bus(256) x Dev(32) x Fun(8) x DTE(256 bits or 32 bytes).
 *	= 256 * 2 * PAGE_SIZE.
 * The two CTASSERTs below pin the entry count (0x10000) and the total
 * table size (0x200000 bytes) at compile time.
 */
static struct amdvi_dte amdvi_dte[PCI_NUM_DEV_MAX] __aligned(PAGE_SIZE);
CTASSERT(PCI_NUM_DEV_MAX == 0x10000);
CTASSERT(sizeof(amdvi_dte) == 0x200000);

/* List of domains; amdvi_create_domain() inserts, amdvi_destroy_domain() removes. */
static SLIST_HEAD (, amdvi_domain) dom_head;
120 
121 static inline uint32_t
122 amdvi_pci_read(struct amdvi_softc *softc, int off)
123 {
124 
125 	return (pci_cfgregread(PCI_RID2BUS(softc->pci_rid),
126 	    PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid),
127 	    off, 4));
128 }
129 
130 #ifdef AMDVI_ATS_ENABLE
131 /* XXX: Should be in pci.c */
132 /*
133  * Check if device has ATS capability and its enabled.
134  * If ATS is absent or disabled, return (-1), otherwise ATS
135  * queue length.
136  */
137 static int
138 amdvi_find_ats_qlen(uint16_t devid)
139 {
140 	device_t dev;
141 	uint32_t off, cap;
142 	int qlen = -1;
143 
144 	dev = pci_find_bsf(PCI_RID2BUS(devid), PCI_RID2SLOT(devid),
145 			   PCI_RID2FUNC(devid));
146 
147 	if (!dev) {
148 		return (-1);
149 	}
150 #define PCIM_ATS_EN	BIT(31)
151 
152 	if (pci_find_extcap(dev, PCIZ_ATS, &off) == 0) {
153 		cap = pci_read_config(dev, off + 4, 4);
154 		qlen = (cap & 0x1F);
155 		qlen = qlen ? qlen : 32;
156 		printf("AMD-Vi: PCI device %d.%d.%d ATS %s qlen=%d\n",
157 		       RID2PCI_STR(devid),
158 		       (cap & PCIM_ATS_EN) ? "enabled" : "Disabled",
159 		       qlen);
160 		qlen = (cap & PCIM_ATS_EN) ? qlen : -1;
161 	}
162 
163 	return (qlen);
164 }
165 
166 /*
167  * Check if an endpoint device support device IOTLB or ATS.
168  */
169 static inline bool
170 amdvi_dev_support_iotlb(struct amdvi_softc *softc, uint16_t devid)
171 {
172 	struct ivhd_dev_cfg *cfg;
173 	int qlen, i;
174 	bool pci_ats, ivhd_ats;
175 
176 	qlen = amdvi_find_ats_qlen(devid);
177 	if (qlen < 0)
178 		return (false);
179 
180 	KASSERT(softc, ("softc is NULL"));
181 	cfg = softc->dev_cfg;
182 
183 	ivhd_ats = false;
184 	for (i = 0; i < softc->dev_cfg_cnt; i++) {
185 		if ((cfg->start_id <= devid) && (cfg->end_id >= devid)) {
186 			ivhd_ats = cfg->enable_ats;
187 			break;
188 		}
189 		cfg++;
190 	}
191 
192 	pci_ats = (qlen < 0) ? false : true;
193 	if (pci_ats != ivhd_ats)
194 		device_printf(softc->dev,
195 		    "BIOS bug: mismatch in ATS setting for %d.%d.%d,"
196 		    "ATS inv qlen = %d\n", RID2PCI_STR(devid), qlen);
197 
198 	/* Ignore IVRS setting and respect PCI setting. */
199 	return (pci_ats);
200 }
201 #endif
202 
203 /* Enable IOTLB support for IOMMU if its supported. */
204 static inline void
205 amdvi_hw_enable_iotlb(struct amdvi_softc *softc)
206 {
207 #ifndef AMDVI_ATS_ENABLE
208 	softc->iotlb = false;
209 #else
210 	bool supported;
211 
212 	supported = (softc->ivhd_flag & IVHD_FLAG_IOTLB) ? true : false;
213 
214 	if (softc->pci_cap & AMDVI_PCI_CAP_IOTLB) {
215 		if (!supported)
216 			device_printf(softc->dev, "IOTLB disabled by BIOS.\n");
217 
218 		if (supported && !amdvi_enable_iotlb) {
219 			device_printf(softc->dev, "IOTLB disabled by user.\n");
220 			supported = false;
221 		}
222 	} else
223 		supported = false;
224 
225 	softc->iotlb = supported;
226 
227 #endif
228 }
229 
230 static int
231 amdvi_init_cmd(struct amdvi_softc *softc)
232 {
233 	struct amdvi_ctrl *ctrl = softc->ctrl;
234 
235 	ctrl->cmd.len = 8;	/* Use 256 command buffer entries. */
236 	softc->cmd_max = 1 << ctrl->cmd.len;
237 
238 	softc->cmd = malloc(sizeof(struct amdvi_cmd) *
239 	    softc->cmd_max, M_AMDVI, M_WAITOK | M_ZERO);
240 
241 	if ((uintptr_t)softc->cmd & PAGE_MASK)
242 		panic("AMDVi: Command buffer not aligned on page boundary.");
243 
244 	ctrl->cmd.base = vtophys(softc->cmd) / PAGE_SIZE;
245 	/*
246 	 * XXX: Reset the h/w pointers in case IOMMU is restarting,
247 	 * h/w doesn't clear these pointers based on empirical data.
248 	 */
249 	ctrl->cmd_tail = 0;
250 	ctrl->cmd_head = 0;
251 
252 	return (0);
253 }
254 
255 /*
256  * Note: Update tail pointer after we have written the command since tail
257  * pointer update cause h/w to execute new commands, see section 3.3
258  * of AMD IOMMU spec ver 2.0.
259  */
260 /* Get the command tail pointer w/o updating it. */
261 static struct amdvi_cmd *
262 amdvi_get_cmd_tail(struct amdvi_softc *softc)
263 {
264 	struct amdvi_ctrl *ctrl;
265 	struct amdvi_cmd *tail;
266 
267 	KASSERT(softc, ("softc is NULL"));
268 	KASSERT(softc->cmd != NULL, ("cmd is NULL"));
269 
270 	ctrl = softc->ctrl;
271 	KASSERT(ctrl != NULL, ("ctrl is NULL"));
272 
273 	tail = (struct amdvi_cmd *)((uint8_t *)softc->cmd +
274 	    ctrl->cmd_tail);
275 
276 	return (tail);
277 }
278 
279 /*
280  * Update the command tail pointer which will start command execution.
281  */
282 static void
283 amdvi_update_cmd_tail(struct amdvi_softc *softc)
284 {
285 	struct amdvi_ctrl *ctrl;
286 	int size;
287 
288 	size = sizeof(struct amdvi_cmd);
289 	KASSERT(softc->cmd != NULL, ("cmd is NULL"));
290 
291 	ctrl = softc->ctrl;
292 	KASSERT(ctrl != NULL, ("ctrl is NULL"));
293 
294 	ctrl->cmd_tail = MOD_INC(ctrl->cmd_tail, size, softc->cmd_max);
295 	softc->total_cmd++;
296 
297 #ifdef AMDVI_DEBUG_CMD
298 	device_printf(softc->dev, "cmd_tail: %s Tail:0x%x, Head:0x%x.\n",
299 	    ctrl->cmd_tail,
300 	    ctrl->cmd_head);
301 #endif
302 
303 }
304 
305 /*
306  * Various commands supported by IOMMU.
307  */
308 
309 /* Completion wait command. */
310 static void
311 amdvi_cmd_cmp(struct amdvi_softc *softc, const uint64_t data)
312 {
313 	struct amdvi_cmd *cmd;
314 	uint64_t pa;
315 
316 	cmd = amdvi_get_cmd_tail(softc);
317 	KASSERT(cmd != NULL, ("Cmd is NULL"));
318 
319 	pa = vtophys(&softc->cmp_data);
320 	cmd->opcode = AMDVI_CMP_WAIT_OPCODE;
321 	cmd->word0 = (pa & 0xFFFFFFF8) | AMDVI_CMP_WAIT_STORE;
322 	cmd->word1 = (pa >> 32) & 0xFFFFF;
323 	cmd->addr = data;
324 
325 	amdvi_update_cmd_tail(softc);
326 }
327 
328 /* Invalidate device table entry. */
329 static void
330 amdvi_cmd_inv_dte(struct amdvi_softc *softc, uint16_t devid)
331 {
332 	struct amdvi_cmd *cmd;
333 
334 	cmd = amdvi_get_cmd_tail(softc);
335 	KASSERT(cmd != NULL, ("Cmd is NULL"));
336 	cmd->opcode = AMDVI_INVD_DTE_OPCODE;
337 	cmd->word0 = devid;
338 	amdvi_update_cmd_tail(softc);
339 #ifdef AMDVI_DEBUG_CMD
340 	device_printf(softc->dev, "Invalidated DTE:0x%x\n", devid);
341 #endif
342 }
343 
344 /* Invalidate IOMMU page, use for invalidation of domain. */
345 static void
346 amdvi_cmd_inv_iommu_pages(struct amdvi_softc *softc, uint16_t domain_id,
347 			  uint64_t addr, bool guest_nested,
348 			  bool pde, bool page)
349 {
350 	struct amdvi_cmd *cmd;
351 
352 	cmd = amdvi_get_cmd_tail(softc);
353 	KASSERT(cmd != NULL, ("Cmd is NULL"));
354 
355 	cmd->opcode = AMDVI_INVD_PAGE_OPCODE;
356 	cmd->word1 = domain_id;
357 	/*
358 	 * Invalidate all addresses for this domain.
359 	 */
360 	cmd->addr = addr;
361 	cmd->addr |= pde ? AMDVI_INVD_PAGE_PDE : 0;
362 	cmd->addr |= page ? AMDVI_INVD_PAGE_S : 0;
363 
364 	amdvi_update_cmd_tail(softc);
365 }
366 
367 #ifdef AMDVI_ATS_ENABLE
368 /* Invalidate device IOTLB. */
369 static void
370 amdvi_cmd_inv_iotlb(struct amdvi_softc *softc, uint16_t devid)
371 {
372 	struct amdvi_cmd *cmd;
373 	int qlen;
374 
375 	if (!softc->iotlb)
376 		return;
377 
378 	qlen = amdvi_find_ats_qlen(devid);
379 	if (qlen < 0) {
380 		panic("AMDVI: Invalid ATS qlen(%d) for device %d.%d.%d\n",
381 		      qlen, RID2PCI_STR(devid));
382 	}
383 	cmd = amdvi_get_cmd_tail(softc);
384 	KASSERT(cmd != NULL, ("Cmd is NULL"));
385 
386 #ifdef AMDVI_DEBUG_CMD
387 	device_printf(softc->dev, "Invalidate IOTLB devID 0x%x"
388 		      " Qlen:%d\n", devid, qlen);
389 #endif
390 	cmd->opcode = AMDVI_INVD_IOTLB_OPCODE;
391 	cmd->word0 = devid;
392 	cmd->word1 = qlen;
393 	cmd->addr = AMDVI_INVD_IOTLB_ALL_ADDR |
394 		AMDVI_INVD_IOTLB_S;
395 	amdvi_update_cmd_tail(softc);
396 }
397 #endif
398 
399 #ifdef notyet				/* For Interrupt Remap. */
400 static void
401 amdvi_cmd_inv_intr_map(struct amdvi_softc *softc,
402 		       uint16_t devid)
403 {
404 	struct amdvi_cmd *cmd;
405 
406 	cmd = amdvi_get_cmd_tail(softc);
407 	KASSERT(cmd != NULL, ("Cmd is NULL"));
408 	cmd->opcode = AMDVI_INVD_INTR_OPCODE;
409 	cmd->word0 = devid;
410 	amdvi_update_cmd_tail(softc);
411 #ifdef AMDVI_DEBUG_CMD
412 	device_printf(softc->dev, "Invalidate INTR map of devID 0x%x\n", devid);
413 #endif
414 }
415 #endif
416 
417 /* Invalidate domain using INVALIDATE_IOMMU_PAGES command. */
418 static void
419 amdvi_inv_domain(struct amdvi_softc *softc, uint16_t domain_id)
420 {
421 	struct amdvi_cmd *cmd __diagused;
422 
423 	cmd = amdvi_get_cmd_tail(softc);
424 	KASSERT(cmd != NULL, ("Cmd is NULL"));
425 
426 	/*
427 	 * See section 3.3.3 of IOMMU spec rev 2.0, software note
428 	 * for invalidating domain.
429 	 */
430 	amdvi_cmd_inv_iommu_pages(softc, domain_id, AMDVI_INVD_PAGE_ALL_ADDR,
431 				false, true, true);
432 
433 #ifdef AMDVI_DEBUG_CMD
434 	device_printf(softc->dev, "Invalidate domain:0x%x\n", domain_id);
435 
436 #endif
437 }
438 
439 static	bool
440 amdvi_cmp_wait(struct amdvi_softc *softc)
441 {
442 #ifdef AMDVI_DEBUG_CMD
443 	struct amdvi_ctrl *ctrl = softc->ctrl;
444 #endif
445 	const uint64_t VERIFY = 0xA5A5;
446 	volatile uint64_t *read;
447 	int i;
448 	bool status;
449 
450 	read = &softc->cmp_data;
451 	*read = 0;
452 	amdvi_cmd_cmp(softc, VERIFY);
453 	/* Wait for h/w to update completion data. */
454 	for (i = 0; i < 100 && (*read != VERIFY); i++) {
455 		DELAY(1000);		/* 1 ms */
456 	}
457 	status = (VERIFY == softc->cmp_data) ? true : false;
458 
459 #ifdef AMDVI_DEBUG_CMD
460 	if (status)
461 		device_printf(softc->dev, "CMD completion DONE Tail:0x%x, "
462 			      "Head:0x%x, loop:%d.\n", ctrl->cmd_tail,
463 			      ctrl->cmd_head, loop);
464 #endif
465 	return (status);
466 }
467 
468 static void
469 amdvi_wait(struct amdvi_softc *softc)
470 {
471 	struct amdvi_ctrl *ctrl;
472 	int i;
473 
474 	KASSERT(softc, ("softc is NULL"));
475 
476 	ctrl = softc->ctrl;
477 	KASSERT(ctrl != NULL, ("ctrl is NULL"));
478 	/* Don't wait if h/w is not enabled. */
479 	if ((ctrl->control & AMDVI_CTRL_EN) == 0)
480 		return;
481 
482 	for (i = 0; i < 10; i++) {
483 		if (amdvi_cmp_wait(softc))
484 			return;
485 	}
486 
487 	device_printf(softc->dev, "Error: completion failed"
488 		      " tail:0x%x, head:0x%x.\n",
489 		      ctrl->cmd_tail, ctrl->cmd_head);
490 	/* Dump the last command. */
491 	amdvi_dump_cmds(softc, 1);
492 }
493 
494 static void
495 amdvi_dump_cmds(struct amdvi_softc *softc, int count)
496 {
497 	struct amdvi_ctrl *ctrl;
498 	struct amdvi_cmd *cmd;
499 	int off, i;
500 
501 	ctrl = softc->ctrl;
502 	device_printf(softc->dev, "Dump last %d command(s):\n", count);
503 	/*
504 	 * If h/w is stuck in completion, it is the previous command,
505 	 * start dumping from previous command onward.
506 	 */
507 	off = MOD_DEC(ctrl->cmd_head, sizeof(struct amdvi_cmd),
508 	    softc->cmd_max);
509 	for (i = 0; off != ctrl->cmd_tail && i < count; i++) {
510 		cmd = (struct amdvi_cmd *)((uint8_t *)softc->cmd + off);
511 		printf("  [CMD%d, off:0x%x] opcode= 0x%x 0x%x"
512 		    " 0x%x 0x%lx\n", i, off, cmd->opcode,
513 		    cmd->word0, cmd->word1, cmd->addr);
514 		off = MOD_INC(off, sizeof(struct amdvi_cmd), softc->cmd_max);
515 	}
516 }
517 
518 static int
519 amdvi_init_event(struct amdvi_softc *softc)
520 {
521 	struct amdvi_ctrl *ctrl;
522 
523 	ctrl = softc->ctrl;
524 	ctrl->event.len = 8;
525 	softc->event_max = 1 << ctrl->event.len;
526 	softc->event = malloc(sizeof(struct amdvi_event) *
527 	    softc->event_max, M_AMDVI, M_WAITOK | M_ZERO);
528 	if ((uintptr_t)softc->event & PAGE_MASK) {
529 		device_printf(softc->dev, "Event buffer not aligned on page.");
530 		return (false);
531 	}
532 	ctrl->event.base = vtophys(softc->event) / PAGE_SIZE;
533 
534 	/* Reset the pointers. */
535 	ctrl->evt_head = 0;
536 	ctrl->evt_tail = 0;
537 
538 	return (0);
539 }
540 
541 static inline void
542 amdvi_decode_evt_flag(uint16_t flag)
543 {
544 
545 	flag &= AMDVI_EVENT_FLAG_MASK;
546 	printf(" 0x%b]\n", flag,
547 		"\020"
548 		"\001GN"
549 		"\002NX"
550 		"\003US"
551 		"\004I"
552 		"\005PR"
553 		"\006RW"
554 		"\007PE"
555 		"\010RZ"
556 		"\011TR"
557 		);
558 }
559 
560 /* See section 2.5.4 of AMD IOMMU spec ver 2.62.*/
561 static inline void
562 amdvi_decode_evt_flag_type(uint8_t type)
563 {
564 
565 	switch (AMDVI_EVENT_FLAG_TYPE(type)) {
566 	case 0:
567 		printf("RSVD\n");
568 		break;
569 	case 1:
570 		printf("Master Abort\n");
571 		break;
572 	case 2:
573 		printf("Target Abort\n");
574 		break;
575 	case 3:
576 		printf("Data Err\n");
577 		break;
578 	default:
579 		break;
580 	}
581 }
582 
583 static void
584 amdvi_decode_inv_dte_evt(uint16_t devid, uint16_t domid, uint64_t addr,
585     uint16_t flag)
586 {
587 
588 	printf("\t[IO_PAGE_FAULT EVT: devId:0x%x DomId:0x%x"
589 	    " Addr:0x%lx",
590 	    devid, domid, addr);
591 	amdvi_decode_evt_flag(flag);
592 }
593 
594 static void
595 amdvi_decode_pf_evt(uint16_t devid, uint16_t domid, uint64_t addr,
596     uint16_t flag)
597 {
598 
599 	printf("\t[IO_PAGE_FAULT EVT: devId:0x%x DomId:0x%x"
600 	    " Addr:0x%lx",
601 	    devid, domid, addr);
602 	amdvi_decode_evt_flag(flag);
603 }
604 
605 static void
606 amdvi_decode_dte_hwerr_evt(uint16_t devid, uint16_t domid,
607     uint64_t addr, uint16_t flag)
608 {
609 
610 	printf("\t[DEV_TAB_HW_ERR EVT: devId:0x%x DomId:0x%x"
611 	    " Addr:0x%lx", devid, domid, addr);
612 	amdvi_decode_evt_flag(flag);
613 	amdvi_decode_evt_flag_type(flag);
614 }
615 
616 static void
617 amdvi_decode_page_hwerr_evt(uint16_t devid, uint16_t domid, uint64_t addr,
618     uint16_t flag)
619 {
620 
621 	printf("\t[PAGE_TAB_HW_ERR EVT: devId:0x%x DomId:0x%x"
622 	    " Addr:0x%lx", devid, domid, addr);
623 	amdvi_decode_evt_flag(flag);
624 	amdvi_decode_evt_flag_type(AMDVI_EVENT_FLAG_TYPE(flag));
625 }
626 
627 static void
628 amdvi_decode_evt(struct amdvi_event *evt)
629 {
630 	struct amdvi_cmd *cmd;
631 
632 	switch (evt->opcode) {
633 	case AMDVI_EVENT_INVALID_DTE:
634 		amdvi_decode_inv_dte_evt(evt->devid, evt->pasid_domid,
635 		    evt->addr, evt->flag);
636 		break;
637 
638 	case AMDVI_EVENT_PFAULT:
639 		amdvi_decode_pf_evt(evt->devid, evt->pasid_domid,
640 		    evt->addr, evt->flag);
641 		break;
642 
643 	case AMDVI_EVENT_DTE_HW_ERROR:
644 		amdvi_decode_dte_hwerr_evt(evt->devid, evt->pasid_domid,
645 		    evt->addr, evt->flag);
646 		break;
647 
648 	case AMDVI_EVENT_PAGE_HW_ERROR:
649 		amdvi_decode_page_hwerr_evt(evt->devid, evt->pasid_domid,
650 		    evt->addr, evt->flag);
651 		break;
652 
653 	case AMDVI_EVENT_ILLEGAL_CMD:
654 		/* FALL THROUGH */
655 	case AMDVI_EVENT_CMD_HW_ERROR:
656 		printf("\t[%s EVT]\n", (evt->opcode == AMDVI_EVENT_ILLEGAL_CMD) ?
657 		    "ILLEGAL CMD" : "CMD HW ERR");
658 		cmd = (struct amdvi_cmd *)PHYS_TO_DMAP(evt->addr);
659 		printf("\tCMD opcode= 0x%x 0x%x 0x%x 0x%lx\n",
660 		    cmd->opcode, cmd->word0, cmd->word1, cmd->addr);
661 		break;
662 
663 	case AMDVI_EVENT_IOTLB_TIMEOUT:
664 		printf("\t[IOTLB_INV_TIMEOUT devid:0x%x addr:0x%lx]\n",
665 		    evt->devid, evt->addr);
666 		break;
667 
668 	case AMDVI_EVENT_INVALID_DTE_REQ:
669 		printf("\t[INV_DTE devid:0x%x addr:0x%lx type:0x%x tr:%d]\n",
670 		    evt->devid, evt->addr, evt->flag >> 9,
671 		    (evt->flag >> 8) & 1);
672 		break;
673 
674 	case AMDVI_EVENT_INVALID_PPR_REQ:
675 	case AMDVI_EVENT_COUNTER_ZERO:
676 		printf("AMD-Vi: v2 events.\n");
677 		break;
678 
679 	default:
680 		printf("Unsupported AMD-Vi event:%d\n", evt->opcode);
681 	}
682 }
683 
684 static void
685 amdvi_print_events(struct amdvi_softc *softc)
686 {
687 	struct amdvi_ctrl *ctrl;
688 	struct amdvi_event *event;
689 	int i, size;
690 
691 	ctrl = softc->ctrl;
692 	size = sizeof(struct amdvi_event);
693 	for (i = 0; i < softc->event_max; i++) {
694 		event = &softc->event[ctrl->evt_head / size];
695 		if (!event->opcode)
696 			break;
697 		device_printf(softc->dev, "\t[Event%d: Head:0x%x Tail:0x%x]\n",
698 		    i, ctrl->evt_head, ctrl->evt_tail);
699 		amdvi_decode_evt(event);
700 		ctrl->evt_head = MOD_INC(ctrl->evt_head, size,
701 		    softc->event_max);
702 	}
703 }
704 
705 static int
706 amdvi_init_dte(struct amdvi_softc *softc)
707 {
708 	struct amdvi_ctrl *ctrl;
709 
710 	ctrl = softc->ctrl;
711 	ctrl->dte.base = vtophys(amdvi_dte) / PAGE_SIZE;
712 	ctrl->dte.size = 0x1FF;		/* 2MB device table. */
713 
714 	return (0);
715 }
716 
717 /*
718  * Not all capabilities of IOMMU are available in ACPI IVHD flag
719  * or EFR entry, read directly from device.
720  */
721 static int
722 amdvi_print_pci_cap(device_t dev)
723 {
724 	struct amdvi_softc *softc;
725 	uint32_t off, cap;
726 
727 	softc = device_get_softc(dev);
728 	off = softc->cap_off;
729 
730 	/*
731 	 * Section 3.7.1 of IOMMU sepc rev 2.0.
732 	 * Read capability from device.
733 	 */
734 	cap = amdvi_pci_read(softc, off);
735 
736 	/* Make sure capability type[18:16] is 3. */
737 	KASSERT((((cap >> 16) & 0x7) == 0x3),
738 	    ("Not a IOMMU capability 0x%x@0x%x", cap, off));
739 
740 	softc->pci_cap = cap >> 24;
741 	device_printf(softc->dev, "PCI cap 0x%x@0x%x feature:%b\n",
742 	    cap, off, softc->pci_cap,
743 	    "\20\1IOTLB\2HT\3NPCache\4EFR\5CapExt");
744 
745 	return (0);
746 }
747 
748 static void
749 amdvi_event_intr(void *arg)
750 {
751 	struct amdvi_softc *softc;
752 	struct amdvi_ctrl *ctrl;
753 
754 	softc = (struct amdvi_softc *)arg;
755 	ctrl = softc->ctrl;
756 	device_printf(softc->dev, "EVT INTR %ld Status:0x%x"
757 	    " EVT Head:0x%x Tail:0x%x]\n", softc->event_intr_cnt++,
758 	    ctrl->status, ctrl->evt_head, ctrl->evt_tail);
759 	printf("  [CMD Total 0x%lx] Tail:0x%x, Head:0x%x.\n",
760 	    softc->total_cmd, ctrl->cmd_tail, ctrl->cmd_head);
761 
762 	amdvi_print_events(softc);
763 	ctrl->status &= AMDVI_STATUS_EV_OF | AMDVI_STATUS_EV_INTR;
764 }
765 
766 static void
767 amdvi_free_evt_intr_res(device_t dev)
768 {
769 
770 	struct amdvi_softc *softc;
771 	device_t mmio_dev;
772 
773 	softc = device_get_softc(dev);
774 	mmio_dev = softc->pci_dev;
775 
776 	IVHD_TEARDOWN_INTR(mmio_dev);
777 }
778 
779 static bool
780 amdvi_alloc_intr_resources(struct amdvi_softc *softc)
781 {
782 	struct amdvi_ctrl *ctrl;
783 	device_t dev, mmio_dev;
784 	int err;
785 
786 	dev = softc->dev;
787 	mmio_dev = softc->pci_dev;
788 
789 	/* Clear interrupt status bits. */
790 	ctrl = softc->ctrl;
791 	ctrl->status &= AMDVI_STATUS_EV_OF | AMDVI_STATUS_EV_INTR;
792 
793 	err = IVHD_SETUP_INTR(mmio_dev, amdvi_event_intr, softc, "fault");
794 	if (err)
795 		device_printf(dev, "Interrupt setup failed on %s\n",
796 		    device_get_nameunit(mmio_dev));
797 	return (err);
798 }
799 
800 static void
801 amdvi_print_dev_cap(struct amdvi_softc *softc)
802 {
803 	struct ivhd_dev_cfg *cfg;
804 	int i;
805 
806 	cfg = softc->dev_cfg;
807 	for (i = 0; i < softc->dev_cfg_cnt; i++) {
808 		device_printf(softc->dev, "device [0x%x - 0x%x] "
809 		    "config:%b%s\n", cfg->start_id, cfg->end_id,
810 		    cfg->data,
811 		    "\020\001INIT\002ExtInt\003NMI"
812 		    "\007LINT0\010LINT1",
813 		    cfg->enable_ats ? "ATS enabled" : "");
814 		cfg++;
815 	}
816 }
817 
818 static int
819 amdvi_handle_sysctl(SYSCTL_HANDLER_ARGS)
820 {
821 	struct amdvi_softc *softc;
822 	int result, type, error = 0;
823 
824 	softc = (struct amdvi_softc *)arg1;
825 	type = arg2;
826 
827 	switch (type) {
828 	case 0:
829 		result = softc->ctrl->cmd_head;
830 		error = sysctl_handle_int(oidp, &result, 0,
831 		    req);
832 		break;
833 	case 1:
834 		result = softc->ctrl->cmd_tail;
835 		error = sysctl_handle_int(oidp, &result, 0,
836 		    req);
837 		break;
838 	case 2:
839 		result = softc->ctrl->evt_head;
840 		error = sysctl_handle_int(oidp, &result, 0,
841 		    req);
842 		break;
843 	case 3:
844 		result = softc->ctrl->evt_tail;
845 		error = sysctl_handle_int(oidp, &result, 0,
846 		    req);
847 		break;
848 
849 	default:
850 		device_printf(softc->dev, "Unknown sysctl:%d\n", type);
851 	}
852 
853 	return (error);
854 }
855 
856 static void
857 amdvi_add_sysctl(struct amdvi_softc *softc)
858 {
859 	struct sysctl_oid_list *child;
860 	struct sysctl_ctx_list *ctx;
861 	device_t dev;
862 
863 	dev = softc->dev;
864 	ctx = device_get_sysctl_ctx(dev);
865 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
866 
867 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "event_intr_count", CTLFLAG_RD,
868 	    &softc->event_intr_cnt, "Event interrupt count");
869 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "command_count", CTLFLAG_RD,
870 	    &softc->total_cmd, "Command submitted count");
871 	SYSCTL_ADD_U16(ctx, child, OID_AUTO, "pci_rid", CTLFLAG_RD,
872 	    &softc->pci_rid, 0, "IOMMU RID");
873 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_head",
874 	    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 0,
875 	    amdvi_handle_sysctl, "IU", "Command head");
876 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_tail",
877 	    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 1,
878 	    amdvi_handle_sysctl, "IU", "Command tail");
879 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_head",
880 	    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 2,
881 	    amdvi_handle_sysctl, "IU", "Command head");
882 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_tail",
883 	    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 3,
884 	    amdvi_handle_sysctl, "IU", "Command tail");
885 }
886 
887 int
888 amdvi_setup_hw(struct amdvi_softc *softc)
889 {
890 	device_t dev;
891 	int status;
892 
893 	dev = softc->dev;
894 
895 	amdvi_hw_enable_iotlb(softc);
896 
897 	amdvi_print_dev_cap(softc);
898 
899 	if ((status = amdvi_print_pci_cap(dev)) != 0) {
900 		device_printf(dev, "PCI capability.\n");
901 		return (status);
902 	}
903 	if ((status = amdvi_init_cmd(softc)) != 0) {
904 		device_printf(dev, "Couldn't configure command buffer.\n");
905 		return (status);
906 	}
907 	if ((status = amdvi_init_event(softc)) != 0) {
908 		device_printf(dev, "Couldn't configure event buffer.\n");
909 		return (status);
910 	}
911 	if ((status = amdvi_init_dte(softc)) != 0) {
912 		device_printf(dev, "Couldn't configure device table.\n");
913 		return (status);
914 	}
915 	if ((status = amdvi_alloc_intr_resources(softc)) != 0) {
916 		return (status);
917 	}
918 	amdvi_add_sysctl(softc);
919 	return (0);
920 }
921 
922 int
923 amdvi_teardown_hw(struct amdvi_softc *softc)
924 {
925 	device_t dev;
926 
927 	dev = softc->dev;
928 
929 	/*
930 	 * Called after disable, h/w is stopped by now, free all the resources.
931 	 */
932 	amdvi_free_evt_intr_res(dev);
933 
934 	if (softc->cmd)
935 		free(softc->cmd, M_AMDVI);
936 
937 	if (softc->event)
938 		free(softc->event, M_AMDVI);
939 
940 	return (0);
941 }
942 
943 /*********** bhyve interfaces *********************/
944 static int
945 amdvi_init(void)
946 {
947 	if (!ivhd_count) {
948 		return (EIO);
949 	}
950 	if (!amdvi_enable_user && ivhd_count) {
951 		printf("bhyve: Found %d AMD-Vi/IOMMU device(s), "
952 		    "use hw.vmm.amdvi.enable=1 to enable pass-through.\n",
953 		    ivhd_count);
954 		return (EINVAL);
955 	}
956 	return (0);
957 }
958 
/* Counterpart of amdvi_init(); there is nothing to undo. */
static void
amdvi_cleanup(void)
{
}
964 
965 static uint16_t
966 amdvi_domainId(void)
967 {
968 
969 	/*
970 	 * If we hit maximum domain limit, rollover leaving host
971 	 * domain(0).
972 	 * XXX: make sure that this domain is not used.
973 	 */
974 	if (amdvi_dom_id == AMDVI_MAX_DOMAIN)
975 		amdvi_dom_id = 1;
976 
977 	return ((uint16_t)amdvi_dom_id++);
978 }
979 
980 static void
981 amdvi_do_inv_domain(uint16_t domain_id, bool create)
982 {
983 	struct amdvi_softc *softc;
984 	int i;
985 
986 	for (i = 0; i < ivhd_count; i++) {
987 		softc = device_get_softc(ivhd_devs[i]);
988 		KASSERT(softc, ("softc is NULL"));
989 		/*
990 		 * If not present pages are cached, invalidate page after
991 		 * creating domain.
992 		 */
993 #if 0
994 		if (create && ((softc->pci_cap & AMDVI_PCI_CAP_NPCACHE) == 0))
995 			continue;
996 #endif
997 		amdvi_inv_domain(softc, domain_id);
998 		amdvi_wait(softc);
999 	}
1000 }
1001 
1002 static void *
1003 amdvi_create_domain(vm_paddr_t maxaddr)
1004 {
1005 	struct amdvi_domain *dom;
1006 
1007 	dom = malloc(sizeof(struct amdvi_domain), M_AMDVI, M_ZERO | M_WAITOK);
1008 	dom->id = amdvi_domainId();
1009 	//dom->maxaddr = maxaddr;
1010 #ifdef AMDVI_DEBUG_CMD
1011 	printf("Created domain #%d\n", dom->id);
1012 #endif
1013 	/*
1014 	 * Host domain(#0) don't create translation table.
1015 	 */
1016 	if (dom->id || amdvi_host_ptp)
1017 		dom->ptp = malloc(PAGE_SIZE, M_AMDVI, M_WAITOK | M_ZERO);
1018 
1019 	dom->ptp_level = amdvi_ptp_level;
1020 
1021 	amdvi_do_inv_domain(dom->id, true);
1022 	SLIST_INSERT_HEAD(&dom_head, dom, next);
1023 
1024 	return (dom);
1025 }
1026 
1027 static void
1028 amdvi_free_ptp(uint64_t *ptp, int level)
1029 {
1030 	int i;
1031 
1032 	if (level < 1)
1033 		return;
1034 
1035 	for (i = 0; i < NPTEPG ; i++) {
1036 		if ((ptp[i] & AMDVI_PT_PRESENT) == 0)
1037 			continue;
1038 		/* XXX: Add super-page or PTE mapping > 4KB. */
1039 #ifdef notyet
1040 		/* Super-page mapping. */
1041 		if (AMDVI_PD_SUPER(ptp[i]))
1042 			continue;
1043 #endif
1044 
1045 		amdvi_free_ptp((uint64_t *)PHYS_TO_DMAP(ptp[i]
1046 		    & AMDVI_PT_MASK), level - 1);
1047 	}
1048 
1049 	free(ptp, M_AMDVI);
1050 }
1051 
1052 static void
1053 amdvi_destroy_domain(void *arg)
1054 {
1055 	struct amdvi_domain *domain;
1056 
1057 	domain = (struct amdvi_domain *)arg;
1058 	KASSERT(domain, ("domain is NULL"));
1059 #ifdef AMDVI_DEBUG_CMD
1060 	printf("Destroying domain %d\n", domain->id);
1061 #endif
1062 	if (domain->ptp)
1063 		amdvi_free_ptp(domain->ptp, domain->ptp_level);
1064 
1065 	amdvi_do_inv_domain(domain->id, false);
1066 	SLIST_REMOVE(&dom_head, domain, amdvi_domain, next);
1067 	free(domain, M_AMDVI);
1068 }
1069 
1070 static uint64_t
1071 amdvi_set_pt(uint64_t *pt, int level, vm_paddr_t gpa,
1072     vm_paddr_t hpa, uint64_t pg_size, bool create)
1073 {
1074 	uint64_t *page, pa;
1075 	int shift, index;
1076 	const int PT_SHIFT = 9;
1077 	const int PT_INDEX_MASK = (1 << PT_SHIFT) - 1;	/* Based on PT_SHIFT */
1078 
1079 	if (!pg_size)
1080 		return (0);
1081 
1082 	if (hpa & (pg_size - 1)) {
1083 		printf("HPA is not size aligned.\n");
1084 		return (0);
1085 	}
1086 	if (gpa & (pg_size - 1)) {
1087 		printf("HPA is not size aligned.\n");
1088 		return (0);
1089 	}
1090 	shift = PML4SHIFT;
1091 	while ((shift > PAGE_SHIFT) && (pg_size < (1UL << shift))) {
1092 		index = (gpa >> shift) & PT_INDEX_MASK;
1093 
1094 		if ((pt[index] == 0) && create) {
1095 			page = malloc(PAGE_SIZE, M_AMDVI, M_WAITOK | M_ZERO);
1096 			pa = vtophys(page);
1097 			pt[index] = pa | AMDVI_PT_PRESENT | AMDVI_PT_RW |
1098 			    ((level - 1) << AMDVI_PD_LEVEL_SHIFT);
1099 		}
1100 #ifdef AMDVI_DEBUG_PTE
1101 		if ((gpa % 0x1000000) == 0)
1102 			printf("[level%d, shift = %d]PTE:0x%lx\n",
1103 			    level, shift, pt[index]);
1104 #endif
1105 #define PTE2PA(x)	((uint64_t)(x) & AMDVI_PT_MASK)
1106 		pa = PTE2PA(pt[index]);
1107 		pt = (uint64_t *)PHYS_TO_DMAP(pa);
1108 		shift -= PT_SHIFT;
1109 		level--;
1110 	}
1111 
1112 	/* Leaf entry. */
1113 	index = (gpa >> shift) & PT_INDEX_MASK;
1114 
1115 	if (create) {
1116 		pt[index] = hpa | AMDVI_PT_RW | AMDVI_PT_PRESENT;
1117 	} else
1118 		pt[index] = 0;
1119 
1120 #ifdef AMDVI_DEBUG_PTE
1121 	if ((gpa % 0x1000000) == 0)
1122 		printf("[Last level%d, shift = %d]PTE:0x%lx\n",
1123 		    level, shift, pt[index]);
1124 #endif
1125 	return (1ULL << shift);
1126 }
1127 
1128 static uint64_t
1129 amdvi_update_mapping(struct amdvi_domain *domain, vm_paddr_t gpa,
1130     vm_paddr_t hpa, uint64_t size, bool create)
1131 {
1132 	uint64_t mapped, *ptp, len;
1133 	int level;
1134 
1135 	KASSERT(domain, ("domain is NULL"));
1136 	level = domain->ptp_level;
1137 	KASSERT(level, ("Page table level is 0"));
1138 
1139 	ptp = domain->ptp;
1140 	KASSERT(ptp, ("PTP is NULL"));
1141 	mapped = 0;
1142 	while (mapped < size) {
1143 		len = amdvi_set_pt(ptp, level, gpa + mapped, hpa + mapped,
1144 		    PAGE_SIZE, create);
1145 		if (!len) {
1146 			printf("Error: Couldn't map HPA:0x%lx GPA:0x%lx\n",
1147 			    hpa, gpa);
1148 			return (0);
1149 		}
1150 		mapped += len;
1151 	}
1152 
1153 	return (mapped);
1154 }
1155 
1156 static uint64_t
1157 amdvi_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa,
1158     uint64_t len)
1159 {
1160 	struct amdvi_domain *domain;
1161 
1162 	domain = (struct amdvi_domain *)arg;
1163 
1164 	if (domain->id && !domain->ptp) {
1165 		printf("ptp is NULL");
1166 		return (-1);
1167 	}
1168 
1169 	/*
1170 	 * If host domain is created w/o page table, skip IOMMU page
1171 	 * table set-up.
1172 	 */
1173 	if (domain->ptp)
1174 		return (amdvi_update_mapping(domain, gpa, hpa, len, true));
1175 	else
1176 		return (len);
1177 }
1178 
1179 static uint64_t
1180 amdvi_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
1181 {
1182 	struct amdvi_domain *domain;
1183 
1184 	domain = (struct amdvi_domain *)arg;
1185 	/*
1186 	 * If host domain is created w/o page table, skip IOMMU page
1187 	 * table set-up.
1188 	 */
1189 	if (domain->ptp)
1190 		return (amdvi_update_mapping(domain, gpa, 0, len, false));
1191 	return
1192 	    (len);
1193 }
1194 
1195 static struct amdvi_softc *
1196 amdvi_find_iommu(uint16_t devid)
1197 {
1198 	struct amdvi_softc *softc;
1199 	int i, j;
1200 
1201 	for (i = 0; i < ivhd_count; i++) {
1202 		softc = device_get_softc(ivhd_devs[i]);
1203 		for (j = 0; j < softc->dev_cfg_cnt; j++)
1204 			if ((devid >= softc->dev_cfg[j].start_id) &&
1205 			    (devid <= softc->dev_cfg[j].end_id))
1206 				return (softc);
1207 	}
1208 
1209 	return (NULL);
1210 }
1211 
1212 /*
1213  * Set-up device table entry.
1214  * IOMMU spec Rev 2.0, section 3.2.2.2, some of the fields must
1215  * be set concurrently, e.g. read and write bits.
1216  */
1217 static void
1218 amdvi_set_dte(struct amdvi_domain *domain, struct amdvi_softc *softc,
1219     uint16_t devid, bool enable)
1220 {
1221 	struct amdvi_dte* temp;
1222 
1223 	KASSERT(domain, ("domain is NULL for pci_rid:0x%x\n", devid));
1224 	KASSERT(softc, ("softc is NULL for pci_rid:0x%x\n", devid));
1225 
1226 	temp = &amdvi_dte[devid];
1227 
1228 #ifdef AMDVI_ATS_ENABLE
1229 	/* If IOMMU and device support IOTLB, enable it. */
1230 	if (amdvi_dev_support_iotlb(softc, devid) && softc->iotlb)
1231 		temp->iotlb_enable = 1;
1232 #endif
1233 
1234 	/* Avoid duplicate I/O faults. */
1235 	temp->sup_second_io_fault = 1;
1236 	temp->sup_all_io_fault = amdvi_disable_io_fault;
1237 
1238 	temp->dt_valid = 1;
1239 	temp->domain_id = domain->id;
1240 
1241 	if (enable) {
1242 		if (domain->ptp) {
1243 			temp->pt_base = vtophys(domain->ptp) >> 12;
1244 			temp->pt_level = amdvi_ptp_level;
1245 		}
1246 		/*
1247 		 * XXX: Page table valid[TV] bit must be set even if host domain
1248 		 * page tables are not enabled.
1249 		 */
1250 		temp->pt_valid = 1;
1251 		temp->read_allow = 1;
1252 		temp->write_allow = 1;
1253 	}
1254 }
1255 
/*
 * Queue the invalidation commands for one device on 'softc': always the
 * DTE invalidation, plus an IOTLB invalidation when built with
 * AMDVI_ATS_ENABLE and the device supports IOTLB.  Finishes by calling
 * amdvi_wait() (presumably blocking until the commands complete).
 */
static void
amdvi_inv_device(struct amdvi_softc *softc, uint16_t devid)
{
	KASSERT(softc, ("softc is NULL"));

	amdvi_cmd_inv_dte(softc, devid);

#ifdef AMDVI_ATS_ENABLE
	if (amdvi_dev_support_iotlb(softc, devid)) {
		amdvi_cmd_inv_iotlb(softc, devid);
	}
#endif

	amdvi_wait(softc);
}
1268 
/*
 * iommu_ops 'add_device' hook: attach 'devid' to the domain passed as 'arg'.
 *
 * Writes the device's DTE with translation enabled and then invalidates
 * the IOMMU's cached copy.  Does nothing when no IOMMU claims the device.
 */
static void
amdvi_add_device(void *arg, uint16_t devid)
{
	struct amdvi_domain *domain;
	struct amdvi_softc *iommu;

	domain = (struct amdvi_domain *)arg;
	KASSERT(domain != NULL, ("domain is NULL"));
#ifdef AMDVI_DEBUG_CMD
	printf("Assigning device(%d.%d.%d) to domain:%d\n",
	    RID2PCI_STR(devid), domain->id);
#endif
	iommu = amdvi_find_iommu(devid);
	if (iommu != NULL) {
		amdvi_set_dte(domain, iommu, devid, true);
		amdvi_inv_device(iommu, devid);
	}
}
1287 
/*
 * iommu_ops 'remove_device' hook: detach 'devid' from the domain passed as
 * 'arg'.
 *
 * Rewrites the device's DTE with enable == false and then invalidates the
 * IOMMU's cached copy.  Does nothing when no IOMMU claims the device.
 */
static void
amdvi_remove_device(void *arg, uint16_t devid)
{
	struct amdvi_domain *domain;
	struct amdvi_softc *iommu;

	domain = (struct amdvi_domain *)arg;
#ifdef AMDVI_DEBUG_CMD
	printf("Remove device(0x%x) from domain:%d\n",
	    devid, domain->id);
#endif
	iommu = amdvi_find_iommu(devid);
	if (iommu != NULL) {
		amdvi_set_dte(domain, iommu, devid, false);
		amdvi_inv_device(iommu, devid);
	}
}
1305 
1306 static void
1307 amdvi_enable(void)
1308 {
1309 	struct amdvi_ctrl *ctrl;
1310 	struct amdvi_softc *softc;
1311 	uint64_t val;
1312 	int i;
1313 
1314 	for (i = 0; i < ivhd_count; i++) {
1315 		softc = device_get_softc(ivhd_devs[i]);
1316 		KASSERT(softc, ("softc is NULL\n"));
1317 		ctrl = softc->ctrl;
1318 		KASSERT(ctrl, ("ctrl is NULL\n"));
1319 
1320 		val = (	AMDVI_CTRL_EN		|
1321 			AMDVI_CTRL_CMD		|
1322 			AMDVI_CTRL_ELOG		|
1323 			AMDVI_CTRL_ELOGINT	|
1324 			AMDVI_CTRL_INV_TO_1S);
1325 
1326 		if (softc->ivhd_flag & IVHD_FLAG_COH)
1327 			val |= AMDVI_CTRL_COH;
1328 		if (softc->ivhd_flag & IVHD_FLAG_HTT)
1329 			val |= AMDVI_CTRL_HTT;
1330 		if (softc->ivhd_flag & IVHD_FLAG_RPPW)
1331 			val |= AMDVI_CTRL_RPPW;
1332 		if (softc->ivhd_flag & IVHD_FLAG_PPW)
1333 			val |= AMDVI_CTRL_PPW;
1334 		if (softc->ivhd_flag & IVHD_FLAG_ISOC)
1335 			val |= AMDVI_CTRL_ISOC;
1336 
1337 		ctrl->control = val;
1338 	}
1339 }
1340 
1341 static void
1342 amdvi_disable(void)
1343 {
1344 	struct amdvi_ctrl *ctrl;
1345 	struct amdvi_softc *softc;
1346 	int i;
1347 
1348 	for (i = 0; i < ivhd_count; i++) {
1349 		softc = device_get_softc(ivhd_devs[i]);
1350 		KASSERT(softc, ("softc is NULL\n"));
1351 		ctrl = softc->ctrl;
1352 		KASSERT(ctrl, ("ctrl is NULL\n"));
1353 
1354 		ctrl->control = 0;
1355 	}
1356 }
1357 
1358 static void
1359 amdvi_invalidate_tlb(void *arg)
1360 {
1361 	struct amdvi_domain *domain;
1362 
1363 	domain = (struct amdvi_domain *)arg;
1364 	KASSERT(domain, ("domain is NULL"));
1365 	amdvi_do_inv_domain(domain->id, false);
1366 }
1367 
1368 const struct iommu_ops iommu_ops_amd = {
1369 	.init = amdvi_init,
1370 	.cleanup = amdvi_cleanup,
1371 	.enable = amdvi_enable,
1372 	.disable = amdvi_disable,
1373 	.create_domain = amdvi_create_domain,
1374 	.destroy_domain = amdvi_destroy_domain,
1375 	.create_mapping = amdvi_create_mapping,
1376 	.remove_mapping = amdvi_remove_mapping,
1377 	.add_device = amdvi_add_device,
1378 	.remove_device = amdvi_remove_device,
1379 	.invalidate_tlb = amdvi_invalidate_tlb
1380 };
1381