xref: /freebsd/sys/amd64/vmm/amd/amdvi_hw.c (revision f0cfa1b168014f56c02b83e5f28412cc5f78d117)
1 /*-
2  * Copyright (c) 2016, Anish Gupta (anish@freebsd.org)
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/bus.h>
33 #include <sys/kernel.h>
34 #include <sys/module.h>
35 #include <sys/malloc.h>
36 #include <sys/pcpu.h>
37 #include <sys/rman.h>
38 #include <sys/smp.h>
39 #include <sys/sysctl.h>
40 
41 #include <vm/vm.h>
42 #include <vm/pmap.h>
43 
44 #include <dev/pci/pcivar.h>
45 #include <dev/pci/pcireg.h>
46 
47 #include <machine/resource.h>
48 #include <machine/vmm.h>
49 #include <machine/pmap.h>
50 #include <machine/vmparam.h>
51 #include <machine/pci_cfgreg.h>
52 
53 #include "pcib_if.h"
54 
55 #include "io/iommu.h"
56 #include "amdvi_priv.h"
57 
58 SYSCTL_DECL(_hw_vmm);
59 SYSCTL_NODE(_hw_vmm, OID_AUTO, amdvi, CTLFLAG_RW, NULL, NULL);
60 
/*
 * Ring-buffer offset arithmetic.  'a' is a byte offset in [0, m * s) into
 * a ring of 'm' entries of 's' bytes each; the result is the offset of the
 * next (MOD_INC) or previous (MOD_DEC) entry, wrapping at m * s.
 *
 * MOD_DEC steps back by adding (m - 1) entries rather than subtracting one,
 * so the intermediate value never drops below zero: the result is correct
 * for signed operands and for ring sizes that are not a power of two.
 * Arguments are evaluated more than once; do not pass expressions with
 * side effects.
 */
#define MOD_INC(a, s, m) (((a) + (s)) % ((m) * (s)))
#define MOD_DEC(a, s, m) (((a) + ((m) - 1) * (s)) % ((m) * (s)))

/* Print RID or device ID in PCI string format. */
#define RID2PCI_STR(d) PCI_RID2BUS(d), PCI_RID2SLOT(d), PCI_RID2FUNC(d)
66 
67 static void amdvi_dump_cmds(struct amdvi_softc *softc);
68 static void amdvi_print_dev_cap(struct amdvi_softc *softc);
69 
70 MALLOC_DEFINE(M_AMDVI, "amdvi", "amdvi");
71 
72 extern device_t *ivhd_devs;
73 
74 extern int ivhd_count;
75 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, count, CTLFLAG_RDTUN, &ivhd_count,
76     0, NULL);
77 
78 static int amdvi_enable_user = 0;
79 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, enable, CTLFLAG_RDTUN,
80     &amdvi_enable_user, 0, NULL);
81 TUNABLE_INT("hw.vmm.amdvi_enable", &amdvi_enable_user);
82 
83 #ifdef AMDVI_ATS_ENABLE
84 /* XXX: ATS is not tested. */
85 static int amdvi_enable_iotlb = 1;
86 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, iotlb_enabled, CTLFLAG_RDTUN,
87     &amdvi_enable_iotlb, 0, NULL);
88 TUNABLE_INT("hw.vmm.enable_iotlb", &amdvi_enable_iotlb);
89 #endif
90 
91 static int amdvi_host_ptp = 1;	/* Use page tables for host. */
92 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, host_ptp, CTLFLAG_RDTUN,
93     &amdvi_host_ptp, 0, NULL);
94 TUNABLE_INT("hw.vmm.amdvi.host_ptp", &amdvi_host_ptp);
95 
96 /* Page table level used <= supported by h/w[v1=7]. */
97 static int amdvi_ptp_level = 4;
98 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, ptp_level, CTLFLAG_RDTUN,
99     &amdvi_ptp_level, 0, NULL);
100 TUNABLE_INT("hw.vmm.amdvi.ptp_level", &amdvi_ptp_level);
101 
102 /* Disable fault event reporting. */
103 static int amdvi_disable_io_fault = 0;
104 SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, disable_io_fault, CTLFLAG_RDTUN,
105     &amdvi_disable_io_fault, 0, NULL);
106 TUNABLE_INT("hw.vmm.amdvi.disable_io_fault", &amdvi_disable_io_fault);
107 
108 static uint32_t amdvi_dom_id = 0;	/* 0 is reserved for host. */
109 SYSCTL_UINT(_hw_vmm_amdvi, OID_AUTO, domain_id, CTLFLAG_RD,
110     &amdvi_dom_id, 0, NULL);
111 /*
112  * Device table entry.
113  * Bus(256) x Dev(32) x Fun(8) x DTE(256 bits or 32 bytes).
114  *	= 256 * 2 * PAGE_SIZE.
115  */
116 static struct amdvi_dte amdvi_dte[PCI_NUM_DEV_MAX] __aligned(PAGE_SIZE);
117 CTASSERT(PCI_NUM_DEV_MAX == 0x10000);
118 CTASSERT(sizeof(amdvi_dte) == 0x200000);
119 
120 static SLIST_HEAD (, amdvi_domain) dom_head;
121 
122 static inline void
123 amdvi_pci_write(struct amdvi_softc *softc, int off, uint32_t data)
124 {
125 
126 	pci_cfgregwrite(PCI_RID2BUS(softc->pci_rid),
127 	    PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid),
128 	    off, data, 4);
129 }
130 
131 static inline uint32_t
132 amdvi_pci_read(struct amdvi_softc *softc, int off)
133 {
134 
135 	return (pci_cfgregread(PCI_RID2BUS(softc->pci_rid),
136 	    PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid),
137 	    off, 4));
138 }
139 
140 static int
141 amdvi_find_pci_cap(struct amdvi_softc *softc, uint8_t capability, int *off)
142 {
143 	uint32_t read;
144 	uint8_t ptr;
145 
146 	read = amdvi_pci_read(softc, PCIR_COMMAND);
147 	if (((read >> 16) & PCIM_STATUS_CAPPRESENT) == 0)
148 		return (ENXIO);
149 
150 	/* Read the starting of capability pointer. */
151 	read = amdvi_pci_read(softc, PCIR_CAP_PTR);
152 	ptr = read & 0xFF;
153 
154 	while (ptr != 0) {
155 		read = amdvi_pci_read(softc, ptr);
156 		if ((read & 0xFF) == capability) {
157 			*off = ptr;
158 			return (0);
159 		}
160 		ptr = (read >> 8) & 0xFF;
161 	}
162 
163 	return (ENOENT);
164 }
165 
166 #ifdef AMDVI_ATS_ENABLE
167 /* XXX: Should be in pci.c */
168 /*
169  * Check if device has ATS capability and its enabled.
170  * If ATS is absent or disabled, return (-1), otherwise ATS
171  * queue length.
172  */
173 static int
174 amdvi_find_ats_qlen(uint16_t devid)
175 {
176 	device_t dev;
177 	uint32_t off, cap;
178 	int qlen = -1;
179 
180 	dev = pci_find_bsf(PCI_RID2BUS(devid), PCI_RID2SLOT(devid),
181 			   PCI_RID2FUNC(devid));
182 
183 	if (!dev) {
184 		return (-1);
185 	}
186 #define PCIM_ATS_EN	BIT(31)
187 
188 	if (pci_find_extcap(dev, PCIZ_ATS, &off) == 0) {
189 		cap = pci_read_config(dev, off + 4, 4);
190 		qlen = (cap & 0x1F);
191 		qlen = qlen ? qlen : 32;
192 		printf("AMD-Vi: PCI device %d.%d.%d ATS %s qlen=%d\n",
193 		       RID2PCI_STR(devid),
194 		       (cap & PCIM_ATS_EN) ? "enabled" : "Disabled",
195 		       qlen);
196 		qlen = (cap & PCIM_ATS_EN) ? qlen : -1;
197 	}
198 
199 	return (qlen);
200 }
201 
202 /*
203  * Check if an endpoint device support device IOTLB or ATS.
204  */
205 static inline bool
206 amdvi_dev_support_iotlb(struct amdvi_softc *softc, uint16_t devid)
207 {
208 	struct ivhd_dev_cfg *cfg;
209 	int qlen, i;
210 	bool pci_ats, ivhd_ats;
211 
212 	qlen = amdvi_find_ats_qlen(devid);
213 	if (qlen < 0)
214 		return (false);
215 
216 	KASSERT(softc, ("softc is NULL"));
217 	cfg = softc->dev_cfg;
218 
219 	ivhd_ats = false;
220 	for (i = 0; i < softc->dev_cfg_cnt; i++) {
221 		if ((cfg->start_id <= devid) && (cfg->end_id >= devid)) {
222 			ivhd_ats = cfg->enable_ats;
223 			break;
224 		}
225 		cfg++;
226 	}
227 
228 	pci_ats = (qlen < 0) ? false : true;
229 	if (pci_ats != ivhd_ats)
230 		device_printf(softc->dev,
231 		    "BIOS bug: mismatch in ATS setting for %d.%d.%d,"
232 		    "ATS inv qlen = %d\n", RID2PCI_STR(devid), qlen);
233 
234 	/* Ignore IVRS setting and respect PCI setting. */
235 	return (pci_ats);
236 }
237 #endif
238 
239 /* Enable IOTLB support for IOMMU if its supported. */
240 static inline void
241 amdvi_hw_enable_iotlb(struct amdvi_softc *softc)
242 {
243 #ifndef AMDVI_ATS_ENABLE
244 	softc->iotlb = false;
245 #else
246 	bool supported;
247 
248 	supported = (softc->ivhd_flag & IVHD_FLAG_IOTLB) ? true : false;
249 
250 	if (softc->pci_cap & AMDVI_PCI_CAP_IOTLB) {
251 		if (!supported)
252 			device_printf(softc->dev, "IOTLB disabled by BIOS.\n");
253 
254 		if (supported && !amdvi_enable_iotlb) {
255 			device_printf(softc->dev, "IOTLB disabled by user.\n");
256 			supported = false;
257 		}
258 	} else
259 		supported = false;
260 
261 	softc->iotlb = supported;
262 
263 #endif
264 }
265 
266 static int
267 amdvi_init_cmd(struct amdvi_softc *softc)
268 {
269 	struct amdvi_ctrl *ctrl = softc->ctrl;
270 
271 	ctrl->cmd.len = 8;	/* Use 256 command buffer entries. */
272 	softc->cmd_max = 1 << ctrl->cmd.len;
273 
274 	softc->cmd = malloc(sizeof(struct amdvi_cmd) *
275 	    softc->cmd_max, M_AMDVI, M_WAITOK | M_ZERO);
276 
277 	if ((uintptr_t)softc->cmd & PAGE_MASK)
278 		panic("AMDVi: Command buffer not aligned on page boundary.");
279 
280 	ctrl->cmd.base = vtophys(softc->cmd) / PAGE_SIZE;
281 	/*
282 	 * XXX: Reset the h/w pointers in case IOMMU is restarting,
283 	 * h/w doesn't clear these pointers based on empirical data.
284 	 */
285 	ctrl->cmd_tail = 0;
286 	ctrl->cmd_head = 0;
287 
288 	return (0);
289 }
290 
291 /*
292  * Note: Update tail pointer after we have written the command since tail
293  * pointer update cause h/w to execute new commands, see section 3.3
294  * of AMD IOMMU spec ver 2.0.
295  */
296 /* Get the command tail pointer w/o updating it. */
297 static struct amdvi_cmd *
298 amdvi_get_cmd_tail(struct amdvi_softc *softc)
299 {
300 	struct amdvi_ctrl *ctrl;
301 	struct amdvi_cmd *tail;
302 
303 	KASSERT(softc, ("softc is NULL"));
304 	KASSERT(softc->cmd != NULL, ("cmd is NULL"));
305 
306 	ctrl = softc->ctrl;
307 	KASSERT(ctrl != NULL, ("ctrl is NULL"));
308 
309 	tail = (struct amdvi_cmd *)((uint8_t *)softc->cmd +
310 	    ctrl->cmd_tail);
311 
312 	return (tail);
313 }
314 
315 /*
316  * Update the command tail pointer which will start command execution.
317  */
318 static void
319 amdvi_update_cmd_tail(struct amdvi_softc *softc)
320 {
321 	struct amdvi_ctrl *ctrl;
322 	int size;
323 
324 	size = sizeof(struct amdvi_cmd);
325 	KASSERT(softc->cmd != NULL, ("cmd is NULL"));
326 
327 	ctrl = softc->ctrl;
328 	KASSERT(ctrl != NULL, ("ctrl is NULL"));
329 
330 	ctrl->cmd_tail = MOD_INC(ctrl->cmd_tail, size, softc->cmd_max);
331 	softc->total_cmd++;
332 
333 #ifdef AMDVI_DEBUG_CMD
334 	device_printf(softc->dev, "cmd_tail: %s Tail:0x%x, Head:0x%x.\n",
335 	    ctrl->cmd_tail,
336 	    ctrl->cmd_head);
337 #endif
338 
339 }
340 
341 /*
342  * Various commands supported by IOMMU.
343  */
344 
345 /* Completion wait command. */
346 static void
347 amdvi_cmd_cmp(struct amdvi_softc *softc, const uint64_t data)
348 {
349 	struct amdvi_cmd *cmd;
350 	uint64_t pa;
351 
352 	cmd = amdvi_get_cmd_tail(softc);
353 	KASSERT(cmd != NULL, ("Cmd is NULL"));
354 
355 	pa = vtophys(&softc->cmp_data);
356 	cmd->opcode = AMDVI_CMP_WAIT_OPCODE;
357 	cmd->word0 = (pa & 0xFFFFFFF8) |
358 	    (AMDVI_CMP_WAIT_STORE);
359 	//(AMDVI_CMP_WAIT_FLUSH | AMDVI_CMP_WAIT_STORE);
360 	cmd->word1 = (pa >> 32) & 0xFFFFF;
361 	cmd->addr = data;
362 
363 	amdvi_update_cmd_tail(softc);
364 }
365 
366 /* Invalidate device table entry. */
367 static void
368 amdvi_cmd_inv_dte(struct amdvi_softc *softc, uint16_t devid)
369 {
370 	struct amdvi_cmd *cmd;
371 
372 	cmd = amdvi_get_cmd_tail(softc);
373 	KASSERT(cmd != NULL, ("Cmd is NULL"));
374 	cmd->opcode = AMDVI_INVD_DTE_OPCODE;
375 	cmd->word0 = devid;
376 	amdvi_update_cmd_tail(softc);
377 #ifdef AMDVI_DEBUG_CMD
378 	device_printf(softc->dev, "Invalidated DTE:0x%x\n", devid);
379 #endif
380 }
381 
382 /* Invalidate IOMMU page, use for invalidation of domain. */
383 static void
384 amdvi_cmd_inv_iommu_pages(struct amdvi_softc *softc, uint16_t domain_id,
385 			  uint64_t addr, bool guest_nested,
386 			  bool pde, bool page)
387 {
388 	struct amdvi_cmd *cmd;
389 
390 	cmd = amdvi_get_cmd_tail(softc);
391 	KASSERT(cmd != NULL, ("Cmd is NULL"));
392 
393 
394 	cmd->opcode = AMDVI_INVD_PAGE_OPCODE;
395 	cmd->word1 = domain_id;
396 	/*
397 	 * Invalidate all addresses for this domain.
398 	 */
399 	cmd->addr = addr;
400 	cmd->addr |= pde ? AMDVI_INVD_PAGE_PDE : 0;
401 	cmd->addr |= page ? AMDVI_INVD_PAGE_S : 0;
402 
403 	amdvi_update_cmd_tail(softc);
404 }
405 
406 #ifdef AMDVI_ATS_ENABLE
407 /* Invalidate device IOTLB. */
408 static void
409 amdvi_cmd_inv_iotlb(struct amdvi_softc *softc, uint16_t devid)
410 {
411 	struct amdvi_cmd *cmd;
412 	int qlen;
413 
414 	if (!softc->iotlb)
415 		return;
416 
417 	qlen = amdvi_find_ats_qlen(devid);
418 	if (qlen < 0) {
419 		panic("AMDVI: Invalid ATS qlen(%d) for device %d.%d.%d\n",
420 		      qlen, RID2PCI_STR(devid));
421 	}
422 	cmd = amdvi_get_cmd_tail(softc);
423 	KASSERT(cmd != NULL, ("Cmd is NULL"));
424 
425 #ifdef AMDVI_DEBUG_CMD
426 	device_printf(softc->dev, "Invalidate IOTLB devID 0x%x"
427 		      " Qlen:%d\n", devid, qlen);
428 #endif
429 	cmd->opcode = AMDVI_INVD_IOTLB_OPCODE;
430 	cmd->word0 = devid;
431 	cmd->word1 = qlen;
432 	cmd->addr = AMDVI_INVD_IOTLB_ALL_ADDR |
433 		AMDVI_INVD_IOTLB_S;
434 	amdvi_update_cmd_tail(softc);
435 }
436 #endif
437 
438 #ifdef notyet				/* For Interrupt Remap. */
439 static void
440 amdvi_cmd_inv_intr_map(struct amdvi_softc *softc,
441 		       uint16_t devid)
442 {
443 	struct amdvi_cmd *cmd;
444 
445 	cmd = amdvi_get_cmd_tail(softc);
446 	KASSERT(cmd != NULL, ("Cmd is NULL"));
447 	cmd->opcode = AMDVI_INVD_INTR_OPCODE;
448 	cmd->word0 = devid;
449 	amdvi_update_cmd_tail(softc);
450 #ifdef AMDVI_DEBUG_CMD
451 	device_printf(softc->dev, "Invalidate INTR map of devID 0x%x\n", devid);
452 #endif
453 }
454 #endif
455 
456 /* Invalidate domain using INVALIDATE_IOMMU_PAGES command. */
457 static void
458 amdvi_inv_domain(struct amdvi_softc *softc, uint16_t domain_id)
459 {
460 	struct amdvi_cmd *cmd;
461 
462 	cmd = amdvi_get_cmd_tail(softc);
463 	KASSERT(cmd != NULL, ("Cmd is NULL"));
464 
465 	/*
466 	 * See section 3.3.3 of IOMMU spec rev 2.0, software note
467 	 * for invalidating domain.
468 	 */
469 	amdvi_cmd_inv_iommu_pages(softc, domain_id, AMDVI_INVD_PAGE_ALL_ADDR,
470 				false, true, true);
471 
472 #ifdef AMDVI_DEBUG_CMD
473 	device_printf(softc->dev, "Invalidate domain:0x%x\n", domain_id);
474 
475 #endif
476 }
477 
478 static	bool
479 amdvi_cmp_wait(struct amdvi_softc *softc)
480 {
481 	struct amdvi_ctrl *ctrl;
482 	const uint64_t VERIFY = 0xA5A5;
483 	volatile uint64_t *read;
484 	int i;
485 	bool status;
486 
487 	ctrl = softc->ctrl;
488 	read = &softc->cmp_data;
489 	*read = 0;
490 	amdvi_cmd_cmp(softc, VERIFY);
491 	/* Wait for h/w to update completion data. */
492 	for (i = 0; i < 100 && (*read != VERIFY); i++) {
493 		DELAY(1000);		/* 1 ms */
494 	}
495 	status = (VERIFY == softc->cmp_data) ? true : false;
496 
497 #ifdef AMDVI_DEBUG_CMD
498 	if (status)
499 		device_printf(softc->dev, "CMD completion DONE Tail:0x%x, "
500 			      "Head:0x%x, loop:%d.\n", ctrl->cmd_tail,
501 			      ctrl->cmd_head, loop);
502 #endif
503 	return (status);
504 }
505 
506 static void
507 amdvi_wait(struct amdvi_softc *softc)
508 {
509 	struct amdvi_ctrl *ctrl;
510 	int i;
511 
512 	KASSERT(softc, ("softc is NULL"));
513 
514 	ctrl = softc->ctrl;
515 	KASSERT(ctrl != NULL, ("ctrl is NULL"));
516 	/* Don't wait if h/w is not enabled. */
517 	if ((ctrl->control & AMDVI_CTRL_EN) == 0)
518 		return;
519 
520 	for (i = 0; i < 10; i++) {
521 		if (amdvi_cmp_wait(softc))
522 			return;
523 	}
524 
525 	device_printf(softc->dev, "Error: completion failed"
526 		      " tail:0x%x, head:0x%x.\n",
527 		      ctrl->cmd_tail, ctrl->cmd_head);
528 	amdvi_dump_cmds(softc);
529 }
530 
531 static void
532 amdvi_dump_cmds(struct amdvi_softc *softc)
533 {
534 	struct amdvi_ctrl *ctrl;
535 	struct amdvi_cmd *cmd;
536 	int off, i;
537 
538 	ctrl = softc->ctrl;
539 	device_printf(softc->dev, "Dump all the commands:\n");
540 	/*
541 	 * If h/w is stuck in completion, it is the previous command,
542 	 * start dumping from previous command onward.
543 	 */
544 	off = MOD_DEC(ctrl->cmd_head, sizeof(struct amdvi_cmd),
545 	    softc->cmd_max);
546 	for (i = 0; off != ctrl->cmd_tail &&
547 	    i < softc->cmd_max; i++) {
548 		cmd = (struct amdvi_cmd *)((uint8_t *)softc->cmd + off);
549 		printf("  [CMD%d, off:0x%x] opcode= 0x%x 0x%x"
550 		    " 0x%x 0x%lx\n", i, off, cmd->opcode,
551 		    cmd->word0, cmd->word1, cmd->addr);
552 		off = (off + sizeof(struct amdvi_cmd)) %
553 		    (softc->cmd_max * sizeof(struct amdvi_cmd));
554 	}
555 }
556 
557 static int
558 amdvi_init_event(struct amdvi_softc *softc)
559 {
560 	struct amdvi_ctrl *ctrl;
561 
562 	ctrl = softc->ctrl;
563 	ctrl->event.len = 8;
564 	softc->event_max = 1 << ctrl->event.len;
565 	softc->event = malloc(sizeof(struct amdvi_event) *
566 	    softc->event_max, M_AMDVI, M_WAITOK | M_ZERO);
567 	if ((uintptr_t)softc->event & PAGE_MASK) {
568 		device_printf(softc->dev, "Event buffer not aligned on page.");
569 		return (false);
570 	}
571 	ctrl->event.base = vtophys(softc->event) / PAGE_SIZE;
572 
573 	/* Reset the pointers. */
574 	ctrl->evt_head = 0;
575 	ctrl->evt_tail = 0;
576 
577 	return (0);
578 }
579 
580 static inline void
581 amdvi_decode_evt_flag(uint16_t flag)
582 {
583 
584 	flag &= AMDVI_EVENT_FLAG_MASK;
585 	printf(" 0x%b]\n", flag,
586 		"\020"
587 		"\001GN"
588 		"\002NX"
589 		"\003US"
590 		"\004I"
591 		"\005PR"
592 		"\006RW"
593 		"\007PE"
594 		"\010RZ"
595 		"\011TR"
596 		);
597 }
598 
599 /* See section 2.5.4 of AMD IOMMU spec ver 2.62.*/
600 static inline void
601 amdvi_decode_evt_flag_type(uint8_t type)
602 {
603 
604 	switch (AMDVI_EVENT_FLAG_TYPE(type)) {
605 	case 0:
606 		printf("RSVD\n");
607 		break;
608 	case 1:
609 		printf("Master Abort\n");
610 		break;
611 	case 2:
612 		printf("Target Abort\n");
613 		break;
614 	case 3:
615 		printf("Data Err\n");
616 		break;
617 	default:
618 		break;
619 	}
620 }
621 
622 static void
623 amdvi_decode_inv_dte_evt(uint16_t devid, uint16_t domid, uint64_t addr,
624     uint16_t flag)
625 {
626 
627 	printf("\t[IO_PAGE_FAULT EVT: devId:0x%x DomId:0x%x"
628 	    " Addr:0x%lx",
629 	    devid, domid, addr);
630 	amdvi_decode_evt_flag(flag);
631 }
632 
633 static void
634 amdvi_decode_pf_evt(uint16_t devid, uint16_t domid, uint64_t addr,
635     uint16_t flag)
636 {
637 
638 	printf("\t[IO_PAGE_FAULT EVT: devId:0x%x DomId:0x%x"
639 	    " Addr:0x%lx",
640 	    devid, domid, addr);
641 	amdvi_decode_evt_flag(flag);
642 }
643 
644 static void
645 amdvi_decode_dte_hwerr_evt(uint16_t devid, uint16_t domid,
646     uint64_t addr, uint16_t flag)
647 {
648 
649 	printf("\t[DEV_TAB_HW_ERR EVT: devId:0x%x DomId:0x%x"
650 	    " Addr:0x%lx", devid, domid, addr);
651 	amdvi_decode_evt_flag(flag);
652 	amdvi_decode_evt_flag_type(flag);
653 }
654 
655 static void
656 amdvi_decode_page_hwerr_evt(uint16_t devid, uint16_t domid, uint64_t addr,
657     uint16_t flag)
658 {
659 
660 	printf("\t[PAGE_TAB_HW_ERR EVT: devId:0x%x DomId:0x%x"
661 	    " Addr:0x%lx", devid, domid, addr);
662 	amdvi_decode_evt_flag(flag);
663 	amdvi_decode_evt_flag_type(AMDVI_EVENT_FLAG_TYPE(flag));
664 }
665 
666 static void
667 amdvi_decode_evt(struct amdvi_event *evt)
668 {
669 	struct amdvi_cmd *cmd;
670 
671 	switch (evt->opcode) {
672 	case AMDVI_EVENT_INVALID_DTE:
673 		amdvi_decode_inv_dte_evt(evt->devid, evt->pasid_domid,
674 		    evt->addr, evt->flag);
675 		break;
676 
677 	case AMDVI_EVENT_PFAULT:
678 		amdvi_decode_pf_evt(evt->devid, evt->pasid_domid,
679 		    evt->addr, evt->flag);
680 		break;
681 
682 	case AMDVI_EVENT_DTE_HW_ERROR:
683 		amdvi_decode_dte_hwerr_evt(evt->devid, evt->pasid_domid,
684 		    evt->addr, evt->flag);
685 		break;
686 
687 	case AMDVI_EVENT_PAGE_HW_ERROR:
688 		amdvi_decode_page_hwerr_evt(evt->devid, evt->pasid_domid,
689 		    evt->addr, evt->flag);
690 		break;
691 
692 	case AMDVI_EVENT_ILLEGAL_CMD:
693 		/* FALL THROUGH */
694 	case AMDVI_EVENT_CMD_HW_ERROR:
695 		printf("\t[%s EVT]\n", (evt->opcode == AMDVI_EVENT_ILLEGAL_CMD) ?
696 		    "ILLEGAL CMD" : "CMD HW ERR");
697 		cmd = (struct amdvi_cmd *)PHYS_TO_DMAP(evt->addr);
698 		printf("\tCMD opcode= 0x%x 0x%x 0x%x 0x%lx\n",
699 		    cmd->opcode, cmd->word0, cmd->word1, cmd->addr);
700 		break;
701 
702 	case AMDVI_EVENT_IOTLB_TIMEOUT:
703 		printf("\t[IOTLB_INV_TIMEOUT devid:0x%x addr:0x%lx]\n",
704 		    evt->devid, evt->addr);
705 		break;
706 
707 	case AMDVI_EVENT_INVALID_DTE_REQ:
708 		printf("\t[INV_DTE devid:0x%x addr:0x%lx type:0x%x tr:%d]\n",
709 		    evt->devid, evt->addr, evt->flag >> 9,
710 		    (evt->flag >> 8) & 1);
711 		break;
712 
713 	case AMDVI_EVENT_INVALID_PPR_REQ:
714 	case AMDVI_EVENT_COUNTER_ZERO:
715 		printf("AMD-Vi: v2 events.\n");
716 		break;
717 
718 	default:
719 		printf("Unsupported AMD-Vi event:%d\n", evt->opcode);
720 	}
721 }
722 
723 static void
724 amdvi_print_events(struct amdvi_softc *softc)
725 {
726 	struct amdvi_ctrl *ctrl;
727 	struct amdvi_event *event;
728 	int i, size;
729 
730 	ctrl = softc->ctrl;
731 	size = sizeof(struct amdvi_event);
732 	for (i = 0; i < softc->event_max; i++) {
733 		event = &softc->event[ctrl->evt_head / size];
734 		if (!event->opcode)
735 			break;
736 		device_printf(softc->dev, "\t[Event%d: Head:0x%x Tail:0x%x]\n",
737 		    i, ctrl->evt_head, ctrl->evt_tail);
738 		amdvi_decode_evt(event);
739 		ctrl->evt_head = MOD_INC(ctrl->evt_head, size,
740 		    softc->event_max);
741 	}
742 }
743 
744 static int
745 amdvi_init_dte(struct amdvi_softc *softc)
746 {
747 	struct amdvi_ctrl *ctrl;
748 
749 	ctrl = softc->ctrl;
750 	ctrl->dte.base = vtophys(amdvi_dte) / PAGE_SIZE;
751 	ctrl->dte.size = 0x1FF;		/* 2MB device table. */
752 
753 	return (0);
754 }
755 
756 /*
757  * Not all capabilities of IOMMU are available in ACPI IVHD flag
758  * or EFR entry, read directly from device.
759  */
760 static int
761 amdvi_print_pci_cap(device_t dev)
762 {
763 	struct amdvi_softc *softc;
764 	uint32_t off, cap;
765 
766 
767 	softc = device_get_softc(dev);
768 	off = softc->cap_off;
769 
770 	/*
771 	 * Section 3.7.1 of IOMMU sepc rev 2.0.
772 	 * Read capability from device.
773 	 */
774 	cap = amdvi_pci_read(softc, off);
775 
776 	/* Make sure capability type[18:16] is 3. */
777 	KASSERT((((cap >> 16) & 0x7) == 0x3),
778 	    ("Not a IOMMU capability 0x%x@0x%x", cap, off));
779 
780 	softc->pci_cap = cap >> 24;
781 	device_printf(softc->dev, "PCI cap 0x%x@0x%x feature:%b\n",
782 	    cap, off, softc->pci_cap,
783 	    "\020\001IOTLB\002HT\003NPCache\004EFR");
784 
785 	/* IOMMU spec Rev 2.0, section 3.7.2.1 */
786 	softc->pci_efr = softc->ctrl->ex_feature;
787 	if (softc->pci_efr) {
788 		device_printf(softc->dev, "PCI extended Feature:%b\n",
789 		    (int)softc->pci_efr,
790 		    "\020\001PreFSup\002PPRSup\003XTSup\004NXSup\006IASup"
791 		    "\007GASup\008HESup\009PCSup");
792 		device_printf(softc->dev,
793 		    "PCI HATS = %d GATS = %d GLXSup = %d, max PASID: 0x%x ",
794 		    (int)((softc->pci_efr >> 10) & 0x3),
795 		    (int)((softc->pci_efr >> 12) & 0x3),
796 		    (int)((softc->pci_efr >> 14) & 0x3),
797 		    (int)((softc->pci_efr >> 32) & 0x1F) + 1);
798 	}
799 
800 	return (0);
801 }
802 
803 static void
804 amdvi_event_intr(void *arg)
805 {
806 	struct amdvi_softc *softc;
807 	struct amdvi_ctrl *ctrl;
808 
809 	softc = (struct amdvi_softc *)arg;
810 	ctrl = softc->ctrl;
811 	device_printf(softc->dev, "EVT INTR %ld Status:0x%x"
812 	    " EVT Head:0x%x Tail:0x%x]\n", softc->event_intr_cnt++,
813 	    ctrl->status, ctrl->evt_head, ctrl->evt_tail);
814 	printf("  [CMD Total 0x%lx] Tail:0x%x, Head:0x%x.\n",
815 	    softc->total_cmd, ctrl->cmd_tail, ctrl->cmd_head);
816 
817 	amdvi_print_events(softc);
818 }
819 
820 static void
821 amdvi_free_evt_intr_res(device_t dev)
822 {
823 
824 	struct amdvi_softc *softc;
825 
826 	softc = device_get_softc(dev);
827 	if (softc->event_tag != NULL) {
828 		bus_teardown_intr(dev, softc->event_res, softc->event_tag);
829 	}
830 	if (softc->event_res != NULL) {
831 		bus_release_resource(dev, SYS_RES_IRQ, softc->event_rid,
832 		    softc->event_res);
833 	}
834 	bus_delete_resource(dev, SYS_RES_IRQ, softc->event_rid);
835 	PCIB_RELEASE_MSI(device_get_parent(device_get_parent(dev)),
836 	    dev, 1, &softc->event_irq);
837 }
838 
839 static	bool
840 amdvi_alloc_intr_resources(struct amdvi_softc *softc)
841 {
842 	device_t dev, pcib;
843 	uint64_t msi_addr;
844 	uint32_t msi_data, temp;
845 	int err, msi_off;
846 
847 	dev = softc->dev;
848 	pcib = device_get_parent(device_get_parent(dev));
849 	softc->event_irq = -1;
850 	softc->event_rid = 0;
851 	/*
852 	 * Section 3.7.1 of IOMMU rev 2.0. With MSI, there is only one
853 	 * interrupt. XXX: Enable MSI/X support.
854 	 */
855 
856 	err = PCIB_ALLOC_MSI(pcib, dev, 1, 1, &softc->event_irq);
857 	if (err) {
858 		device_printf(dev,
859 		    "Couldn't find event MSI IRQ resource.\n");
860 		return (ENOENT);
861 	}
862 	err = bus_set_resource(dev, SYS_RES_IRQ, softc->event_rid,
863 	    softc->event_irq, 1);
864 	if (err) {
865 		device_printf(dev, "Couldn't set event MSI resource.\n");
866 		return (ENXIO);
867 	}
868 	softc->event_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
869 	    &softc->event_rid, RF_ACTIVE);
870 	if (!softc->event_res) {
871 		device_printf(dev,
872 		    "Unable to allocate event INTR resource.\n");
873 		return (ENOMEM);
874 	}
875 
876 	if (bus_setup_intr(dev, softc->event_res,
877 	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, amdvi_event_intr,
878 	    softc, &softc->event_tag)) {
879 		device_printf(dev, "Fail to setup event intr\n");
880 		bus_release_resource(softc->dev, SYS_RES_IRQ,
881 		    softc->event_rid, softc->event_res);
882 		softc->event_res = NULL;
883 		return (ENXIO);
884 	}
885 
886 	bus_describe_intr(dev, softc->event_res, softc->event_tag,
887 	    "fault");
888 
889 	err = amdvi_find_pci_cap(softc, PCIY_MSI, &msi_off);
890 	if (err) {
891 		device_printf(dev, "Couldn't find MSI capability, err = %d.\n",
892 			      err);
893 		return (err);
894 	}
895 
896 	err = PCIB_MAP_MSI(pcib, dev, softc->event_irq, &msi_addr,
897 	    &msi_data);
898 	if (err) {
899 		device_printf(dev,
900 		    "Event interrupt config failed, err=%d.\n",
901 		    err);
902 		amdvi_free_evt_intr_res(softc->dev);
903 		return (err);
904 	}
905 
906 	/* Configure MSI */
907 	amdvi_pci_write(softc, msi_off + PCIR_MSI_ADDR, msi_addr);
908 	amdvi_pci_write(softc, msi_off + PCIR_MSI_ADDR_HIGH,
909 	    msi_addr >> 32);
910 	amdvi_pci_write(softc, msi_off + PCIR_MSI_DATA_64BIT, msi_data);
911 
912 	/* Now enable MSI interrupt. */
913 	temp = amdvi_pci_read(softc, msi_off);
914 	temp |= (PCIM_MSICTRL_MSI_ENABLE << 16);	/* MSI enable. */
915 	amdvi_pci_write(softc, msi_off, temp);
916 
917 	return (0);
918 }
919 
920 
921 static void
922 amdvi_print_dev_cap(struct amdvi_softc *softc)
923 {
924 	struct ivhd_dev_cfg *cfg;
925 	int i;
926 
927 	cfg = softc->dev_cfg;
928 	for (i = 0; i < softc->dev_cfg_cnt; i++) {
929 		device_printf(softc->dev, "device [0x%x - 0x%x]"
930 		    "config:%b%s\n", cfg->start_id, cfg->end_id,
931 		    cfg->data,
932 		    "\020\001INIT\002ExtInt\003NMI"
933 		    "\007LINT0\008LINT1",
934 		    cfg->enable_ats ? "ATS enabled" : "");
935 		cfg++;
936 	}
937 }
938 
939 static int
940 amdvi_handle_sysctl(SYSCTL_HANDLER_ARGS)
941 {
942 	struct amdvi_softc *softc;
943 	int result, type, error = 0;
944 
945 	softc = (struct amdvi_softc *)arg1;
946 	type = arg2;
947 
948 	switch (type) {
949 	case 0:
950 		result = softc->ctrl->cmd_head;
951 		error = sysctl_handle_int(oidp, &result, 0,
952 		    req);
953 		break;
954 	case 1:
955 		result = softc->ctrl->cmd_tail;
956 		error = sysctl_handle_int(oidp, &result, 0,
957 		    req);
958 		break;
959 	case 2:
960 		result = softc->ctrl->evt_head;
961 		error = sysctl_handle_int(oidp, &result, 0,
962 		    req);
963 		break;
964 	case 3:
965 		result = softc->ctrl->evt_tail;
966 		error = sysctl_handle_int(oidp, &result, 0,
967 		    req);
968 		break;
969 
970 	default:
971 		device_printf(softc->dev, "Unknown sysctl:%d\n", type);
972 	}
973 
974 	return (error);
975 }
976 
977 static void
978 amdvi_add_sysctl(struct amdvi_softc *softc)
979 {
980 	struct sysctl_oid_list *child;
981 	struct sysctl_ctx_list *ctx;
982 	device_t dev;
983 
984 	dev = softc->dev;
985 	ctx = device_get_sysctl_ctx(dev);
986 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
987 
988 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "event_intr_count", CTLFLAG_RD,
989 	    &softc->event_intr_cnt, "Event interrupt count");
990 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "command_count", CTLFLAG_RD,
991 	    &softc->total_cmd, "Command submitted count");
992 	SYSCTL_ADD_U16(ctx, child, OID_AUTO, "pci_rid", CTLFLAG_RD,
993 	    &softc->pci_rid, 0, "IOMMU RID");
994 	SYSCTL_ADD_U16(ctx, child, OID_AUTO, "start_dev_rid", CTLFLAG_RD,
995 	    &softc->start_dev_rid, 0, "Start of device under this IOMMU");
996 	SYSCTL_ADD_U16(ctx, child, OID_AUTO, "end_dev_rid", CTLFLAG_RD,
997 	    &softc->end_dev_rid, 0, "End of device under this IOMMU");
998 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_head",
999 	    CTLTYPE_UINT | CTLFLAG_RD, softc, 0,
1000 	    amdvi_handle_sysctl, "IU", "Command head");
1001 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_tail",
1002 	    CTLTYPE_UINT | CTLFLAG_RD, softc, 1,
1003 	    amdvi_handle_sysctl, "IU", "Command tail");
1004 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_head",
1005 	    CTLTYPE_UINT | CTLFLAG_RD, softc, 2,
1006 	    amdvi_handle_sysctl, "IU", "Command head");
1007 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_tail",
1008 	    CTLTYPE_UINT | CTLFLAG_RD, softc, 3,
1009 	    amdvi_handle_sysctl, "IU", "Command tail");
1010 }
1011 
1012 int
1013 amdvi_setup_hw(struct amdvi_softc *softc)
1014 {
1015 	device_t dev;
1016 	int status;
1017 
1018 	dev = softc->dev;
1019 
1020 	amdvi_hw_enable_iotlb(softc);
1021 
1022 	amdvi_print_dev_cap(softc);
1023 
1024 	if ((status = amdvi_print_pci_cap(dev)) != 0) {
1025 		device_printf(dev, "PCI capability.\n");
1026 		return (status);
1027 	}
1028 	if ((status = amdvi_init_cmd(softc)) != 0) {
1029 		device_printf(dev, "Couldn't configure command buffer.\n");
1030 		return (status);
1031 	}
1032 	if ((status = amdvi_init_event(softc)) != 0) {
1033 		device_printf(dev, "Couldn't configure event buffer.\n");
1034 		return (status);
1035 	}
1036 	if ((status = amdvi_init_dte(softc)) != 0) {
1037 		device_printf(dev, "Couldn't configure device table.\n");
1038 		return (status);
1039 	}
1040 	if ((status = amdvi_alloc_intr_resources(softc)) != 0) {
1041 		return (status);
1042 	}
1043 	amdvi_add_sysctl(softc);
1044 	return (0);
1045 }
1046 
1047 int
1048 amdvi_teardown_hw(struct amdvi_softc *softc)
1049 {
1050 	device_t dev;
1051 
1052 	dev = softc->dev;
1053 
1054 	/*
1055 	 * Called after disable, h/w is stopped by now, free all the resources.
1056 	 */
1057 	amdvi_free_evt_intr_res(dev);
1058 
1059 	if (softc->cmd)
1060 		free(softc->cmd, M_AMDVI);
1061 
1062 	if (softc->event)
1063 		free(softc->event, M_AMDVI);
1064 
1065 	return (0);
1066 }
1067 
1068 /*********** bhyve interfaces *********************/
1069 static int
1070 amdvi_init(void)
1071 {
1072 	if (!ivhd_count) {
1073 		return (EIO);
1074 	}
1075 	if (!amdvi_enable_user && ivhd_count) {
1076 		printf("bhyve: Found %d AMD-Vi/IOMMU device(s), "
1077 		    	"use hw.vmm.amdvi_enable=1 to enable pass-through.\n",
1078 		    ivhd_count);
1079 		return (EINVAL);
1080 	}
1081 	return (0);
1082 }
1083 
/* Counterpart of amdvi_init(); there is nothing to undo. */
static void
amdvi_cleanup(void)
{
}
1089 
1090 static uint16_t
1091 amdvi_domainId(void)
1092 {
1093 
1094 	/*
1095 	 * If we hit maximum domain limit, rollover leaving host
1096 	 * domain(0).
1097 	 * XXX: make sure that this domain is not used.
1098 	 */
1099 	if (amdvi_dom_id == AMDVI_MAX_DOMAIN)
1100 		amdvi_dom_id = 1;
1101 
1102 	return ((uint16_t)amdvi_dom_id++);
1103 }
1104 
1105 static void
1106 amdvi_do_inv_domain(uint16_t domain_id, bool create)
1107 {
1108 	struct amdvi_softc *softc;
1109 	int i;
1110 
1111 	for (i = 0; i < ivhd_count; i++) {
1112 		softc = device_get_softc(ivhd_devs[i]);
1113 		KASSERT(softc, ("softc is NULL"));
1114 		/*
1115 		 * If not present pages are cached, invalidate page after
1116 		 * creating domain.
1117 		 */
1118 #if 0
1119 		if (create && ((softc->pci_cap & AMDVI_PCI_CAP_NPCACHE) == 0))
1120 			continue;
1121 #endif
1122 		amdvi_inv_domain(softc, domain_id);
1123 		amdvi_wait(softc);
1124 	}
1125 }
1126 
1127 static void *
1128 amdvi_create_domain(vm_paddr_t maxaddr)
1129 {
1130 	struct amdvi_domain *dom;
1131 
1132 	dom = malloc(sizeof(struct amdvi_domain), M_AMDVI, M_ZERO | M_WAITOK);
1133 	dom->id = amdvi_domainId();
1134 	//dom->maxaddr = maxaddr;
1135 #ifdef AMDVI_DEBUG_CMD
1136 	printf("Created domain #%d\n", dom->id);
1137 #endif
1138 	/*
1139 	 * Host domain(#0) don't create translation table.
1140 	 */
1141 	if (dom->id || amdvi_host_ptp)
1142 		dom->ptp = malloc(PAGE_SIZE, M_AMDVI, M_WAITOK | M_ZERO);
1143 
1144 	dom->ptp_level = amdvi_ptp_level;
1145 
1146 	amdvi_do_inv_domain(dom->id, true);
1147 	SLIST_INSERT_HEAD(&dom_head, dom, next);
1148 
1149 	return (dom);
1150 }
1151 
1152 static void
1153 amdvi_free_ptp(uint64_t *ptp, int level)
1154 {
1155 	int i;
1156 
1157 	if (level < 1)
1158 		return;
1159 
1160 	for (i = 0; i < NPTEPG ; i++) {
1161 		if ((ptp[i] & AMDVI_PT_PRESENT) == 0)
1162 			continue;
1163 		/* XXX: Add super-page or PTE mapping > 4KB. */
1164 #ifdef notyet
1165 		/* Super-page mapping. */
1166 		if (AMDVI_PD_SUPER(ptp[i]))
1167 			continue;
1168 #endif
1169 
1170 		amdvi_free_ptp((uint64_t *)PHYS_TO_DMAP(ptp[i]
1171 		    & AMDVI_PT_MASK), level - 1);
1172 
1173 	}
1174 
1175 	free(ptp, M_AMDVI);
1176 }
1177 
1178 static void
1179 amdvi_destroy_domain(void *arg)
1180 {
1181 	struct amdvi_domain *domain;
1182 
1183 	domain = (struct amdvi_domain *)arg;
1184 	KASSERT(domain, ("domain is NULL"));
1185 #ifdef AMDVI_DEBUG_CMD
1186 	printf("Destroying domain %d\n", domain->id);
1187 #endif
1188 	if (domain->ptp)
1189 		amdvi_free_ptp(domain->ptp, domain->ptp_level);
1190 
1191 	amdvi_do_inv_domain(domain->id, false);
1192 	SLIST_REMOVE(&dom_head, domain, amdvi_domain, next);
1193 	free(domain, M_AMDVI);
1194 }
1195 
1196 static uint64_t
1197 amdvi_set_pt(uint64_t *pt, int level, vm_paddr_t gpa,
1198     vm_paddr_t hpa, uint64_t pg_size, bool create)
1199 {
1200 	uint64_t *page, pa;
1201 	int shift, index;
1202 	const int PT_SHIFT = 9;
1203 	const int PT_INDEX_MASK = (1 << PT_SHIFT) - 1;	/* Based on PT_SHIFT */
1204 
1205 	if (!pg_size)
1206 		return (0);
1207 
1208 	if (hpa & (pg_size - 1)) {
1209 		printf("HPA is not size aligned.\n");
1210 		return (0);
1211 	}
1212 	if (gpa & (pg_size - 1)) {
1213 		printf("HPA is not size aligned.\n");
1214 		return (0);
1215 	}
1216 	shift = PML4SHIFT;
1217 	while ((shift > PAGE_SHIFT) && (pg_size < (1UL << shift))) {
1218 		index = (gpa >> shift) & PT_INDEX_MASK;
1219 
1220 		if ((pt[index] == 0) && create) {
1221 			page = malloc(PAGE_SIZE, M_AMDVI, M_WAITOK | M_ZERO);
1222 			pa = vtophys(page);
1223 			pt[index] = pa | AMDVI_PT_PRESENT | AMDVI_PT_RW |
1224 			    ((level - 1) << AMDVI_PD_LEVEL_SHIFT);
1225 		}
1226 #ifdef AMDVI_DEBUG_PTE
1227 		if ((gpa % 0x1000000) == 0)
1228 			printf("[level%d, shift = %d]PTE:0x%lx\n",
1229 			    level, shift, pt[index]);
1230 #endif
1231 #define PTE2PA(x)	((uint64_t)(x) & AMDVI_PT_MASK)
1232 		pa = PTE2PA(pt[index]);
1233 		pt = (uint64_t *)PHYS_TO_DMAP(pa);
1234 		shift -= PT_SHIFT;
1235 		level--;
1236 	}
1237 
1238 	/* Leaf entry. */
1239 	index = (gpa >> shift) & PT_INDEX_MASK;
1240 
1241 	if (create) {
1242 		pt[index] = hpa | AMDVI_PT_RW | AMDVI_PT_PRESENT;
1243 	} else
1244 		pt[index] = 0;
1245 
1246 #ifdef AMDVI_DEBUG_PTE
1247 	if ((gpa % 0x1000000) == 0)
1248 		printf("[Last level%d, shift = %d]PTE:0x%lx\n",
1249 		    level, shift, pt[index]);
1250 #endif
1251 	return (1ULL << shift);
1252 }
1253 
1254 static uint64_t
1255 amdvi_update_mapping(struct amdvi_domain *domain, vm_paddr_t gpa,
1256     vm_paddr_t hpa, uint64_t size, bool create)
1257 {
1258 	uint64_t mapped, *ptp, len;
1259 	int level;
1260 
1261 	KASSERT(domain, ("domain is NULL"));
1262 	level = domain->ptp_level;
1263 	KASSERT(level, ("Page table level is 0"));
1264 
1265 	ptp = domain->ptp;
1266 	KASSERT(ptp, ("PTP is NULL"));
1267 	mapped = 0;
1268 	while (mapped < size) {
1269 		len = amdvi_set_pt(ptp, level, gpa + mapped, hpa + mapped,
1270 		    PAGE_SIZE, create);
1271 		if (!len) {
1272 			printf("Error: Couldn't map HPA:0x%lx GPA:0x%lx\n",
1273 			    hpa, gpa);
1274 			return (0);
1275 		}
1276 		mapped += len;
1277 	}
1278 
1279 	return (mapped);
1280 }
1281 
1282 static uint64_t
1283 amdvi_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa,
1284     uint64_t len)
1285 {
1286 	struct amdvi_domain *domain;
1287 
1288 	domain = (struct amdvi_domain *)arg;
1289 
1290 	if (domain->id && !domain->ptp) {
1291 		printf("ptp is NULL");
1292 		return (-1);
1293 	}
1294 
1295 	/*
1296 	 * If host domain is created w/o page table, skip IOMMU page
1297 	 * table set-up.
1298 	 */
1299 	if (domain->ptp)
1300 		return (amdvi_update_mapping(domain, gpa, hpa, len, true));
1301 	else
1302 		return (len);
1303 }
1304 
1305 static uint64_t
1306 amdvi_destroy_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
1307 {
1308 	struct amdvi_domain *domain;
1309 
1310 	domain = (struct amdvi_domain *)arg;
1311 	/*
1312 	 * If host domain is created w/o page table, skip IOMMU page
1313 	 * table set-up.
1314 	 */
1315 	if (domain->ptp)
1316 		return (amdvi_update_mapping(domain, gpa, 0, len, false));
1317 	return
1318 	    (len);
1319 }
1320 
1321 static struct amdvi_softc *
1322 amdvi_find_iommu(uint16_t devid)
1323 {
1324 	struct amdvi_softc *softc;
1325 	int i;
1326 
1327 	for (i = 0; i < ivhd_count; i++) {
1328 		softc = device_get_softc(ivhd_devs[i]);
1329 		if ((devid >= softc->start_dev_rid) &&
1330 		    (devid <= softc->end_dev_rid))
1331 			return (softc);
1332 	}
1333 
1334 	/*
1335 	 * XXX: BIOS bug, device not in IVRS table, assume its from first IOMMU.
1336 	 */
1337 	printf("BIOS bug device(%d.%d.%d) doesn't have IVHD entry.\n",
1338 	    RID2PCI_STR(devid));
1339 
1340 	return (device_get_softc(ivhd_devs[0]));
1341 }
1342 
1343 /*
1344  * Set-up device table entry.
1345  * IOMMU spec Rev 2.0, section 3.2.2.2, some of the fields must
1346  * be set concurrently, e.g. read and write bits.
1347  */
1348 static void
1349 amdvi_set_dte(struct amdvi_domain *domain, uint16_t devid, bool enable)
1350 {
1351 	struct amdvi_softc *softc;
1352 	struct amdvi_dte temp;
1353 
1354 	softc = amdvi_find_iommu(devid);
1355 	KASSERT(softc, ("softc is NULL for pci_rid:0x%x\n", devid));
1356 
1357 	memset(&temp, 0, sizeof(struct amdvi_dte));
1358 
1359 #ifdef AMDVI_ATS_ENABLE
1360 	/* If IOMMU and device support IOTLB, enable it. */
1361 	if (amdvi_dev_support_iotlb(softc, devid) && softc->iotlb)
1362 		temp.iotlb_enable = 1;
1363 #endif
1364 
1365 	/* Avoid duplicate I/O faults. */
1366 	temp.sup_second_io_fault = 1;
1367 	temp.sup_all_io_fault = amdvi_disable_io_fault;
1368 
1369 	temp.dt_valid = 1;
1370 	temp.domain_id = domain->id;
1371 
1372 	if (enable) {
1373 		if (domain->ptp) {
1374 			temp.pt_base = vtophys(domain->ptp) >> 12;
1375 			temp.pt_level = amdvi_ptp_level;
1376 		}
1377 		/*
1378 		 * XXX: Page table valid[TV] bit must be set even if host domain
1379 		 * page tables are not enabled.
1380 		 */
1381 		temp.pt_valid = 1;
1382 		temp.read_allow = 1;
1383 		temp.write_allow = 1;
1384 	}
1385 	amdvi_dte[devid] = temp;
1386 }
1387 
/*
 * Invalidate the device table entry of 'devid' on the unit that owns it
 * (plus its IOTLB when AMDVI_ATS_ENABLE is defined and the device supports
 * one), then call amdvi_wait() on that unit.
 */
static void
amdvi_inv_device(uint16_t devid)
{
	struct amdvi_softc *sc;

	sc = amdvi_find_iommu(devid);
	KASSERT(sc, ("softc is NULL"));

	amdvi_cmd_inv_dte(sc, devid);
#ifdef AMDVI_ATS_ENABLE
	if (amdvi_dev_support_iotlb(sc, devid))
		amdvi_cmd_inv_iotlb(sc, devid);
#endif
	amdvi_wait(sc);
}
1403 
1404 static void
1405 amdvi_add_device(void *arg, uint16_t devid)
1406 {
1407 	struct amdvi_domain *domain;
1408 
1409 	domain = (struct amdvi_domain *)arg;
1410 	KASSERT(domain != NULL, ("domain is NULL"));
1411 #ifdef AMDVI_DEBUG_CMD
1412 	printf("Assigning device(%d.%d.%d) to domain:%d\n",
1413 	    RID2PCI_STR(devid), domain->id);
1414 #endif
1415 	amdvi_set_dte(domain, devid, true);
1416 	amdvi_inv_device(devid);
1417 }
1418 
1419 static void
1420 amdvi_remove_device(void *arg, uint16_t devid)
1421 {
1422 	struct amdvi_domain *domain;
1423 
1424 	domain = (struct amdvi_domain *)arg;
1425 #ifdef AMDVI_DEBUG_CMD
1426 	printf("Remove device(0x%x) from domain:%d\n",
1427 	       devid, domain->id);
1428 #endif
1429 	amdvi_set_dte(domain, devid, false);
1430 	amdvi_inv_device(devid);
1431 }
1432 
1433 static void
1434 amdvi_enable(void)
1435 {
1436 	struct amdvi_ctrl *ctrl;
1437 	struct amdvi_softc *softc;
1438 	uint64_t val;
1439 	int i;
1440 
1441 	for (i = 0; i < ivhd_count; i++) {
1442 		softc = device_get_softc(ivhd_devs[i]);
1443 		KASSERT(softc, ("softc is NULL\n"));
1444 		ctrl = softc->ctrl;
1445 		KASSERT(ctrl, ("ctrl is NULL\n"));
1446 
1447 		val = (	AMDVI_CTRL_EN 		|
1448 			AMDVI_CTRL_CMD 		|
1449 		    	AMDVI_CTRL_ELOG 	|
1450 		    	AMDVI_CTRL_ELOGINT 	|
1451 		    	AMDVI_CTRL_INV_TO_1S);
1452 
1453 		if (softc->ivhd_flag & IVHD_FLAG_COH)
1454 			val |= AMDVI_CTRL_COH;
1455 		if (softc->ivhd_flag & IVHD_FLAG_HTT)
1456 			val |= AMDVI_CTRL_HTT;
1457 		if (softc->ivhd_flag & IVHD_FLAG_RPPW)
1458 			val |= AMDVI_CTRL_RPPW;
1459 		if (softc->ivhd_flag & IVHD_FLAG_PPW)
1460 			val |= AMDVI_CTRL_PPW;
1461 		if (softc->ivhd_flag & IVHD_FLAG_ISOC)
1462 			val |= AMDVI_CTRL_ISOC;
1463 
1464 		ctrl->control = val;
1465 	}
1466 }
1467 
1468 static void
1469 amdvi_disable(void)
1470 {
1471 	struct amdvi_ctrl *ctrl;
1472 	struct amdvi_softc *softc;
1473 	int i;
1474 
1475 	for (i = 0; i < ivhd_count; i++) {
1476 		softc = device_get_softc(ivhd_devs[i]);
1477 		KASSERT(softc, ("softc is NULL\n"));
1478 		ctrl = softc->ctrl;
1479 		KASSERT(ctrl, ("ctrl is NULL\n"));
1480 
1481 		ctrl->control = 0;
1482 	}
1483 }
1484 
1485 static void
1486 amdvi_inv_tlb(void *arg)
1487 {
1488 	struct amdvi_domain *domain;
1489 
1490 	domain = (struct amdvi_domain *)arg;
1491 	KASSERT(domain, ("domain is NULL"));
1492 	amdvi_do_inv_domain(domain->id, false);
1493 }
1494 
1495 struct iommu_ops iommu_ops_amd = {
1496 	amdvi_init,
1497 	amdvi_cleanup,
1498 	amdvi_enable,
1499 	amdvi_disable,
1500 	amdvi_create_domain,
1501 	amdvi_destroy_domain,
1502 	amdvi_create_mapping,
1503 	amdvi_destroy_mapping,
1504 	amdvi_add_device,
1505 	amdvi_remove_device,
1506 	amdvi_inv_tlb
1507 };
1508