xref: /freebsd/sys/amd64/vmm/amd/amdvi_hw.c (revision 6e778a7efdc0e804471750157f6bacd1ef7d1580)
1 /*-
2  * Copyright (c) 2016, Anish Gupta (anish@freebsd.org)
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/bus.h>
33 #include <sys/kernel.h>
34 #include <sys/module.h>
35 #include <sys/malloc.h>
36 #include <sys/pcpu.h>
37 #include <sys/rman.h>
38 #include <sys/smp.h>
39 #include <sys/sysctl.h>
40 
41 #include <vm/vm.h>
42 #include <vm/pmap.h>
43 
44 #include <dev/pci/pcivar.h>
45 #include <dev/pci/pcireg.h>
46 
47 #include <machine/resource.h>
48 #include <machine/vmm.h>
49 #include <machine/pmap.h>
50 #include <machine/vmparam.h>
51 #include <machine/pci_cfgreg.h>
52 
53 #include "pcib_if.h"
54 
55 #include "io/iommu.h"
56 #include "amdvi_priv.h"
57 
SYSCTL_DECL(_hw_vmm);
/* Sysctl node hw.vmm.amdvi; the knobs declared below hang off it. */
SYSCTL_NODE(_hw_vmm, OID_AUTO, amdvi, CTLFLAG_RW, NULL, NULL);

/*
 * Ring-buffer offset helpers: move byte offset 'a' forward (MOD_INC) or
 * backward (MOD_DEC) by one entry of 's' bytes, wrapping at the ring size
 * of 'm' entries, i.e. (m * s) bytes.
 * NOTE(review): MOD_DEC with a < s depends on the subtraction wrapping in an
 * unsigned type and on (m * s) dividing that type's range - confirm the
 * operand types at each use.
 */
#define MOD_INC(a, s, m) (((a) + (s)) % ((m) * (s)))
#define MOD_DEC(a, s, m) (((a) - (s)) % ((m) * (s)))

/*
 * Expand a RID/device ID into the three printf arguments (bus, slot,
 * function) matching a "%d.%d.%d" format.
 */
#define RID2PCI_STR(d) PCI_RID2BUS(d), PCI_RID2SLOT(d), PCI_RID2FUNC(d)

/* Forward declarations for helpers used before their definition. */
static void amdvi_dump_cmds(struct amdvi_softc *softc);
static void amdvi_print_dev_cap(struct amdvi_softc *softc);

/* Malloc type used for every allocation made in this file. */
MALLOC_DEFINE(M_AMDVI, "amdvi", "amdvi");

/* Array of IVHD devices, defined elsewhere; indexed up to ivhd_count. */
extern device_t *ivhd_devs;

/* Number of entries in ivhd_devs, exported read-only as hw.vmm.amdvi.count. */
extern int ivhd_count;
SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, count, CTLFLAG_RDTUN, &ivhd_count,
    0, NULL);

/*
 * User opt-in checked by amdvi_init().
 * NOTE(review): the tunable is spelled "hw.vmm.amdvi_enable" while the
 * sysctl is hw.vmm.amdvi.enable; amdvi_init() advertises the tunable name.
 */
static int amdvi_enable_user = 0;
SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, enable, CTLFLAG_RDTUN,
    &amdvi_enable_user, 0, NULL);
TUNABLE_INT("hw.vmm.amdvi_enable", &amdvi_enable_user);

#ifdef AMDVI_ATS_ENABLE
/* XXX: ATS is not tested. */
/* User switch consulted by amdvi_hw_enable_iotlb(). */
static int amdvi_enable_iotlb = 1;
SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, iotlb_enabled, CTLFLAG_RDTUN,
    &amdvi_enable_iotlb, 0, NULL);
TUNABLE_INT("hw.vmm.enable_iotlb", &amdvi_enable_iotlb);
#endif

static int amdvi_host_ptp = 1;	/* Use page tables for host. */
SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, host_ptp, CTLFLAG_RDTUN,
    &amdvi_host_ptp, 0, NULL);
TUNABLE_INT("hw.vmm.amdvi.host_ptp", &amdvi_host_ptp);

/* Page table level used <= supported by h/w[v1=7]. */
static int amdvi_ptp_level = 4;
SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, ptp_level, CTLFLAG_RDTUN,
    &amdvi_ptp_level, 0, NULL);
TUNABLE_INT("hw.vmm.amdvi.ptp_level", &amdvi_ptp_level);

/* Disable fault event reporting. */
static int amdvi_disable_io_fault = 0;
SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, disable_io_fault, CTLFLAG_RDTUN,
    &amdvi_disable_io_fault, 0, NULL);
TUNABLE_INT("hw.vmm.amdvi.disable_io_fault", &amdvi_disable_io_fault);

/* Next domain ID handed out by amdvi_domainId(). */
static uint32_t amdvi_dom_id = 0;	/* 0 is reserved for host. */
SYSCTL_UINT(_hw_vmm_amdvi, OID_AUTO, domain_id, CTLFLAG_RD,
    &amdvi_dom_id, 0, NULL);
/*
 * Device table entry.
 * Bus(256) x Dev(32) x Fun(8) x DTE(256 bits or 32 bytes).
 *	= 256 * 2 * PAGE_SIZE.
 * The assertions below pin the table at 0x10000 entries and 2MB in total.
 */
static struct amdvi_dte amdvi_dte[PCI_NUM_DEV_MAX] __aligned(PAGE_SIZE);
CTASSERT(PCI_NUM_DEV_MAX == 0x10000);
CTASSERT(sizeof(amdvi_dte) == 0x200000);

/* List of all domains created by amdvi_create_domain(). */
static SLIST_HEAD (, amdvi_domain) dom_head;
121 
122 static inline uint32_t
123 amdvi_pci_read(struct amdvi_softc *softc, int off)
124 {
125 
126 	return (pci_cfgregread(PCI_RID2BUS(softc->pci_rid),
127 	    PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid),
128 	    off, 4));
129 }
130 
131 #ifdef AMDVI_ATS_ENABLE
132 /* XXX: Should be in pci.c */
133 /*
134  * Check if device has ATS capability and its enabled.
135  * If ATS is absent or disabled, return (-1), otherwise ATS
136  * queue length.
137  */
138 static int
139 amdvi_find_ats_qlen(uint16_t devid)
140 {
141 	device_t dev;
142 	uint32_t off, cap;
143 	int qlen = -1;
144 
145 	dev = pci_find_bsf(PCI_RID2BUS(devid), PCI_RID2SLOT(devid),
146 			   PCI_RID2FUNC(devid));
147 
148 	if (!dev) {
149 		return (-1);
150 	}
151 #define PCIM_ATS_EN	BIT(31)
152 
153 	if (pci_find_extcap(dev, PCIZ_ATS, &off) == 0) {
154 		cap = pci_read_config(dev, off + 4, 4);
155 		qlen = (cap & 0x1F);
156 		qlen = qlen ? qlen : 32;
157 		printf("AMD-Vi: PCI device %d.%d.%d ATS %s qlen=%d\n",
158 		       RID2PCI_STR(devid),
159 		       (cap & PCIM_ATS_EN) ? "enabled" : "Disabled",
160 		       qlen);
161 		qlen = (cap & PCIM_ATS_EN) ? qlen : -1;
162 	}
163 
164 	return (qlen);
165 }
166 
167 /*
168  * Check if an endpoint device support device IOTLB or ATS.
169  */
170 static inline bool
171 amdvi_dev_support_iotlb(struct amdvi_softc *softc, uint16_t devid)
172 {
173 	struct ivhd_dev_cfg *cfg;
174 	int qlen, i;
175 	bool pci_ats, ivhd_ats;
176 
177 	qlen = amdvi_find_ats_qlen(devid);
178 	if (qlen < 0)
179 		return (false);
180 
181 	KASSERT(softc, ("softc is NULL"));
182 	cfg = softc->dev_cfg;
183 
184 	ivhd_ats = false;
185 	for (i = 0; i < softc->dev_cfg_cnt; i++) {
186 		if ((cfg->start_id <= devid) && (cfg->end_id >= devid)) {
187 			ivhd_ats = cfg->enable_ats;
188 			break;
189 		}
190 		cfg++;
191 	}
192 
193 	pci_ats = (qlen < 0) ? false : true;
194 	if (pci_ats != ivhd_ats)
195 		device_printf(softc->dev,
196 		    "BIOS bug: mismatch in ATS setting for %d.%d.%d,"
197 		    "ATS inv qlen = %d\n", RID2PCI_STR(devid), qlen);
198 
199 	/* Ignore IVRS setting and respect PCI setting. */
200 	return (pci_ats);
201 }
202 #endif
203 
204 /* Enable IOTLB support for IOMMU if its supported. */
205 static inline void
206 amdvi_hw_enable_iotlb(struct amdvi_softc *softc)
207 {
208 #ifndef AMDVI_ATS_ENABLE
209 	softc->iotlb = false;
210 #else
211 	bool supported;
212 
213 	supported = (softc->ivhd_flag & IVHD_FLAG_IOTLB) ? true : false;
214 
215 	if (softc->pci_cap & AMDVI_PCI_CAP_IOTLB) {
216 		if (!supported)
217 			device_printf(softc->dev, "IOTLB disabled by BIOS.\n");
218 
219 		if (supported && !amdvi_enable_iotlb) {
220 			device_printf(softc->dev, "IOTLB disabled by user.\n");
221 			supported = false;
222 		}
223 	} else
224 		supported = false;
225 
226 	softc->iotlb = supported;
227 
228 #endif
229 }
230 
231 static int
232 amdvi_init_cmd(struct amdvi_softc *softc)
233 {
234 	struct amdvi_ctrl *ctrl = softc->ctrl;
235 
236 	ctrl->cmd.len = 8;	/* Use 256 command buffer entries. */
237 	softc->cmd_max = 1 << ctrl->cmd.len;
238 
239 	softc->cmd = malloc(sizeof(struct amdvi_cmd) *
240 	    softc->cmd_max, M_AMDVI, M_WAITOK | M_ZERO);
241 
242 	if ((uintptr_t)softc->cmd & PAGE_MASK)
243 		panic("AMDVi: Command buffer not aligned on page boundary.");
244 
245 	ctrl->cmd.base = vtophys(softc->cmd) / PAGE_SIZE;
246 	/*
247 	 * XXX: Reset the h/w pointers in case IOMMU is restarting,
248 	 * h/w doesn't clear these pointers based on empirical data.
249 	 */
250 	ctrl->cmd_tail = 0;
251 	ctrl->cmd_head = 0;
252 
253 	return (0);
254 }
255 
256 /*
257  * Note: Update tail pointer after we have written the command since tail
258  * pointer update cause h/w to execute new commands, see section 3.3
259  * of AMD IOMMU spec ver 2.0.
260  */
261 /* Get the command tail pointer w/o updating it. */
262 static struct amdvi_cmd *
263 amdvi_get_cmd_tail(struct amdvi_softc *softc)
264 {
265 	struct amdvi_ctrl *ctrl;
266 	struct amdvi_cmd *tail;
267 
268 	KASSERT(softc, ("softc is NULL"));
269 	KASSERT(softc->cmd != NULL, ("cmd is NULL"));
270 
271 	ctrl = softc->ctrl;
272 	KASSERT(ctrl != NULL, ("ctrl is NULL"));
273 
274 	tail = (struct amdvi_cmd *)((uint8_t *)softc->cmd +
275 	    ctrl->cmd_tail);
276 
277 	return (tail);
278 }
279 
280 /*
281  * Update the command tail pointer which will start command execution.
282  */
283 static void
284 amdvi_update_cmd_tail(struct amdvi_softc *softc)
285 {
286 	struct amdvi_ctrl *ctrl;
287 	int size;
288 
289 	size = sizeof(struct amdvi_cmd);
290 	KASSERT(softc->cmd != NULL, ("cmd is NULL"));
291 
292 	ctrl = softc->ctrl;
293 	KASSERT(ctrl != NULL, ("ctrl is NULL"));
294 
295 	ctrl->cmd_tail = MOD_INC(ctrl->cmd_tail, size, softc->cmd_max);
296 	softc->total_cmd++;
297 
298 #ifdef AMDVI_DEBUG_CMD
299 	device_printf(softc->dev, "cmd_tail: %s Tail:0x%x, Head:0x%x.\n",
300 	    ctrl->cmd_tail,
301 	    ctrl->cmd_head);
302 #endif
303 
304 }
305 
306 /*
307  * Various commands supported by IOMMU.
308  */
309 
310 /* Completion wait command. */
311 static void
312 amdvi_cmd_cmp(struct amdvi_softc *softc, const uint64_t data)
313 {
314 	struct amdvi_cmd *cmd;
315 	uint64_t pa;
316 
317 	cmd = amdvi_get_cmd_tail(softc);
318 	KASSERT(cmd != NULL, ("Cmd is NULL"));
319 
320 	pa = vtophys(&softc->cmp_data);
321 	cmd->opcode = AMDVI_CMP_WAIT_OPCODE;
322 	cmd->word0 = (pa & 0xFFFFFFF8) |
323 	    (AMDVI_CMP_WAIT_STORE);
324 	//(AMDVI_CMP_WAIT_FLUSH | AMDVI_CMP_WAIT_STORE);
325 	cmd->word1 = (pa >> 32) & 0xFFFFF;
326 	cmd->addr = data;
327 
328 	amdvi_update_cmd_tail(softc);
329 }
330 
331 /* Invalidate device table entry. */
332 static void
333 amdvi_cmd_inv_dte(struct amdvi_softc *softc, uint16_t devid)
334 {
335 	struct amdvi_cmd *cmd;
336 
337 	cmd = amdvi_get_cmd_tail(softc);
338 	KASSERT(cmd != NULL, ("Cmd is NULL"));
339 	cmd->opcode = AMDVI_INVD_DTE_OPCODE;
340 	cmd->word0 = devid;
341 	amdvi_update_cmd_tail(softc);
342 #ifdef AMDVI_DEBUG_CMD
343 	device_printf(softc->dev, "Invalidated DTE:0x%x\n", devid);
344 #endif
345 }
346 
347 /* Invalidate IOMMU page, use for invalidation of domain. */
348 static void
349 amdvi_cmd_inv_iommu_pages(struct amdvi_softc *softc, uint16_t domain_id,
350 			  uint64_t addr, bool guest_nested,
351 			  bool pde, bool page)
352 {
353 	struct amdvi_cmd *cmd;
354 
355 	cmd = amdvi_get_cmd_tail(softc);
356 	KASSERT(cmd != NULL, ("Cmd is NULL"));
357 
358 
359 	cmd->opcode = AMDVI_INVD_PAGE_OPCODE;
360 	cmd->word1 = domain_id;
361 	/*
362 	 * Invalidate all addresses for this domain.
363 	 */
364 	cmd->addr = addr;
365 	cmd->addr |= pde ? AMDVI_INVD_PAGE_PDE : 0;
366 	cmd->addr |= page ? AMDVI_INVD_PAGE_S : 0;
367 
368 	amdvi_update_cmd_tail(softc);
369 }
370 
371 #ifdef AMDVI_ATS_ENABLE
372 /* Invalidate device IOTLB. */
373 static void
374 amdvi_cmd_inv_iotlb(struct amdvi_softc *softc, uint16_t devid)
375 {
376 	struct amdvi_cmd *cmd;
377 	int qlen;
378 
379 	if (!softc->iotlb)
380 		return;
381 
382 	qlen = amdvi_find_ats_qlen(devid);
383 	if (qlen < 0) {
384 		panic("AMDVI: Invalid ATS qlen(%d) for device %d.%d.%d\n",
385 		      qlen, RID2PCI_STR(devid));
386 	}
387 	cmd = amdvi_get_cmd_tail(softc);
388 	KASSERT(cmd != NULL, ("Cmd is NULL"));
389 
390 #ifdef AMDVI_DEBUG_CMD
391 	device_printf(softc->dev, "Invalidate IOTLB devID 0x%x"
392 		      " Qlen:%d\n", devid, qlen);
393 #endif
394 	cmd->opcode = AMDVI_INVD_IOTLB_OPCODE;
395 	cmd->word0 = devid;
396 	cmd->word1 = qlen;
397 	cmd->addr = AMDVI_INVD_IOTLB_ALL_ADDR |
398 		AMDVI_INVD_IOTLB_S;
399 	amdvi_update_cmd_tail(softc);
400 }
401 #endif
402 
403 #ifdef notyet				/* For Interrupt Remap. */
404 static void
405 amdvi_cmd_inv_intr_map(struct amdvi_softc *softc,
406 		       uint16_t devid)
407 {
408 	struct amdvi_cmd *cmd;
409 
410 	cmd = amdvi_get_cmd_tail(softc);
411 	KASSERT(cmd != NULL, ("Cmd is NULL"));
412 	cmd->opcode = AMDVI_INVD_INTR_OPCODE;
413 	cmd->word0 = devid;
414 	amdvi_update_cmd_tail(softc);
415 #ifdef AMDVI_DEBUG_CMD
416 	device_printf(softc->dev, "Invalidate INTR map of devID 0x%x\n", devid);
417 #endif
418 }
419 #endif
420 
421 /* Invalidate domain using INVALIDATE_IOMMU_PAGES command. */
422 static void
423 amdvi_inv_domain(struct amdvi_softc *softc, uint16_t domain_id)
424 {
425 	struct amdvi_cmd *cmd;
426 
427 	cmd = amdvi_get_cmd_tail(softc);
428 	KASSERT(cmd != NULL, ("Cmd is NULL"));
429 
430 	/*
431 	 * See section 3.3.3 of IOMMU spec rev 2.0, software note
432 	 * for invalidating domain.
433 	 */
434 	amdvi_cmd_inv_iommu_pages(softc, domain_id, AMDVI_INVD_PAGE_ALL_ADDR,
435 				false, true, true);
436 
437 #ifdef AMDVI_DEBUG_CMD
438 	device_printf(softc->dev, "Invalidate domain:0x%x\n", domain_id);
439 
440 #endif
441 }
442 
443 static	bool
444 amdvi_cmp_wait(struct amdvi_softc *softc)
445 {
446 	struct amdvi_ctrl *ctrl;
447 	const uint64_t VERIFY = 0xA5A5;
448 	volatile uint64_t *read;
449 	int i;
450 	bool status;
451 
452 	ctrl = softc->ctrl;
453 	read = &softc->cmp_data;
454 	*read = 0;
455 	amdvi_cmd_cmp(softc, VERIFY);
456 	/* Wait for h/w to update completion data. */
457 	for (i = 0; i < 100 && (*read != VERIFY); i++) {
458 		DELAY(1000);		/* 1 ms */
459 	}
460 	status = (VERIFY == softc->cmp_data) ? true : false;
461 
462 #ifdef AMDVI_DEBUG_CMD
463 	if (status)
464 		device_printf(softc->dev, "CMD completion DONE Tail:0x%x, "
465 			      "Head:0x%x, loop:%d.\n", ctrl->cmd_tail,
466 			      ctrl->cmd_head, loop);
467 #endif
468 	return (status);
469 }
470 
471 static void
472 amdvi_wait(struct amdvi_softc *softc)
473 {
474 	struct amdvi_ctrl *ctrl;
475 	int i;
476 
477 	KASSERT(softc, ("softc is NULL"));
478 
479 	ctrl = softc->ctrl;
480 	KASSERT(ctrl != NULL, ("ctrl is NULL"));
481 	/* Don't wait if h/w is not enabled. */
482 	if ((ctrl->control & AMDVI_CTRL_EN) == 0)
483 		return;
484 
485 	for (i = 0; i < 10; i++) {
486 		if (amdvi_cmp_wait(softc))
487 			return;
488 	}
489 
490 	device_printf(softc->dev, "Error: completion failed"
491 		      " tail:0x%x, head:0x%x.\n",
492 		      ctrl->cmd_tail, ctrl->cmd_head);
493 	amdvi_dump_cmds(softc);
494 }
495 
496 static void
497 amdvi_dump_cmds(struct amdvi_softc *softc)
498 {
499 	struct amdvi_ctrl *ctrl;
500 	struct amdvi_cmd *cmd;
501 	int off, i;
502 
503 	ctrl = softc->ctrl;
504 	device_printf(softc->dev, "Dump all the commands:\n");
505 	/*
506 	 * If h/w is stuck in completion, it is the previous command,
507 	 * start dumping from previous command onward.
508 	 */
509 	off = MOD_DEC(ctrl->cmd_head, sizeof(struct amdvi_cmd),
510 	    softc->cmd_max);
511 	for (i = 0; off != ctrl->cmd_tail &&
512 	    i < softc->cmd_max; i++) {
513 		cmd = (struct amdvi_cmd *)((uint8_t *)softc->cmd + off);
514 		printf("  [CMD%d, off:0x%x] opcode= 0x%x 0x%x"
515 		    " 0x%x 0x%lx\n", i, off, cmd->opcode,
516 		    cmd->word0, cmd->word1, cmd->addr);
517 		off = (off + sizeof(struct amdvi_cmd)) %
518 		    (softc->cmd_max * sizeof(struct amdvi_cmd));
519 	}
520 }
521 
522 static int
523 amdvi_init_event(struct amdvi_softc *softc)
524 {
525 	struct amdvi_ctrl *ctrl;
526 
527 	ctrl = softc->ctrl;
528 	ctrl->event.len = 8;
529 	softc->event_max = 1 << ctrl->event.len;
530 	softc->event = malloc(sizeof(struct amdvi_event) *
531 	    softc->event_max, M_AMDVI, M_WAITOK | M_ZERO);
532 	if ((uintptr_t)softc->event & PAGE_MASK) {
533 		device_printf(softc->dev, "Event buffer not aligned on page.");
534 		return (false);
535 	}
536 	ctrl->event.base = vtophys(softc->event) / PAGE_SIZE;
537 
538 	/* Reset the pointers. */
539 	ctrl->evt_head = 0;
540 	ctrl->evt_tail = 0;
541 
542 	return (0);
543 }
544 
545 static inline void
546 amdvi_decode_evt_flag(uint16_t flag)
547 {
548 
549 	flag &= AMDVI_EVENT_FLAG_MASK;
550 	printf(" 0x%b]\n", flag,
551 		"\020"
552 		"\001GN"
553 		"\002NX"
554 		"\003US"
555 		"\004I"
556 		"\005PR"
557 		"\006RW"
558 		"\007PE"
559 		"\010RZ"
560 		"\011TR"
561 		);
562 }
563 
564 /* See section 2.5.4 of AMD IOMMU spec ver 2.62.*/
565 static inline void
566 amdvi_decode_evt_flag_type(uint8_t type)
567 {
568 
569 	switch (AMDVI_EVENT_FLAG_TYPE(type)) {
570 	case 0:
571 		printf("RSVD\n");
572 		break;
573 	case 1:
574 		printf("Master Abort\n");
575 		break;
576 	case 2:
577 		printf("Target Abort\n");
578 		break;
579 	case 3:
580 		printf("Data Err\n");
581 		break;
582 	default:
583 		break;
584 	}
585 }
586 
587 static void
588 amdvi_decode_inv_dte_evt(uint16_t devid, uint16_t domid, uint64_t addr,
589     uint16_t flag)
590 {
591 
592 	printf("\t[IO_PAGE_FAULT EVT: devId:0x%x DomId:0x%x"
593 	    " Addr:0x%lx",
594 	    devid, domid, addr);
595 	amdvi_decode_evt_flag(flag);
596 }
597 
598 static void
599 amdvi_decode_pf_evt(uint16_t devid, uint16_t domid, uint64_t addr,
600     uint16_t flag)
601 {
602 
603 	printf("\t[IO_PAGE_FAULT EVT: devId:0x%x DomId:0x%x"
604 	    " Addr:0x%lx",
605 	    devid, domid, addr);
606 	amdvi_decode_evt_flag(flag);
607 }
608 
609 static void
610 amdvi_decode_dte_hwerr_evt(uint16_t devid, uint16_t domid,
611     uint64_t addr, uint16_t flag)
612 {
613 
614 	printf("\t[DEV_TAB_HW_ERR EVT: devId:0x%x DomId:0x%x"
615 	    " Addr:0x%lx", devid, domid, addr);
616 	amdvi_decode_evt_flag(flag);
617 	amdvi_decode_evt_flag_type(flag);
618 }
619 
620 static void
621 amdvi_decode_page_hwerr_evt(uint16_t devid, uint16_t domid, uint64_t addr,
622     uint16_t flag)
623 {
624 
625 	printf("\t[PAGE_TAB_HW_ERR EVT: devId:0x%x DomId:0x%x"
626 	    " Addr:0x%lx", devid, domid, addr);
627 	amdvi_decode_evt_flag(flag);
628 	amdvi_decode_evt_flag_type(AMDVI_EVENT_FLAG_TYPE(flag));
629 }
630 
631 static void
632 amdvi_decode_evt(struct amdvi_event *evt)
633 {
634 	struct amdvi_cmd *cmd;
635 
636 	switch (evt->opcode) {
637 	case AMDVI_EVENT_INVALID_DTE:
638 		amdvi_decode_inv_dte_evt(evt->devid, evt->pasid_domid,
639 		    evt->addr, evt->flag);
640 		break;
641 
642 	case AMDVI_EVENT_PFAULT:
643 		amdvi_decode_pf_evt(evt->devid, evt->pasid_domid,
644 		    evt->addr, evt->flag);
645 		break;
646 
647 	case AMDVI_EVENT_DTE_HW_ERROR:
648 		amdvi_decode_dte_hwerr_evt(evt->devid, evt->pasid_domid,
649 		    evt->addr, evt->flag);
650 		break;
651 
652 	case AMDVI_EVENT_PAGE_HW_ERROR:
653 		amdvi_decode_page_hwerr_evt(evt->devid, evt->pasid_domid,
654 		    evt->addr, evt->flag);
655 		break;
656 
657 	case AMDVI_EVENT_ILLEGAL_CMD:
658 		/* FALL THROUGH */
659 	case AMDVI_EVENT_CMD_HW_ERROR:
660 		printf("\t[%s EVT]\n", (evt->opcode == AMDVI_EVENT_ILLEGAL_CMD) ?
661 		    "ILLEGAL CMD" : "CMD HW ERR");
662 		cmd = (struct amdvi_cmd *)PHYS_TO_DMAP(evt->addr);
663 		printf("\tCMD opcode= 0x%x 0x%x 0x%x 0x%lx\n",
664 		    cmd->opcode, cmd->word0, cmd->word1, cmd->addr);
665 		break;
666 
667 	case AMDVI_EVENT_IOTLB_TIMEOUT:
668 		printf("\t[IOTLB_INV_TIMEOUT devid:0x%x addr:0x%lx]\n",
669 		    evt->devid, evt->addr);
670 		break;
671 
672 	case AMDVI_EVENT_INVALID_DTE_REQ:
673 		printf("\t[INV_DTE devid:0x%x addr:0x%lx type:0x%x tr:%d]\n",
674 		    evt->devid, evt->addr, evt->flag >> 9,
675 		    (evt->flag >> 8) & 1);
676 		break;
677 
678 	case AMDVI_EVENT_INVALID_PPR_REQ:
679 	case AMDVI_EVENT_COUNTER_ZERO:
680 		printf("AMD-Vi: v2 events.\n");
681 		break;
682 
683 	default:
684 		printf("Unsupported AMD-Vi event:%d\n", evt->opcode);
685 	}
686 }
687 
688 static void
689 amdvi_print_events(struct amdvi_softc *softc)
690 {
691 	struct amdvi_ctrl *ctrl;
692 	struct amdvi_event *event;
693 	int i, size;
694 
695 	ctrl = softc->ctrl;
696 	size = sizeof(struct amdvi_event);
697 	for (i = 0; i < softc->event_max; i++) {
698 		event = &softc->event[ctrl->evt_head / size];
699 		if (!event->opcode)
700 			break;
701 		device_printf(softc->dev, "\t[Event%d: Head:0x%x Tail:0x%x]\n",
702 		    i, ctrl->evt_head, ctrl->evt_tail);
703 		amdvi_decode_evt(event);
704 		ctrl->evt_head = MOD_INC(ctrl->evt_head, size,
705 		    softc->event_max);
706 	}
707 }
708 
709 static int
710 amdvi_init_dte(struct amdvi_softc *softc)
711 {
712 	struct amdvi_ctrl *ctrl;
713 
714 	ctrl = softc->ctrl;
715 	ctrl->dte.base = vtophys(amdvi_dte) / PAGE_SIZE;
716 	ctrl->dte.size = 0x1FF;		/* 2MB device table. */
717 
718 	return (0);
719 }
720 
721 /*
722  * Not all capabilities of IOMMU are available in ACPI IVHD flag
723  * or EFR entry, read directly from device.
724  */
725 static int
726 amdvi_print_pci_cap(device_t dev)
727 {
728 	struct amdvi_softc *softc;
729 	uint32_t off, cap;
730 
731 
732 	softc = device_get_softc(dev);
733 	off = softc->cap_off;
734 
735 	/*
736 	 * Section 3.7.1 of IOMMU sepc rev 2.0.
737 	 * Read capability from device.
738 	 */
739 	cap = amdvi_pci_read(softc, off);
740 
741 	/* Make sure capability type[18:16] is 3. */
742 	KASSERT((((cap >> 16) & 0x7) == 0x3),
743 	    ("Not a IOMMU capability 0x%x@0x%x", cap, off));
744 
745 	softc->pci_cap = cap >> 24;
746 	device_printf(softc->dev, "PCI cap 0x%x@0x%x feature:%b\n",
747 	    cap, off, softc->pci_cap,
748 	    "\020\001IOTLB\002HT\003NPCache\004EFR");
749 
750 	/* IOMMU spec Rev 2.0, section 3.7.2.1 */
751 	softc->pci_efr = softc->ctrl->ex_feature;
752 	if (softc->pci_efr) {
753 		device_printf(softc->dev, "PCI extended Feature:%b\n",
754 		    (int)softc->pci_efr,
755 		    "\020\001PreFSup\002PPRSup\003XTSup\004NXSup\006IASup"
756 		    "\007GASup\008HESup\009PCSup");
757 		device_printf(softc->dev,
758 		    "PCI HATS = %d GATS = %d GLXSup = %d, max PASID: 0x%x ",
759 		    (int)((softc->pci_efr >> 10) & 0x3),
760 		    (int)((softc->pci_efr >> 12) & 0x3),
761 		    (int)((softc->pci_efr >> 14) & 0x3),
762 		    (int)((softc->pci_efr >> 32) & 0x1F) + 1);
763 	}
764 
765 	return (0);
766 }
767 
768 static void
769 amdvi_event_intr(void *arg)
770 {
771 	struct amdvi_softc *softc;
772 	struct amdvi_ctrl *ctrl;
773 
774 	softc = (struct amdvi_softc *)arg;
775 	ctrl = softc->ctrl;
776 	device_printf(softc->dev, "EVT INTR %ld Status:0x%x"
777 	    " EVT Head:0x%x Tail:0x%x]\n", softc->event_intr_cnt++,
778 	    ctrl->status, ctrl->evt_head, ctrl->evt_tail);
779 	printf("  [CMD Total 0x%lx] Tail:0x%x, Head:0x%x.\n",
780 	    softc->total_cmd, ctrl->cmd_tail, ctrl->cmd_head);
781 
782 	amdvi_print_events(softc);
783 	ctrl->status &= AMDVI_STATUS_EV_OF | AMDVI_STATUS_EV_INTR;
784 }
785 
786 static void
787 amdvi_free_evt_intr_res(device_t dev)
788 {
789 
790 	struct amdvi_softc *softc;
791 
792 	softc = device_get_softc(dev);
793 	if (softc->event_tag != NULL) {
794 		bus_teardown_intr(dev, softc->event_res, softc->event_tag);
795 	}
796 	if (softc->event_res != NULL) {
797 		bus_release_resource(dev, SYS_RES_IRQ, softc->event_rid,
798 		    softc->event_res);
799 	}
800 	bus_delete_resource(dev, SYS_RES_IRQ, softc->event_rid);
801 	PCIB_RELEASE_MSI(device_get_parent(device_get_parent(dev)),
802 	    dev, 1, &softc->event_irq);
803 }
804 
805 static bool
806 amdvi_alloc_intr_resources(struct amdvi_softc *softc)
807 {
808 	struct amdvi_ctrl *ctrl;
809 	device_t dev, pcib;
810 	device_t mmio_dev;
811 	uint64_t msi_addr;
812 	uint32_t msi_data;
813 	int err;
814 
815 	dev = softc->dev;
816 	pcib = device_get_parent(device_get_parent(dev));
817 	mmio_dev = pci_find_bsf(PCI_RID2BUS(softc->pci_rid),
818             PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid));
819 	if (device_is_attached(mmio_dev)) {
820 		device_printf(dev,
821 		    "warning: IOMMU device is claimed by another driver %s\n",
822 		    device_get_driver(mmio_dev)->name);
823 	}
824 
825 	softc->event_irq = -1;
826 	softc->event_rid = 0;
827 
828 	/*
829 	 * Section 3.7.1 of IOMMU rev 2.0. With MSI, there is only one
830 	 * interrupt. XXX: Enable MSI/X support.
831 	 */
832 	err = PCIB_ALLOC_MSI(pcib, dev, 1, 1, &softc->event_irq);
833 	if (err) {
834 		device_printf(dev,
835 		    "Couldn't find event MSI IRQ resource.\n");
836 		return (ENOENT);
837 	}
838 
839 	err = bus_set_resource(dev, SYS_RES_IRQ, softc->event_rid,
840 	    softc->event_irq, 1);
841 	if (err) {
842 		device_printf(dev, "Couldn't set event MSI resource.\n");
843 		return (ENXIO);
844 	}
845 
846 	softc->event_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
847 	    &softc->event_rid, RF_ACTIVE);
848 	if (!softc->event_res) {
849 		device_printf(dev,
850 		    "Unable to allocate event INTR resource.\n");
851 		return (ENOMEM);
852 	}
853 
854 	if (bus_setup_intr(dev, softc->event_res,
855 	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, amdvi_event_intr,
856 	    softc, &softc->event_tag)) {
857 		device_printf(dev, "Fail to setup event intr\n");
858 		bus_release_resource(softc->dev, SYS_RES_IRQ,
859 		    softc->event_rid, softc->event_res);
860 		softc->event_res = NULL;
861 		return (ENXIO);
862 	}
863 
864 	bus_describe_intr(dev, softc->event_res, softc->event_tag,
865 	    "fault");
866 
867 	err = PCIB_MAP_MSI(pcib, dev, softc->event_irq, &msi_addr,
868 	    &msi_data);
869 	if (err) {
870 		device_printf(dev,
871 		    "Event interrupt config failed, err=%d.\n",
872 		    err);
873 		amdvi_free_evt_intr_res(softc->dev);
874 		return (err);
875 	}
876 
877 	/* Clear interrupt status bits. */
878 	ctrl = softc->ctrl;
879 	ctrl->status &= AMDVI_STATUS_EV_OF | AMDVI_STATUS_EV_INTR;
880 
881 	/* Now enable MSI interrupt. */
882 	pci_enable_msi(mmio_dev, msi_addr, msi_data);
883 	return (0);
884 }
885 
886 
887 static void
888 amdvi_print_dev_cap(struct amdvi_softc *softc)
889 {
890 	struct ivhd_dev_cfg *cfg;
891 	int i;
892 
893 	cfg = softc->dev_cfg;
894 	for (i = 0; i < softc->dev_cfg_cnt; i++) {
895 		device_printf(softc->dev, "device [0x%x - 0x%x]"
896 		    "config:%b%s\n", cfg->start_id, cfg->end_id,
897 		    cfg->data,
898 		    "\020\001INIT\002ExtInt\003NMI"
899 		    "\007LINT0\008LINT1",
900 		    cfg->enable_ats ? "ATS enabled" : "");
901 		cfg++;
902 	}
903 }
904 
905 static int
906 amdvi_handle_sysctl(SYSCTL_HANDLER_ARGS)
907 {
908 	struct amdvi_softc *softc;
909 	int result, type, error = 0;
910 
911 	softc = (struct amdvi_softc *)arg1;
912 	type = arg2;
913 
914 	switch (type) {
915 	case 0:
916 		result = softc->ctrl->cmd_head;
917 		error = sysctl_handle_int(oidp, &result, 0,
918 		    req);
919 		break;
920 	case 1:
921 		result = softc->ctrl->cmd_tail;
922 		error = sysctl_handle_int(oidp, &result, 0,
923 		    req);
924 		break;
925 	case 2:
926 		result = softc->ctrl->evt_head;
927 		error = sysctl_handle_int(oidp, &result, 0,
928 		    req);
929 		break;
930 	case 3:
931 		result = softc->ctrl->evt_tail;
932 		error = sysctl_handle_int(oidp, &result, 0,
933 		    req);
934 		break;
935 
936 	default:
937 		device_printf(softc->dev, "Unknown sysctl:%d\n", type);
938 	}
939 
940 	return (error);
941 }
942 
943 static void
944 amdvi_add_sysctl(struct amdvi_softc *softc)
945 {
946 	struct sysctl_oid_list *child;
947 	struct sysctl_ctx_list *ctx;
948 	device_t dev;
949 
950 	dev = softc->dev;
951 	ctx = device_get_sysctl_ctx(dev);
952 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
953 
954 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "event_intr_count", CTLFLAG_RD,
955 	    &softc->event_intr_cnt, "Event interrupt count");
956 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "command_count", CTLFLAG_RD,
957 	    &softc->total_cmd, "Command submitted count");
958 	SYSCTL_ADD_U16(ctx, child, OID_AUTO, "pci_rid", CTLFLAG_RD,
959 	    &softc->pci_rid, 0, "IOMMU RID");
960 	SYSCTL_ADD_U16(ctx, child, OID_AUTO, "start_dev_rid", CTLFLAG_RD,
961 	    &softc->start_dev_rid, 0, "Start of device under this IOMMU");
962 	SYSCTL_ADD_U16(ctx, child, OID_AUTO, "end_dev_rid", CTLFLAG_RD,
963 	    &softc->end_dev_rid, 0, "End of device under this IOMMU");
964 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_head",
965 	    CTLTYPE_UINT | CTLFLAG_RD, softc, 0,
966 	    amdvi_handle_sysctl, "IU", "Command head");
967 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_tail",
968 	    CTLTYPE_UINT | CTLFLAG_RD, softc, 1,
969 	    amdvi_handle_sysctl, "IU", "Command tail");
970 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_head",
971 	    CTLTYPE_UINT | CTLFLAG_RD, softc, 2,
972 	    amdvi_handle_sysctl, "IU", "Command head");
973 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_tail",
974 	    CTLTYPE_UINT | CTLFLAG_RD, softc, 3,
975 	    amdvi_handle_sysctl, "IU", "Command tail");
976 }
977 
978 int
979 amdvi_setup_hw(struct amdvi_softc *softc)
980 {
981 	device_t dev;
982 	int status;
983 
984 	dev = softc->dev;
985 
986 	amdvi_hw_enable_iotlb(softc);
987 
988 	amdvi_print_dev_cap(softc);
989 
990 	if ((status = amdvi_print_pci_cap(dev)) != 0) {
991 		device_printf(dev, "PCI capability.\n");
992 		return (status);
993 	}
994 	if ((status = amdvi_init_cmd(softc)) != 0) {
995 		device_printf(dev, "Couldn't configure command buffer.\n");
996 		return (status);
997 	}
998 	if ((status = amdvi_init_event(softc)) != 0) {
999 		device_printf(dev, "Couldn't configure event buffer.\n");
1000 		return (status);
1001 	}
1002 	if ((status = amdvi_init_dte(softc)) != 0) {
1003 		device_printf(dev, "Couldn't configure device table.\n");
1004 		return (status);
1005 	}
1006 	if ((status = amdvi_alloc_intr_resources(softc)) != 0) {
1007 		return (status);
1008 	}
1009 	amdvi_add_sysctl(softc);
1010 	return (0);
1011 }
1012 
1013 int
1014 amdvi_teardown_hw(struct amdvi_softc *softc)
1015 {
1016 	device_t dev;
1017 
1018 	dev = softc->dev;
1019 
1020 	/*
1021 	 * Called after disable, h/w is stopped by now, free all the resources.
1022 	 */
1023 	amdvi_free_evt_intr_res(dev);
1024 
1025 	if (softc->cmd)
1026 		free(softc->cmd, M_AMDVI);
1027 
1028 	if (softc->event)
1029 		free(softc->event, M_AMDVI);
1030 
1031 	return (0);
1032 }
1033 
1034 /*********** bhyve interfaces *********************/
1035 static int
1036 amdvi_init(void)
1037 {
1038 	if (!ivhd_count) {
1039 		return (EIO);
1040 	}
1041 	if (!amdvi_enable_user && ivhd_count) {
1042 		printf("bhyve: Found %d AMD-Vi/IOMMU device(s), "
1043 		    	"use hw.vmm.amdvi_enable=1 to enable pass-through.\n",
1044 		    ivhd_count);
1045 		return (EINVAL);
1046 	}
1047 	return (0);
1048 }
1049 
/*
 * Cleanup counterpart of amdvi_init(); there is currently nothing to
 * release here.
 */
static void
amdvi_cleanup(void)
{
	/* Nothing. */
}
1055 
1056 static uint16_t
1057 amdvi_domainId(void)
1058 {
1059 
1060 	/*
1061 	 * If we hit maximum domain limit, rollover leaving host
1062 	 * domain(0).
1063 	 * XXX: make sure that this domain is not used.
1064 	 */
1065 	if (amdvi_dom_id == AMDVI_MAX_DOMAIN)
1066 		amdvi_dom_id = 1;
1067 
1068 	return ((uint16_t)amdvi_dom_id++);
1069 }
1070 
1071 static void
1072 amdvi_do_inv_domain(uint16_t domain_id, bool create)
1073 {
1074 	struct amdvi_softc *softc;
1075 	int i;
1076 
1077 	for (i = 0; i < ivhd_count; i++) {
1078 		softc = device_get_softc(ivhd_devs[i]);
1079 		KASSERT(softc, ("softc is NULL"));
1080 		/*
1081 		 * If not present pages are cached, invalidate page after
1082 		 * creating domain.
1083 		 */
1084 #if 0
1085 		if (create && ((softc->pci_cap & AMDVI_PCI_CAP_NPCACHE) == 0))
1086 			continue;
1087 #endif
1088 		amdvi_inv_domain(softc, domain_id);
1089 		amdvi_wait(softc);
1090 	}
1091 }
1092 
1093 static void *
1094 amdvi_create_domain(vm_paddr_t maxaddr)
1095 {
1096 	struct amdvi_domain *dom;
1097 
1098 	dom = malloc(sizeof(struct amdvi_domain), M_AMDVI, M_ZERO | M_WAITOK);
1099 	dom->id = amdvi_domainId();
1100 	//dom->maxaddr = maxaddr;
1101 #ifdef AMDVI_DEBUG_CMD
1102 	printf("Created domain #%d\n", dom->id);
1103 #endif
1104 	/*
1105 	 * Host domain(#0) don't create translation table.
1106 	 */
1107 	if (dom->id || amdvi_host_ptp)
1108 		dom->ptp = malloc(PAGE_SIZE, M_AMDVI, M_WAITOK | M_ZERO);
1109 
1110 	dom->ptp_level = amdvi_ptp_level;
1111 
1112 	amdvi_do_inv_domain(dom->id, true);
1113 	SLIST_INSERT_HEAD(&dom_head, dom, next);
1114 
1115 	return (dom);
1116 }
1117 
1118 static void
1119 amdvi_free_ptp(uint64_t *ptp, int level)
1120 {
1121 	int i;
1122 
1123 	if (level < 1)
1124 		return;
1125 
1126 	for (i = 0; i < NPTEPG ; i++) {
1127 		if ((ptp[i] & AMDVI_PT_PRESENT) == 0)
1128 			continue;
1129 		/* XXX: Add super-page or PTE mapping > 4KB. */
1130 #ifdef notyet
1131 		/* Super-page mapping. */
1132 		if (AMDVI_PD_SUPER(ptp[i]))
1133 			continue;
1134 #endif
1135 
1136 		amdvi_free_ptp((uint64_t *)PHYS_TO_DMAP(ptp[i]
1137 		    & AMDVI_PT_MASK), level - 1);
1138 
1139 	}
1140 
1141 	free(ptp, M_AMDVI);
1142 }
1143 
1144 static void
1145 amdvi_destroy_domain(void *arg)
1146 {
1147 	struct amdvi_domain *domain;
1148 
1149 	domain = (struct amdvi_domain *)arg;
1150 	KASSERT(domain, ("domain is NULL"));
1151 #ifdef AMDVI_DEBUG_CMD
1152 	printf("Destroying domain %d\n", domain->id);
1153 #endif
1154 	if (domain->ptp)
1155 		amdvi_free_ptp(domain->ptp, domain->ptp_level);
1156 
1157 	amdvi_do_inv_domain(domain->id, false);
1158 	SLIST_REMOVE(&dom_head, domain, amdvi_domain, next);
1159 	free(domain, M_AMDVI);
1160 }
1161 
1162 static uint64_t
1163 amdvi_set_pt(uint64_t *pt, int level, vm_paddr_t gpa,
1164     vm_paddr_t hpa, uint64_t pg_size, bool create)
1165 {
1166 	uint64_t *page, pa;
1167 	int shift, index;
1168 	const int PT_SHIFT = 9;
1169 	const int PT_INDEX_MASK = (1 << PT_SHIFT) - 1;	/* Based on PT_SHIFT */
1170 
1171 	if (!pg_size)
1172 		return (0);
1173 
1174 	if (hpa & (pg_size - 1)) {
1175 		printf("HPA is not size aligned.\n");
1176 		return (0);
1177 	}
1178 	if (gpa & (pg_size - 1)) {
1179 		printf("HPA is not size aligned.\n");
1180 		return (0);
1181 	}
1182 	shift = PML4SHIFT;
1183 	while ((shift > PAGE_SHIFT) && (pg_size < (1UL << shift))) {
1184 		index = (gpa >> shift) & PT_INDEX_MASK;
1185 
1186 		if ((pt[index] == 0) && create) {
1187 			page = malloc(PAGE_SIZE, M_AMDVI, M_WAITOK | M_ZERO);
1188 			pa = vtophys(page);
1189 			pt[index] = pa | AMDVI_PT_PRESENT | AMDVI_PT_RW |
1190 			    ((level - 1) << AMDVI_PD_LEVEL_SHIFT);
1191 		}
1192 #ifdef AMDVI_DEBUG_PTE
1193 		if ((gpa % 0x1000000) == 0)
1194 			printf("[level%d, shift = %d]PTE:0x%lx\n",
1195 			    level, shift, pt[index]);
1196 #endif
1197 #define PTE2PA(x)	((uint64_t)(x) & AMDVI_PT_MASK)
1198 		pa = PTE2PA(pt[index]);
1199 		pt = (uint64_t *)PHYS_TO_DMAP(pa);
1200 		shift -= PT_SHIFT;
1201 		level--;
1202 	}
1203 
1204 	/* Leaf entry. */
1205 	index = (gpa >> shift) & PT_INDEX_MASK;
1206 
1207 	if (create) {
1208 		pt[index] = hpa | AMDVI_PT_RW | AMDVI_PT_PRESENT;
1209 	} else
1210 		pt[index] = 0;
1211 
1212 #ifdef AMDVI_DEBUG_PTE
1213 	if ((gpa % 0x1000000) == 0)
1214 		printf("[Last level%d, shift = %d]PTE:0x%lx\n",
1215 		    level, shift, pt[index]);
1216 #endif
1217 	return (1ULL << shift);
1218 }
1219 
1220 static uint64_t
1221 amdvi_update_mapping(struct amdvi_domain *domain, vm_paddr_t gpa,
1222     vm_paddr_t hpa, uint64_t size, bool create)
1223 {
1224 	uint64_t mapped, *ptp, len;
1225 	int level;
1226 
1227 	KASSERT(domain, ("domain is NULL"));
1228 	level = domain->ptp_level;
1229 	KASSERT(level, ("Page table level is 0"));
1230 
1231 	ptp = domain->ptp;
1232 	KASSERT(ptp, ("PTP is NULL"));
1233 	mapped = 0;
1234 	while (mapped < size) {
1235 		len = amdvi_set_pt(ptp, level, gpa + mapped, hpa + mapped,
1236 		    PAGE_SIZE, create);
1237 		if (!len) {
1238 			printf("Error: Couldn't map HPA:0x%lx GPA:0x%lx\n",
1239 			    hpa, gpa);
1240 			return (0);
1241 		}
1242 		mapped += len;
1243 	}
1244 
1245 	return (mapped);
1246 }
1247 
1248 static uint64_t
1249 amdvi_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa,
1250     uint64_t len)
1251 {
1252 	struct amdvi_domain *domain;
1253 
1254 	domain = (struct amdvi_domain *)arg;
1255 
1256 	if (domain->id && !domain->ptp) {
1257 		printf("ptp is NULL");
1258 		return (-1);
1259 	}
1260 
1261 	/*
1262 	 * If host domain is created w/o page table, skip IOMMU page
1263 	 * table set-up.
1264 	 */
1265 	if (domain->ptp)
1266 		return (amdvi_update_mapping(domain, gpa, hpa, len, true));
1267 	else
1268 		return (len);
1269 }
1270 
1271 static uint64_t
1272 amdvi_destroy_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
1273 {
1274 	struct amdvi_domain *domain;
1275 
1276 	domain = (struct amdvi_domain *)arg;
1277 	/*
1278 	 * If host domain is created w/o page table, skip IOMMU page
1279 	 * table set-up.
1280 	 */
1281 	if (domain->ptp)
1282 		return (amdvi_update_mapping(domain, gpa, 0, len, false));
1283 	return
1284 	    (len);
1285 }
1286 
1287 static struct amdvi_softc *
1288 amdvi_find_iommu(uint16_t devid)
1289 {
1290 	struct amdvi_softc *softc;
1291 	int i;
1292 
1293 	for (i = 0; i < ivhd_count; i++) {
1294 		softc = device_get_softc(ivhd_devs[i]);
1295 		if ((devid >= softc->start_dev_rid) &&
1296 		    (devid <= softc->end_dev_rid))
1297 			return (softc);
1298 	}
1299 
1300 	/*
1301 	 * XXX: BIOS bug, device not in IVRS table, assume its from first IOMMU.
1302 	 */
1303 	printf("BIOS bug device(%d.%d.%d) doesn't have IVHD entry.\n",
1304 	    RID2PCI_STR(devid));
1305 
1306 	return (device_get_softc(ivhd_devs[0]));
1307 }
1308 
1309 /*
1310  * Set-up device table entry.
1311  * IOMMU spec Rev 2.0, section 3.2.2.2, some of the fields must
1312  * be set concurrently, e.g. read and write bits.
1313  */
1314 static void
1315 amdvi_set_dte(struct amdvi_domain *domain, uint16_t devid, bool enable)
1316 {
1317 	struct amdvi_softc *softc;
1318 	struct amdvi_dte temp;
1319 
1320 	softc = amdvi_find_iommu(devid);
1321 	KASSERT(softc, ("softc is NULL for pci_rid:0x%x\n", devid));
1322 
1323 	memset(&temp, 0, sizeof(struct amdvi_dte));
1324 
1325 #ifdef AMDVI_ATS_ENABLE
1326 	/* If IOMMU and device support IOTLB, enable it. */
1327 	if (amdvi_dev_support_iotlb(softc, devid) && softc->iotlb)
1328 		temp.iotlb_enable = 1;
1329 #endif
1330 
1331 	/* Avoid duplicate I/O faults. */
1332 	temp.sup_second_io_fault = 1;
1333 	temp.sup_all_io_fault = amdvi_disable_io_fault;
1334 
1335 	temp.dt_valid = 1;
1336 	temp.domain_id = domain->id;
1337 
1338 	if (enable) {
1339 		if (domain->ptp) {
1340 			temp.pt_base = vtophys(domain->ptp) >> 12;
1341 			temp.pt_level = amdvi_ptp_level;
1342 		}
1343 		/*
1344 		 * XXX: Page table valid[TV] bit must be set even if host domain
1345 		 * page tables are not enabled.
1346 		 */
1347 		temp.pt_valid = 1;
1348 		temp.read_allow = 1;
1349 		temp.write_allow = 1;
1350 	}
1351 	amdvi_dte[devid] = temp;
1352 }
1353 
/*
 * Invalidate the device table entry of 'devid' on the IOMMU returned by
 * amdvi_find_iommu() (plus the device IOTLB when AMDVI_ATS_ENABLE is
 * defined and the device supports it), then call amdvi_wait() on that
 * IOMMU.
 */
static void
amdvi_inv_device(uint16_t devid)
{
	struct amdvi_softc *sc = amdvi_find_iommu(devid);

	KASSERT(sc, ("softc is NULL"));

	amdvi_cmd_inv_dte(sc, devid);
#ifdef AMDVI_ATS_ENABLE
	if (amdvi_dev_support_iotlb(sc, devid))
		amdvi_cmd_inv_iotlb(sc, devid);
#endif
	amdvi_wait(sc);
}
1369 
/*
 * Attach device 'devid' to the domain passed as 'arg': write an enabled
 * device table entry and invalidate the device afterwards.
 */
static void
amdvi_add_device(void *arg, uint16_t devid)
{
	struct amdvi_domain *dom = arg;

	KASSERT(dom != NULL, ("domain is NULL"));
#ifdef AMDVI_DEBUG_CMD
	printf("Assigning device(%d.%d.%d) to domain:%d\n",
	    RID2PCI_STR(devid), dom->id);
#endif
	amdvi_set_dte(dom, devid, true);
	amdvi_inv_device(devid);
}
1384 
/*
 * Detach device 'devid' from the domain passed as 'arg': rewrite its device
 * table entry with translation disabled and invalidate the device.
 */
static void
amdvi_remove_device(void *arg, uint16_t devid)
{
	struct amdvi_domain *dom = arg;

#ifdef AMDVI_DEBUG_CMD
	printf("Remove device(0x%x) from domain:%d\n",
	       devid, dom->id);
#endif
	amdvi_set_dte(dom, devid, false);
	amdvi_inv_device(devid);
}
1398 
1399 static void
1400 amdvi_enable(void)
1401 {
1402 	struct amdvi_ctrl *ctrl;
1403 	struct amdvi_softc *softc;
1404 	uint64_t val;
1405 	int i;
1406 
1407 	for (i = 0; i < ivhd_count; i++) {
1408 		softc = device_get_softc(ivhd_devs[i]);
1409 		KASSERT(softc, ("softc is NULL\n"));
1410 		ctrl = softc->ctrl;
1411 		KASSERT(ctrl, ("ctrl is NULL\n"));
1412 
1413 		val = (	AMDVI_CTRL_EN 		|
1414 			AMDVI_CTRL_CMD 		|
1415 		    	AMDVI_CTRL_ELOG 	|
1416 		    	AMDVI_CTRL_ELOGINT 	|
1417 		    	AMDVI_CTRL_INV_TO_1S);
1418 
1419 		if (softc->ivhd_flag & IVHD_FLAG_COH)
1420 			val |= AMDVI_CTRL_COH;
1421 		if (softc->ivhd_flag & IVHD_FLAG_HTT)
1422 			val |= AMDVI_CTRL_HTT;
1423 		if (softc->ivhd_flag & IVHD_FLAG_RPPW)
1424 			val |= AMDVI_CTRL_RPPW;
1425 		if (softc->ivhd_flag & IVHD_FLAG_PPW)
1426 			val |= AMDVI_CTRL_PPW;
1427 		if (softc->ivhd_flag & IVHD_FLAG_ISOC)
1428 			val |= AMDVI_CTRL_ISOC;
1429 
1430 		ctrl->control = val;
1431 	}
1432 }
1433 
1434 static void
1435 amdvi_disable(void)
1436 {
1437 	struct amdvi_ctrl *ctrl;
1438 	struct amdvi_softc *softc;
1439 	int i;
1440 
1441 	for (i = 0; i < ivhd_count; i++) {
1442 		softc = device_get_softc(ivhd_devs[i]);
1443 		KASSERT(softc, ("softc is NULL\n"));
1444 		ctrl = softc->ctrl;
1445 		KASSERT(ctrl, ("ctrl is NULL\n"));
1446 
1447 		ctrl->control = 0;
1448 	}
1449 }
1450 
1451 static void
1452 amdvi_inv_tlb(void *arg)
1453 {
1454 	struct amdvi_domain *domain;
1455 
1456 	domain = (struct amdvi_domain *)arg;
1457 	KASSERT(domain, ("domain is NULL"));
1458 	amdvi_do_inv_domain(domain->id, false);
1459 }
1460 
1461 struct iommu_ops iommu_ops_amd = {
1462 	amdvi_init,
1463 	amdvi_cleanup,
1464 	amdvi_enable,
1465 	amdvi_disable,
1466 	amdvi_create_domain,
1467 	amdvi_destroy_domain,
1468 	amdvi_create_mapping,
1469 	amdvi_destroy_mapping,
1470 	amdvi_add_device,
1471 	amdvi_remove_device,
1472 	amdvi_inv_tlb
1473 };
1474