xref: /linux/drivers/iommu/intel/dmar.c (revision c01044cc819160323f3ca4acd44fca487c4432e6)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of remapping devices
11  * reported to the OS by the BIOS via the DMA Remapping Reporting (DMAR)
12  * ACPI table.
13  *
14  * These routines are used by both DMA remapping and interrupt remapping.
15  */
16 
17 #define pr_fmt(fmt)     "DMAR: " fmt
18 
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/intel-iommu.h>
23 #include <linux/timer.h>
24 #include <linux/irq.h>
25 #include <linux/interrupt.h>
26 #include <linux/tboot.h>
27 #include <linux/dmi.h>
28 #include <linux/slab.h>
29 #include <linux/iommu.h>
30 #include <linux/numa.h>
31 #include <linux/limits.h>
32 #include <asm/irq_remapping.h>
33 #include <asm/iommu_table.h>
34 
35 #include "../irq_remapping.h"
36 
37 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
38 struct dmar_res_callback {
39 	dmar_res_handler_t	cb[ACPI_DMAR_TYPE_RESERVED];
40 	void			*arg[ACPI_DMAR_TYPE_RESERVED];
41 	bool			ignore_unhandled;
42 	bool			print_entry;
43 };
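/*
 * A minimal usage sketch (illustrative, not part of the original file):
 * a caller fills in one handler per ACPI DMAR structure type it cares
 * about and lets dmar_walk_dmar_table() dispatch each record, mirroring
 * the pattern used by parse_dmar_table() and detect_intel_iommu() below:
 *
 *	struct dmar_res_callback cb = {
 *		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
 *		.ignore_unhandled = true,
 *	};
 *	ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl, &cb);
 */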
44 
45 /*
46  * Assumptions:
47  * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
48  *    before the I/O devices managed by that unit.
49  * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
50  *    after the I/O devices managed by that unit.
51  * 3) Hotplug events are rare.
52  *
53  * Locking rules for DMA and interrupt remapping related global data structures:
54  * 1) Use dmar_global_lock in process context
55  * 2) Use RCU in interrupt context
56  */
57 DECLARE_RWSEM(dmar_global_lock);
58 LIST_HEAD(dmar_drhd_units);
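/*
 * Illustration of the locking rules above (a sketch; do_something() is a
 * hypothetical placeholder, not a real helper):
 *
 *	// Process context: hold dmar_global_lock while walking the list.
 *	down_read(&dmar_global_lock);
 *	for_each_drhd_unit(drhd)
 *		do_something(drhd);
 *	up_read(&dmar_global_lock);
 *
 *	// Interrupt (or other atomic) context: rely on RCU instead.
 *	rcu_read_lock();
 *	for_each_drhd_unit(drhd)
 *		do_something(drhd);
 *	rcu_read_unlock();
 */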
59 
60 struct acpi_table_header * __initdata dmar_tbl;
61 static int dmar_dev_scope_status = 1;
62 static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
63 
64 static int alloc_iommu(struct dmar_drhd_unit *drhd);
65 static void free_iommu(struct intel_iommu *iommu);
66 
67 extern const struct iommu_ops intel_iommu_ops;
68 
69 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
70 {
71 	/*
72 	 * Add the INCLUDE_ALL unit at the tail, so a scan of the list will
73 	 * find it at the very end.
74 	 */
75 	if (drhd->include_all)
76 		list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77 	else
78 		list_add_rcu(&drhd->list, &dmar_drhd_units);
79 }
80 
81 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82 {
83 	struct acpi_dmar_device_scope *scope;
84 
85 	*cnt = 0;
86 	while (start < end) {
87 		scope = start;
88 		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
89 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
90 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91 			(*cnt)++;
92 		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
93 			scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
94 			pr_warn("Unsupported device scope\n");
95 		}
96 		start += scope->length;
97 	}
98 	if (*cnt == 0)
99 		return NULL;
100 
101 	return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
102 }
103 
104 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
105 {
106 	int i;
107 	struct device *tmp_dev;
108 
109 	if (*devices && *cnt) {
110 		for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
111 			put_device(tmp_dev);
112 		kfree(*devices);
113 	}
114 
115 	*devices = NULL;
116 	*cnt = 0;
117 }
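/*
 * Sketch of how the two helpers above pair up, mirroring their use in
 * dmar_parse_one_drhd() below ("drhd" is the ACPI hardware unit entry and
 * "dmaru" the corresponding struct dmar_drhd_unit):
 *
 *	dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
 *					      ((void *)drhd) + drhd->header.length,
 *					      &dmaru->devices_cnt);
 *	...
 *	dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
 */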
118 
119 /* Optimize out kzalloc()/kfree() for normal cases */
120 static char dmar_pci_notify_info_buf[64];
121 
122 static struct dmar_pci_notify_info *
123 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
124 {
125 	int level = 0;
126 	size_t size;
127 	struct pci_dev *tmp;
128 	struct dmar_pci_notify_info *info;
129 
130 	BUG_ON(dev->is_virtfn);
131 
132 	/*
133 	 * Ignore devices that have a domain number higher than what can
134 	 * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
135 	 */
136 	if (pci_domain_nr(dev->bus) > U16_MAX)
137 		return NULL;
138 
139 	/* Only generate path[] for device addition event */
140 	if (event == BUS_NOTIFY_ADD_DEVICE)
141 		for (tmp = dev; tmp; tmp = tmp->bus->self)
142 			level++;
143 
144 	size = struct_size(info, path, level);
145 	if (size <= sizeof(dmar_pci_notify_info_buf)) {
146 		info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
147 	} else {
148 		info = kzalloc(size, GFP_KERNEL);
149 		if (!info) {
150 			pr_warn("Out of memory when allocating notify_info "
151 				"for %s.\n", pci_name(dev));
152 			if (dmar_dev_scope_status == 0)
153 				dmar_dev_scope_status = -ENOMEM;
154 			return NULL;
155 		}
156 	}
157 
158 	info->event = event;
159 	info->dev = dev;
160 	info->seg = pci_domain_nr(dev->bus);
161 	info->level = level;
162 	if (event == BUS_NOTIFY_ADD_DEVICE) {
163 		for (tmp = dev; tmp; tmp = tmp->bus->self) {
164 			level--;
165 			info->path[level].bus = tmp->bus->number;
166 			info->path[level].device = PCI_SLOT(tmp->devfn);
167 			info->path[level].function = PCI_FUNC(tmp->devfn);
168 			if (pci_is_root_bus(tmp->bus))
169 				info->bus = tmp->bus->number;
170 		}
171 	}
172 
173 	return info;
174 }
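/*
 * Worked example of the path[] layout built above (hypothetical topology):
 * for an endpoint 0000:03:00.1 sitting behind a root port at 00:1c.0,
 * level == 2 and the second loop fills the array from the root downwards:
 *
 *	info->bus     = 0;                              // root bus
 *	info->path[0] = { .bus = 0, .device = 0x1c, .function = 0 };
 *	info->path[1] = { .bus = 3, .device = 0x00, .function = 1 };
 */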
175 
176 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
177 {
178 	if ((void *)info != dmar_pci_notify_info_buf)
179 		kfree(info);
180 }
181 
182 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
183 				struct acpi_dmar_pci_path *path, int count)
184 {
185 	int i;
186 
187 	if (info->bus != bus)
188 		goto fallback;
189 	if (info->level != count)
190 		goto fallback;
191 
192 	for (i = 0; i < count; i++) {
193 		if (path[i].device != info->path[i].device ||
194 		    path[i].function != info->path[i].function)
195 			goto fallback;
196 	}
197 
198 	return true;
199 
200 fallback:
201 
202 	if (count != 1)
203 		return false;
204 
205 	i = info->level - 1;
206 	if (bus              == info->path[i].bus &&
207 	    path[0].device   == info->path[i].device &&
208 	    path[0].function == info->path[i].function) {
209 		pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
210 			bus, path[0].device, path[0].function);
211 		return true;
212 	}
213 
214 	return false;
215 }
216 
217 /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
218 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
219 			  void *start, void*end, u16 segment,
220 			  struct dmar_dev_scope *devices,
221 			  int devices_cnt)
222 {
223 	int i, level;
224 	struct device *tmp, *dev = &info->dev->dev;
225 	struct acpi_dmar_device_scope *scope;
226 	struct acpi_dmar_pci_path *path;
227 
228 	if (segment != info->seg)
229 		return 0;
230 
231 	for (; start < end; start += scope->length) {
232 		scope = start;
233 		if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
234 		    scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
235 			continue;
236 
237 		path = (struct acpi_dmar_pci_path *)(scope + 1);
238 		level = (scope->length - sizeof(*scope)) / sizeof(*path);
239 		if (!dmar_match_pci_path(info, scope->bus, path, level))
240 			continue;
241 
242 		/*
243 		 * We expect devices with endpoint scope to have normal PCI
244 		 * headers, and devices with bridge scope to have bridge PCI
245 		 * headers.  However PCI NTB devices may be listed in the
246 		 * DMAR table with bridge scope, even though they have a
247 		 * normal PCI header.  NTB devices are identified by class
248 		 * "BRIDGE_OTHER" (0680h) - we don't declare a scope mismatch
249 		 * for this special case.
250 		 */
251 		if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
252 		     info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
253 		    (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
254 		     (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
255 		      info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
256 			pr_warn("Device scope type does not match for %s\n",
257 				pci_name(info->dev));
258 			return -EINVAL;
259 		}
260 
261 		for_each_dev_scope(devices, devices_cnt, i, tmp)
262 			if (tmp == NULL) {
263 				devices[i].bus = info->dev->bus->number;
264 				devices[i].devfn = info->dev->devfn;
265 				rcu_assign_pointer(devices[i].dev,
266 						   get_device(dev));
267 				return 1;
268 			}
269 		BUG_ON(i >= devices_cnt);
270 	}
271 
272 	return 0;
273 }
274 
275 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
276 			  struct dmar_dev_scope *devices, int count)
277 {
278 	int index;
279 	struct device *tmp;
280 
281 	if (info->seg != segment)
282 		return 0;
283 
284 	for_each_active_dev_scope(devices, count, index, tmp)
285 		if (tmp == &info->dev->dev) {
286 			RCU_INIT_POINTER(devices[index].dev, NULL);
287 			synchronize_rcu();
288 			put_device(tmp);
289 			return 1;
290 		}
291 
292 	return 0;
293 }
294 
295 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
296 {
297 	int ret = 0;
298 	struct dmar_drhd_unit *dmaru;
299 	struct acpi_dmar_hardware_unit *drhd;
300 
301 	for_each_drhd_unit(dmaru) {
302 		if (dmaru->include_all)
303 			continue;
304 
305 		drhd = container_of(dmaru->hdr,
306 				    struct acpi_dmar_hardware_unit, header);
307 		ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
308 				((void *)drhd) + drhd->header.length,
309 				dmaru->segment,
310 				dmaru->devices, dmaru->devices_cnt);
311 		if (ret)
312 			break;
313 	}
314 	if (ret >= 0)
315 		ret = dmar_iommu_notify_scope_dev(info);
316 	if (ret < 0 && dmar_dev_scope_status == 0)
317 		dmar_dev_scope_status = ret;
318 
319 	if (ret >= 0)
320 		intel_irq_remap_add_device(info);
321 
322 	return ret;
323 }
324 
325 static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
326 {
327 	struct dmar_drhd_unit *dmaru;
328 
329 	for_each_drhd_unit(dmaru)
330 		if (dmar_remove_dev_scope(info, dmaru->segment,
331 			dmaru->devices, dmaru->devices_cnt))
332 			break;
333 	dmar_iommu_notify_scope_dev(info);
334 }
335 
336 static int dmar_pci_bus_notifier(struct notifier_block *nb,
337 				 unsigned long action, void *data)
338 {
339 	struct pci_dev *pdev = to_pci_dev(data);
340 	struct dmar_pci_notify_info *info;
341 
342 	/* Only care about add/remove events for physical functions.
343 	 * For VFs we actually do the lookup based on the corresponding
344 	 * PF in device_to_iommu() anyway. */
345 	if (pdev->is_virtfn)
346 		return NOTIFY_DONE;
347 	if (action != BUS_NOTIFY_ADD_DEVICE &&
348 	    action != BUS_NOTIFY_REMOVED_DEVICE)
349 		return NOTIFY_DONE;
350 
351 	info = dmar_alloc_pci_notify_info(pdev, action);
352 	if (!info)
353 		return NOTIFY_DONE;
354 
355 	down_write(&dmar_global_lock);
356 	if (action == BUS_NOTIFY_ADD_DEVICE)
357 		dmar_pci_bus_add_dev(info);
358 	else if (action == BUS_NOTIFY_REMOVED_DEVICE)
359 		dmar_pci_bus_del_dev(info);
360 	up_write(&dmar_global_lock);
361 
362 	dmar_free_pci_notify_info(info);
363 
364 	return NOTIFY_OK;
365 }
366 
367 static struct notifier_block dmar_pci_bus_nb = {
368 	.notifier_call = dmar_pci_bus_notifier,
369 	.priority = INT_MIN,
370 };
371 
372 static struct dmar_drhd_unit *
373 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
374 {
375 	struct dmar_drhd_unit *dmaru;
376 
377 	list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
378 				dmar_rcu_check())
379 		if (dmaru->segment == drhd->segment &&
380 		    dmaru->reg_base_addr == drhd->address)
381 			return dmaru;
382 
383 	return NULL;
384 }
385 
386 /**
387  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
388  * structure which uniquely represents one DMA remapping hardware unit
389  * present in the platform
390  */
391 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
392 {
393 	struct acpi_dmar_hardware_unit *drhd;
394 	struct dmar_drhd_unit *dmaru;
395 	int ret;
396 
397 	drhd = (struct acpi_dmar_hardware_unit *)header;
398 	dmaru = dmar_find_dmaru(drhd);
399 	if (dmaru)
400 		goto out;
401 
402 	dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
403 	if (!dmaru)
404 		return -ENOMEM;
405 
406 	/*
407 	 * If the header was allocated from the slab by an ACPI _DSM method,
408 	 * copy the content because the memory buffer will be freed on return.
409 	 */
410 	dmaru->hdr = (void *)(dmaru + 1);
411 	memcpy(dmaru->hdr, header, header->length);
412 	dmaru->reg_base_addr = drhd->address;
413 	dmaru->segment = drhd->segment;
414 	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
415 	dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
416 					      ((void *)drhd) + drhd->header.length,
417 					      &dmaru->devices_cnt);
418 	if (dmaru->devices_cnt && dmaru->devices == NULL) {
419 		kfree(dmaru);
420 		return -ENOMEM;
421 	}
422 
423 	ret = alloc_iommu(dmaru);
424 	if (ret) {
425 		dmar_free_dev_scope(&dmaru->devices,
426 				    &dmaru->devices_cnt);
427 		kfree(dmaru);
428 		return ret;
429 	}
430 	dmar_register_drhd_unit(dmaru);
431 
432 out:
433 	if (arg)
434 		(*(int *)arg)++;
435 
436 	return 0;
437 }
438 
439 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
440 {
441 	if (dmaru->devices && dmaru->devices_cnt)
442 		dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
443 	if (dmaru->iommu)
444 		free_iommu(dmaru->iommu);
445 	kfree(dmaru);
446 }
447 
448 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
449 				      void *arg)
450 {
451 	struct acpi_dmar_andd *andd = (void *)header;
452 
453 	/* Check for NUL termination within the designated length */
454 	if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
455 		pr_warn(FW_BUG
456 			   "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
457 			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
458 			   dmi_get_system_info(DMI_BIOS_VENDOR),
459 			   dmi_get_system_info(DMI_BIOS_VERSION),
460 			   dmi_get_system_info(DMI_PRODUCT_VERSION));
461 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
462 		return -EINVAL;
463 	}
464 	pr_info("ANDD device: %x name: %s\n", andd->device_number,
465 		andd->device_name);
466 
467 	return 0;
468 }
469 
470 #ifdef CONFIG_ACPI_NUMA
471 static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
472 {
473 	struct acpi_dmar_rhsa *rhsa;
474 	struct dmar_drhd_unit *drhd;
475 
476 	rhsa = (struct acpi_dmar_rhsa *)header;
477 	for_each_drhd_unit(drhd) {
478 		if (drhd->reg_base_addr == rhsa->base_address) {
479 			int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
480 
481 			if (!node_online(node))
482 				node = NUMA_NO_NODE;
483 			drhd->iommu->node = node;
484 			return 0;
485 		}
486 	}
487 	pr_warn(FW_BUG
488 		"Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
489 		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
490 		rhsa->base_address,
491 		dmi_get_system_info(DMI_BIOS_VENDOR),
492 		dmi_get_system_info(DMI_BIOS_VERSION),
493 		dmi_get_system_info(DMI_PRODUCT_VERSION));
494 	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
495 
496 	return 0;
497 }
498 #else
499 #define	dmar_parse_one_rhsa		dmar_res_noop
500 #endif
501 
502 static void
503 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
504 {
505 	struct acpi_dmar_hardware_unit *drhd;
506 	struct acpi_dmar_reserved_memory *rmrr;
507 	struct acpi_dmar_atsr *atsr;
508 	struct acpi_dmar_rhsa *rhsa;
509 
510 	switch (header->type) {
511 	case ACPI_DMAR_TYPE_HARDWARE_UNIT:
512 		drhd = container_of(header, struct acpi_dmar_hardware_unit,
513 				    header);
514 		pr_info("DRHD base: %#016Lx flags: %#x\n",
515 			(unsigned long long)drhd->address, drhd->flags);
516 		break;
517 	case ACPI_DMAR_TYPE_RESERVED_MEMORY:
518 		rmrr = container_of(header, struct acpi_dmar_reserved_memory,
519 				    header);
520 		pr_info("RMRR base: %#016Lx end: %#016Lx\n",
521 			(unsigned long long)rmrr->base_address,
522 			(unsigned long long)rmrr->end_address);
523 		break;
524 	case ACPI_DMAR_TYPE_ROOT_ATS:
525 		atsr = container_of(header, struct acpi_dmar_atsr, header);
526 		pr_info("ATSR flags: %#x\n", atsr->flags);
527 		break;
528 	case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
529 		rhsa = container_of(header, struct acpi_dmar_rhsa, header);
530 		pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
531 		       (unsigned long long)rhsa->base_address,
532 		       rhsa->proximity_domain);
533 		break;
534 	case ACPI_DMAR_TYPE_NAMESPACE:
535 		/* We don't print this here because we need to sanity-check
536 		   it first. So print it in dmar_parse_one_andd() instead. */
537 		break;
538 	}
539 }
540 
541 /**
542  * dmar_table_detect - checks to see if the platform supports DMAR devices
543  */
544 static int __init dmar_table_detect(void)
545 {
546 	acpi_status status = AE_OK;
547 
548 	/* if we can find the DMAR table, then there are DMAR devices */
549 	status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
550 
551 	if (ACPI_SUCCESS(status) && !dmar_tbl) {
552 		pr_warn("Unable to map DMAR\n");
553 		status = AE_NOT_FOUND;
554 	}
555 
556 	return ACPI_SUCCESS(status) ? 0 : -ENOENT;
557 }
558 
559 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
560 				       size_t len, struct dmar_res_callback *cb)
561 {
562 	struct acpi_dmar_header *iter, *next;
563 	struct acpi_dmar_header *end = ((void *)start) + len;
564 
565 	for (iter = start; iter < end; iter = next) {
566 		next = (void *)iter + iter->length;
567 		if (iter->length == 0) {
568 			/* Avoid looping forever on bad ACPI tables */
569 			pr_debug(FW_BUG "Invalid 0-length structure\n");
570 			break;
571 		} else if (next > end) {
572 			/* Avoid passing table end */
573 			pr_warn(FW_BUG "Record passes table end\n");
574 			return -EINVAL;
575 		}
576 
577 		if (cb->print_entry)
578 			dmar_table_print_dmar_entry(iter);
579 
580 		if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
581 			/* continue for forward compatibility */
582 			pr_debug("Unknown DMAR structure type %d\n",
583 				 iter->type);
584 		} else if (cb->cb[iter->type]) {
585 			int ret;
586 
587 			ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
588 			if (ret)
589 				return ret;
590 		} else if (!cb->ignore_unhandled) {
591 			pr_warn("No handler for DMAR structure type %d\n",
592 				iter->type);
593 			return -EINVAL;
594 		}
595 	}
596 
597 	return 0;
598 }
599 
600 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
601 				       struct dmar_res_callback *cb)
602 {
603 	return dmar_walk_remapping_entries((void *)(dmar + 1),
604 			dmar->header.length - sizeof(*dmar), cb);
605 }
606 
607 /**
608  * parse_dmar_table - parses the DMA Remapping Reporting (DMAR) table
609  */
610 static int __init
611 parse_dmar_table(void)
612 {
613 	struct acpi_table_dmar *dmar;
614 	int drhd_count = 0;
615 	int ret;
616 	struct dmar_res_callback cb = {
617 		.print_entry = true,
618 		.ignore_unhandled = true,
619 		.arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
620 		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
621 		.cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
622 		.cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
623 		.cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
624 		.cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
625 	};
626 
627 	/*
628 	 * Do it again; the earlier dmar_tbl mapping could have been set up
629 	 * with a fixed map.
630 	 */
631 	dmar_table_detect();
632 
633 	/*
634 	 * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
635 	 * that SINIT saved in SinitMleData in the TXT heap (which is DMA protected)
636 	 */
637 	dmar_tbl = tboot_get_dmar_table(dmar_tbl);
638 
639 	dmar = (struct acpi_table_dmar *)dmar_tbl;
640 	if (!dmar)
641 		return -ENODEV;
642 
643 	if (dmar->width < PAGE_SHIFT - 1) {
644 		pr_warn("Invalid DMAR haw\n");
645 		return -EINVAL;
646 	}
647 
648 	pr_info("Host address width %d\n", dmar->width + 1);
649 	ret = dmar_walk_dmar_table(dmar, &cb);
650 	if (ret == 0 && drhd_count == 0)
651 		pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
652 
653 	return ret;
654 }
655 
656 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
657 				 int cnt, struct pci_dev *dev)
658 {
659 	int index;
660 	struct device *tmp;
661 
662 	while (dev) {
663 		for_each_active_dev_scope(devices, cnt, index, tmp)
664 			if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
665 				return 1;
666 
667 		/* Check our parent */
668 		dev = dev->bus->self;
669 	}
670 
671 	return 0;
672 }
673 
674 struct dmar_drhd_unit *
675 dmar_find_matched_drhd_unit(struct pci_dev *dev)
676 {
677 	struct dmar_drhd_unit *dmaru;
678 	struct acpi_dmar_hardware_unit *drhd;
679 
680 	dev = pci_physfn(dev);
681 
682 	rcu_read_lock();
683 	for_each_drhd_unit(dmaru) {
684 		drhd = container_of(dmaru->hdr,
685 				    struct acpi_dmar_hardware_unit,
686 				    header);
687 
688 		if (dmaru->include_all &&
689 		    drhd->segment == pci_domain_nr(dev->bus))
690 			goto out;
691 
692 		if (dmar_pci_device_match(dmaru->devices,
693 					  dmaru->devices_cnt, dev))
694 			goto out;
695 	}
696 	dmaru = NULL;
697 out:
698 	rcu_read_unlock();
699 
700 	return dmaru;
701 }
702 
703 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
704 					      struct acpi_device *adev)
705 {
706 	struct dmar_drhd_unit *dmaru;
707 	struct acpi_dmar_hardware_unit *drhd;
708 	struct acpi_dmar_device_scope *scope;
709 	struct device *tmp;
710 	int i;
711 	struct acpi_dmar_pci_path *path;
712 
713 	for_each_drhd_unit(dmaru) {
714 		drhd = container_of(dmaru->hdr,
715 				    struct acpi_dmar_hardware_unit,
716 				    header);
717 
718 		for (scope = (void *)(drhd + 1);
719 		     (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
720 		     scope = ((void *)scope) + scope->length) {
721 			if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
722 				continue;
723 			if (scope->enumeration_id != device_number)
724 				continue;
725 
726 			path = (void *)(scope + 1);
727 			pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
728 				dev_name(&adev->dev), dmaru->reg_base_addr,
729 				scope->bus, path->device, path->function);
730 			for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
731 				if (tmp == NULL) {
732 					dmaru->devices[i].bus = scope->bus;
733 					dmaru->devices[i].devfn = PCI_DEVFN(path->device,
734 									    path->function);
735 					rcu_assign_pointer(dmaru->devices[i].dev,
736 							   get_device(&adev->dev));
737 					return;
738 				}
739 			BUG_ON(i >= dmaru->devices_cnt);
740 		}
741 	}
742 	pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
743 		device_number, dev_name(&adev->dev));
744 }
745 
746 static int __init dmar_acpi_dev_scope_init(void)
747 {
748 	struct acpi_dmar_andd *andd;
749 
750 	if (dmar_tbl == NULL)
751 		return -ENODEV;
752 
753 	for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
754 	     ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
755 	     andd = ((void *)andd) + andd->header.length) {
756 		if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
757 			acpi_handle h;
758 			struct acpi_device *adev;
759 
760 			if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
761 							  andd->device_name,
762 							  &h))) {
763 				pr_err("Failed to find handle for ACPI object %s\n",
764 				       andd->device_name);
765 				continue;
766 			}
767 			if (acpi_bus_get_device(h, &adev)) {
768 				pr_err("Failed to get device for ACPI object %s\n",
769 				       andd->device_name);
770 				continue;
771 			}
772 			dmar_acpi_insert_dev_scope(andd->device_number, adev);
773 		}
774 	}
775 	return 0;
776 }
777 
778 int __init dmar_dev_scope_init(void)
779 {
780 	struct pci_dev *dev = NULL;
781 	struct dmar_pci_notify_info *info;
782 
783 	if (dmar_dev_scope_status != 1)
784 		return dmar_dev_scope_status;
785 
786 	if (list_empty(&dmar_drhd_units)) {
787 		dmar_dev_scope_status = -ENODEV;
788 	} else {
789 		dmar_dev_scope_status = 0;
790 
791 		dmar_acpi_dev_scope_init();
792 
793 		for_each_pci_dev(dev) {
794 			if (dev->is_virtfn)
795 				continue;
796 
797 			info = dmar_alloc_pci_notify_info(dev,
798 					BUS_NOTIFY_ADD_DEVICE);
799 			if (!info) {
800 				return dmar_dev_scope_status;
801 			} else {
802 				dmar_pci_bus_add_dev(info);
803 				dmar_free_pci_notify_info(info);
804 			}
805 		}
806 	}
807 
808 	return dmar_dev_scope_status;
809 }
810 
811 void __init dmar_register_bus_notifier(void)
812 {
813 	bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
814 }
815 
816 
817 int __init dmar_table_init(void)
818 {
819 	static int dmar_table_initialized;
820 	int ret;
821 
822 	if (dmar_table_initialized == 0) {
823 		ret = parse_dmar_table();
824 		if (ret < 0) {
825 			if (ret != -ENODEV)
826 				pr_info("Parse DMAR table failure.\n");
827 		} else  if (list_empty(&dmar_drhd_units)) {
828 			pr_info("No DMAR devices found\n");
829 			ret = -ENODEV;
830 		}
831 
832 		if (ret < 0)
833 			dmar_table_initialized = ret;
834 		else
835 			dmar_table_initialized = 1;
836 	}
837 
838 	return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
839 }
840 
841 static void warn_invalid_dmar(u64 addr, const char *message)
842 {
843 	pr_warn_once(FW_BUG
844 		"Your BIOS is broken; DMAR reported at address %llx%s!\n"
845 		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
846 		addr, message,
847 		dmi_get_system_info(DMI_BIOS_VENDOR),
848 		dmi_get_system_info(DMI_BIOS_VERSION),
849 		dmi_get_system_info(DMI_PRODUCT_VERSION));
850 	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
851 }
852 
853 static int __ref
854 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
855 {
856 	struct acpi_dmar_hardware_unit *drhd;
857 	void __iomem *addr;
858 	u64 cap, ecap;
859 
860 	drhd = (void *)entry;
861 	if (!drhd->address) {
862 		warn_invalid_dmar(0, "");
863 		return -EINVAL;
864 	}
865 
866 	if (arg)
867 		addr = ioremap(drhd->address, VTD_PAGE_SIZE);
868 	else
869 		addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
870 	if (!addr) {
871 		pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
872 		return -EINVAL;
873 	}
874 
875 	cap = dmar_readq(addr + DMAR_CAP_REG);
876 	ecap = dmar_readq(addr + DMAR_ECAP_REG);
877 
878 	if (arg)
879 		iounmap(addr);
880 	else
881 		early_iounmap(addr, VTD_PAGE_SIZE);
882 
883 	if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
884 		warn_invalid_dmar(drhd->address, " returns all ones");
885 		return -EINVAL;
886 	}
887 
888 	return 0;
889 }
890 
891 int __init detect_intel_iommu(void)
892 {
893 	int ret;
894 	struct dmar_res_callback validate_drhd_cb = {
895 		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
896 		.ignore_unhandled = true,
897 	};
898 
899 	down_write(&dmar_global_lock);
900 	ret = dmar_table_detect();
901 	if (!ret)
902 		ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
903 					   &validate_drhd_cb);
904 	if (!ret && !no_iommu && !iommu_detected &&
905 	    (!dmar_disabled || dmar_platform_optin())) {
906 		iommu_detected = 1;
907 		/* Make sure ACS will be enabled */
908 		pci_request_acs();
909 	}
910 
911 #ifdef CONFIG_X86
912 	if (!ret) {
913 		x86_init.iommu.iommu_init = intel_iommu_init;
914 		x86_platform.iommu_shutdown = intel_iommu_shutdown;
915 	}
916 
917 #endif
918 
919 	if (dmar_tbl) {
920 		acpi_put_table(dmar_tbl);
921 		dmar_tbl = NULL;
922 	}
923 	up_write(&dmar_global_lock);
924 
925 	return ret ? ret : 1;
926 }
927 
928 static void unmap_iommu(struct intel_iommu *iommu)
929 {
930 	iounmap(iommu->reg);
931 	release_mem_region(iommu->reg_phys, iommu->reg_size);
932 }
933 
934 /**
935  * map_iommu: map the iommu's registers
936  * @iommu: the iommu to map
937  * @phys_addr: the physical address of the base register
938  *
939  * Memory map the iommu's registers.  Start w/ a single page, and
940  * possibly expand if that turns out to be insufficient.
941  */
942 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
943 {
944 	int map_size, err=0;
945 
946 	iommu->reg_phys = phys_addr;
947 	iommu->reg_size = VTD_PAGE_SIZE;
948 
949 	if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
950 		pr_err("Can't reserve memory\n");
951 		err = -EBUSY;
952 		goto out;
953 	}
954 
955 	iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
956 	if (!iommu->reg) {
957 		pr_err("Can't map the region\n");
958 		err = -ENOMEM;
959 		goto release;
960 	}
961 
962 	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
963 	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
964 
965 	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
966 		err = -EINVAL;
967 		warn_invalid_dmar(phys_addr, " returns all ones");
968 		goto unmap;
969 	}
970 	iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
971 
972 	/* the registers might be more than one page */
973 	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
974 			 cap_max_fault_reg_offset(iommu->cap));
975 	map_size = VTD_PAGE_ALIGN(map_size);
976 	if (map_size > iommu->reg_size) {
977 		iounmap(iommu->reg);
978 		release_mem_region(iommu->reg_phys, iommu->reg_size);
979 		iommu->reg_size = map_size;
980 		if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
981 					iommu->name)) {
982 			pr_err("Can't reserve memory\n");
983 			err = -EBUSY;
984 			goto out;
985 		}
986 		iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
987 		if (!iommu->reg) {
988 			pr_err("Can't map the region\n");
989 			err = -ENOMEM;
990 			goto release;
991 		}
992 	}
993 	err = 0;
994 	goto out;
995 
996 unmap:
997 	iounmap(iommu->reg);
998 release:
999 	release_mem_region(iommu->reg_phys, iommu->reg_size);
1000 out:
1001 	return err;
1002 }
1003 
1004 static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1005 {
1006 	iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1007 					    DMAR_UNITS_SUPPORTED);
1008 	if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1009 		iommu->seq_id = -1;
1010 	} else {
1011 		set_bit(iommu->seq_id, dmar_seq_ids);
1012 		sprintf(iommu->name, "dmar%d", iommu->seq_id);
1013 	}
1014 
1015 	return iommu->seq_id;
1016 }
1017 
1018 static void dmar_free_seq_id(struct intel_iommu *iommu)
1019 {
1020 	if (iommu->seq_id >= 0) {
1021 		clear_bit(iommu->seq_id, dmar_seq_ids);
1022 		iommu->seq_id = -1;
1023 	}
1024 }
1025 
1026 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1027 {
1028 	struct intel_iommu *iommu;
1029 	u32 ver, sts;
1030 	int agaw = 0;
1031 	int msagaw = 0;
1032 	int err;
1033 
1034 	if (!drhd->reg_base_addr) {
1035 		warn_invalid_dmar(0, "");
1036 		return -EINVAL;
1037 	}
1038 
1039 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1040 	if (!iommu)
1041 		return -ENOMEM;
1042 
1043 	if (dmar_alloc_seq_id(iommu) < 0) {
1044 		pr_err("Failed to allocate seq_id\n");
1045 		err = -ENOSPC;
1046 		goto error;
1047 	}
1048 
1049 	err = map_iommu(iommu, drhd->reg_base_addr);
1050 	if (err) {
1051 		pr_err("Failed to map %s\n", iommu->name);
1052 		goto error_free_seq_id;
1053 	}
1054 
1055 	err = -EINVAL;
1056 	agaw = iommu_calculate_agaw(iommu);
1057 	if (agaw < 0) {
1058 		pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1059 			iommu->seq_id);
1060 		goto err_unmap;
1061 	}
1062 	msagaw = iommu_calculate_max_sagaw(iommu);
1063 	if (msagaw < 0) {
1064 		pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1065 			iommu->seq_id);
1066 		goto err_unmap;
1067 	}
1068 	iommu->agaw = agaw;
1069 	iommu->msagaw = msagaw;
1070 	iommu->segment = drhd->segment;
1071 
1072 	iommu->node = NUMA_NO_NODE;
1073 
1074 	ver = readl(iommu->reg + DMAR_VER_REG);
1075 	pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1076 		iommu->name,
1077 		(unsigned long long)drhd->reg_base_addr,
1078 		DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1079 		(unsigned long long)iommu->cap,
1080 		(unsigned long long)iommu->ecap);
1081 
1082 	/* Reflect status in gcmd */
1083 	sts = readl(iommu->reg + DMAR_GSTS_REG);
1084 	if (sts & DMA_GSTS_IRES)
1085 		iommu->gcmd |= DMA_GCMD_IRE;
1086 	if (sts & DMA_GSTS_TES)
1087 		iommu->gcmd |= DMA_GCMD_TE;
1088 	if (sts & DMA_GSTS_QIES)
1089 		iommu->gcmd |= DMA_GCMD_QIE;
1090 
1091 	raw_spin_lock_init(&iommu->register_lock);
1092 
1093 	if (intel_iommu_enabled) {
1094 		err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1095 					     intel_iommu_groups,
1096 					     "%s", iommu->name);
1097 		if (err)
1098 			goto err_unmap;
1099 
1100 		iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
1101 
1102 		err = iommu_device_register(&iommu->iommu);
1103 		if (err)
1104 			goto err_unmap;
1105 	}
1106 
1107 	drhd->iommu = iommu;
1108 	iommu->drhd = drhd;
1109 
1110 	return 0;
1111 
1112 err_unmap:
1113 	unmap_iommu(iommu);
1114 error_free_seq_id:
1115 	dmar_free_seq_id(iommu);
1116 error:
1117 	kfree(iommu);
1118 	return err;
1119 }
1120 
1121 static void free_iommu(struct intel_iommu *iommu)
1122 {
1123 	if (intel_iommu_enabled) {
1124 		iommu_device_unregister(&iommu->iommu);
1125 		iommu_device_sysfs_remove(&iommu->iommu);
1126 	}
1127 
1128 	if (iommu->irq) {
1129 		if (iommu->pr_irq) {
1130 			free_irq(iommu->pr_irq, iommu);
1131 			dmar_free_hwirq(iommu->pr_irq);
1132 			iommu->pr_irq = 0;
1133 		}
1134 		free_irq(iommu->irq, iommu);
1135 		dmar_free_hwirq(iommu->irq);
1136 		iommu->irq = 0;
1137 	}
1138 
1139 	if (iommu->qi) {
1140 		free_page((unsigned long)iommu->qi->desc);
1141 		kfree(iommu->qi->desc_status);
1142 		kfree(iommu->qi);
1143 	}
1144 
1145 	if (iommu->reg)
1146 		unmap_iommu(iommu);
1147 
1148 	dmar_free_seq_id(iommu);
1149 	kfree(iommu);
1150 }
1151 
1152 /*
1153  * Reclaim all the submitted descriptors which have completed their work.
1154  */
1155 static inline void reclaim_free_desc(struct q_inval *qi)
1156 {
1157 	while (qi->desc_status[qi->free_tail] == QI_DONE ||
1158 	       qi->desc_status[qi->free_tail] == QI_ABORT) {
1159 		qi->desc_status[qi->free_tail] = QI_FREE;
1160 		qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1161 		qi->free_cnt++;
1162 	}
1163 }
1164 
1165 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1166 {
1167 	u32 fault;
1168 	int head, tail;
1169 	struct q_inval *qi = iommu->qi;
1170 	int shift = qi_shift(iommu);
1171 
1172 	if (qi->desc_status[wait_index] == QI_ABORT)
1173 		return -EAGAIN;
1174 
1175 	fault = readl(iommu->reg + DMAR_FSTS_REG);
1176 
1177 	/*
1178 	 * If IQE happens, the head points to the descriptor associated
1179 	 * with the error. No new descriptors are fetched until the IQE
1180 	 * is cleared.
1181 	 */
1182 	if (fault & DMA_FSTS_IQE) {
1183 		head = readl(iommu->reg + DMAR_IQH_REG);
1184 		if ((head >> shift) == index) {
1185 			struct qi_desc *desc = qi->desc + head;
1186 
1187 			/*
1188 			 * desc->qw2 and desc->qw3 are either reserved or
1189 			 * used by software as private data. We won't print
1190 			 * out these two qw's for security reasons.
1191 			 */
1192 			pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
1193 			       (unsigned long long)desc->qw0,
1194 			       (unsigned long long)desc->qw1);
1195 			memcpy(desc, qi->desc + (wait_index << shift),
1196 			       1 << shift);
1197 			writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1198 			return -EINVAL;
1199 		}
1200 	}
1201 
1202 	/*
1203 	 * If ITE happens, all pending wait_desc commands are aborted.
1204 	 * No new descriptors are fetched until the ITE is cleared.
1205 	 */
1206 	if (fault & DMA_FSTS_ITE) {
1207 		head = readl(iommu->reg + DMAR_IQH_REG);
1208 		head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1209 		head |= 1;
1210 		tail = readl(iommu->reg + DMAR_IQT_REG);
1211 		tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1212 
1213 		writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1214 
1215 		do {
1216 			if (qi->desc_status[head] == QI_IN_USE)
1217 				qi->desc_status[head] = QI_ABORT;
1218 			head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1219 		} while (head != tail);
1220 
1221 		if (qi->desc_status[wait_index] == QI_ABORT)
1222 			return -EAGAIN;
1223 	}
1224 
1225 	if (fault & DMA_FSTS_ICE)
1226 		writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1227 
1228 	return 0;
1229 }
1230 
1231 /*
1232  * Function to submit invalidation descriptors of all types to the queued
1233  * invalidation interface (QI). Multiple descriptors can be submitted at a
1234  * time; a wait descriptor is appended to each submission to ensure that
1235  * hardware has completed the invalidation before returning. Wait descriptors
1236  * can be part of the submission, but they will not be polled for completion.
1237  */
1238 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1239 		   unsigned int count, unsigned long options)
1240 {
1241 	struct q_inval *qi = iommu->qi;
1242 	struct qi_desc wait_desc;
1243 	int wait_index, index;
1244 	unsigned long flags;
1245 	int offset, shift;
1246 	int rc, i;
1247 
1248 	if (!qi)
1249 		return 0;
1250 
1251 restart:
1252 	rc = 0;
1253 
1254 	raw_spin_lock_irqsave(&qi->q_lock, flags);
1255 	/*
1256 	 * Check if we have enough empty slots in the queue to submit;
1257 	 * the calculation is based on:
1258 	 * # of desc + 1 wait desc + 1 space between head and tail
1259 	 */
1260 	while (qi->free_cnt < count + 2) {
1261 		raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1262 		cpu_relax();
1263 		raw_spin_lock_irqsave(&qi->q_lock, flags);
1264 	}
1265 
1266 	index = qi->free_head;
1267 	wait_index = (index + count) % QI_LENGTH;
1268 	shift = qi_shift(iommu);
1269 
1270 	for (i = 0; i < count; i++) {
1271 		offset = ((index + i) % QI_LENGTH) << shift;
1272 		memcpy(qi->desc + offset, &desc[i], 1 << shift);
1273 		qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1274 	}
1275 	qi->desc_status[wait_index] = QI_IN_USE;
1276 
1277 	wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1278 			QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1279 	if (options & QI_OPT_WAIT_DRAIN)
1280 		wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1281 	wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1282 	wait_desc.qw2 = 0;
1283 	wait_desc.qw3 = 0;
1284 
1285 	offset = wait_index << shift;
1286 	memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1287 
1288 	qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1289 	qi->free_cnt -= count + 1;
1290 
1291 	/*
1292 	 * Update the HW tail register to indicate the presence of
1293 	 * new descriptors.
1294 	 */
1295 	writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1296 
1297 	while (qi->desc_status[wait_index] != QI_DONE) {
1298 		/*
1299 		 * We will leave the interrupts disabled, to prevent interrupt
1300 		 * context from queueing another cmd while a cmd is already
1301 		 * submitted and waiting for completion on this CPU. This avoids
1302 		 * a deadlock where the interrupt context could wait indefinitely
1303 		 * for free slots in the queue.
1304 		 */
1305 		rc = qi_check_fault(iommu, index, wait_index);
1306 		if (rc)
1307 			break;
1308 
1309 		raw_spin_unlock(&qi->q_lock);
1310 		cpu_relax();
1311 		raw_spin_lock(&qi->q_lock);
1312 	}
1313 
1314 	for (i = 0; i < count; i++)
1315 		qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1316 
1317 	reclaim_free_desc(qi);
1318 	raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1319 
1320 	if (rc == -EAGAIN)
1321 		goto restart;
1322 
1323 	return rc;
1324 }
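/*
 * Batched-submission sketch (hypothetical; the callers below all pass
 * count == 1): several descriptors can be queued in one call and are
 * covered by the single wait descriptor appended by qi_submit_sync().
 * build_context_desc() and build_iotlb_desc() are made-up helpers standing
 * in for whatever fills the qi_desc fields.
 *
 *	struct qi_desc desc[2];
 *
 *	desc[0] = build_context_desc(did, sid, fm);
 *	desc[1] = build_iotlb_desc(did, addr, size_order);
 *	qi_submit_sync(iommu, desc, 2, 0);
 */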
1325 
1326 /*
1327  * Flush the global interrupt entry cache.
1328  */
1329 void qi_global_iec(struct intel_iommu *iommu)
1330 {
1331 	struct qi_desc desc;
1332 
1333 	desc.qw0 = QI_IEC_TYPE;
1334 	desc.qw1 = 0;
1335 	desc.qw2 = 0;
1336 	desc.qw3 = 0;
1337 
1338 	/* should never fail */
1339 	qi_submit_sync(iommu, &desc, 1, 0);
1340 }
1341 
1342 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1343 		      u64 type)
1344 {
1345 	struct qi_desc desc;
1346 
1347 	desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1348 			| QI_CC_GRAN(type) | QI_CC_TYPE;
1349 	desc.qw1 = 0;
1350 	desc.qw2 = 0;
1351 	desc.qw3 = 0;
1352 
1353 	qi_submit_sync(iommu, &desc, 1, 0);
1354 }
1355 
1356 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1357 		    unsigned int size_order, u64 type)
1358 {
1359 	u8 dw = 0, dr = 0;
1360 
1361 	struct qi_desc desc;
1362 	int ih = 0;
1363 
1364 	if (cap_write_drain(iommu->cap))
1365 		dw = 1;
1366 
1367 	if (cap_read_drain(iommu->cap))
1368 		dr = 1;
1369 
1370 	desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1371 		| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1372 	desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1373 		| QI_IOTLB_AM(size_order);
1374 	desc.qw2 = 0;
1375 	desc.qw3 = 0;
1376 
1377 	qi_submit_sync(iommu, &desc, 1, 0);
1378 }
1379 
1380 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1381 			u16 qdep, u64 addr, unsigned mask)
1382 {
1383 	struct qi_desc desc;
1384 
1385 	if (mask) {
1386 		addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1387 		desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1388 	} else
1389 		desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1390 
1391 	if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1392 		qdep = 0;
1393 
1394 	desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1395 		   QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1396 	desc.qw2 = 0;
1397 	desc.qw3 = 0;
1398 
1399 	qi_submit_sync(iommu, &desc, 1, 0);
1400 }
1401 
1402 /* PASID-based IOTLB invalidation */
1403 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1404 		     unsigned long npages, bool ih)
1405 {
1406 	struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1407 
1408 	/*
1409 	 * npages == -1 means a PASID-selective invalidation; otherwise a
1410 	 * positive value requests a Page-selective-within-PASID invalidation.
1411 	 * 0 is not a valid input.
1412 	 */
1413 	if (WARN_ON(!npages)) {
1414 		pr_err("Invalid input npages = %ld\n", npages);
1415 		return;
1416 	}
1417 
1418 	if (npages == -1) {
1419 		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1420 				QI_EIOTLB_DID(did) |
1421 				QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1422 				QI_EIOTLB_TYPE;
1423 		desc.qw1 = 0;
1424 	} else {
1425 		int mask = ilog2(__roundup_pow_of_two(npages));
1426 		unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1427 
1428 		if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1429 			addr &= ~(align - 1);
1430 
1431 		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1432 				QI_EIOTLB_DID(did) |
1433 				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1434 				QI_EIOTLB_TYPE;
1435 		desc.qw1 = QI_EIOTLB_ADDR(addr) |
1436 				QI_EIOTLB_IH(ih) |
1437 				QI_EIOTLB_AM(mask);
1438 	}
1439 
1440 	qi_submit_sync(iommu, &desc, 1, 0);
1441 }
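/*
 * Example calls (a sketch; "iommu", "did", "pasid" and "addr" come from the
 * caller's context):
 *
 *	// Flush everything cached for one PASID (PASID-selective).
 *	qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
 *
 *	// Flush a single 4KiB page within the PASID.
 *	qi_flush_piotlb(iommu, did, pasid, addr, 1, false);
 */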
1442 
1443 /* PASID-based device IOTLB invalidation */
1444 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1445 			      u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1446 {
1447 	unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1448 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1449 
1450 	desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1451 		QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1452 		QI_DEV_IOTLB_PFSID(pfsid);
1453 
1454 	/*
1455 	 * If S bit is 0, we only flush a single page. If S bit is set,
1456 	 * the least significant zero bit indicates the invalidation address
1457 	 * range. VT-d spec 6.5.2.6.
1458 	 * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
1459 	 * size order = 0 is PAGE_SIZE 4KB
1460 	 * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1461 	 * ECAP.
1462 	 */
1463 	if (addr & GENMASK_ULL(size_order + VTD_PAGE_SHIFT, 0))
1464 		pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1465 				    addr, size_order);
1466 
1467 	/* Take page address */
1468 	desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1469 
1470 	if (size_order) {
1471 		/*
1472 		 * Existing 0s in the address below size_order may be taken as the
1473 		 * least significant zero bit; set them to 1s to avoid flushing a
1474 		 * smaller range than desired.
1475 		 */
1476 		desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1477 					VTD_PAGE_SHIFT);
1478 		/* Clear size_order bit to indicate size */
1479 		desc.qw1 &= ~mask;
1480 		/* Set the S bit to indicate flushing more than 1 page */
1481 		desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1482 	}
1483 
1484 	qi_submit_sync(iommu, &desc, 1, 0);
1485 }
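/*
 * Worked example of the size encoding above: with size_order == 2 (16KiB,
 * i.e. four 4KiB pages), bits [13:12] of qw1 are first set to 0b11 and the
 * size_order bit (bit 13) is then cleared, leaving 0b01. The least
 * significant zero bit is therefore bit 13, which per VT-d spec 6.5.2.6
 * encodes a 16KiB invalidation range, and the S bit marks the multi-page
 * flush.
 */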
1486 
1487 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1488 			  u64 granu, u32 pasid)
1489 {
1490 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1491 
1492 	desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1493 			QI_PC_GRAN(granu) | QI_PC_TYPE;
1494 	qi_submit_sync(iommu, &desc, 1, 0);
1495 }
1496 
1497 /*
1498  * Disable the Queued Invalidation interface.
1499  */
1500 void dmar_disable_qi(struct intel_iommu *iommu)
1501 {
1502 	unsigned long flags;
1503 	u32 sts;
1504 	cycles_t start_time = get_cycles();
1505 
1506 	if (!ecap_qis(iommu->ecap))
1507 		return;
1508 
1509 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1510 
1511 	sts =  readl(iommu->reg + DMAR_GSTS_REG);
1512 	if (!(sts & DMA_GSTS_QIES))
1513 		goto end;
1514 
1515 	/*
1516 	 * Give the HW a chance to complete the pending invalidation requests.
1517 	 */
1518 	while ((readl(iommu->reg + DMAR_IQT_REG) !=
1519 		readl(iommu->reg + DMAR_IQH_REG)) &&
1520 		(DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1521 		cpu_relax();
1522 
1523 	iommu->gcmd &= ~DMA_GCMD_QIE;
1524 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1525 
1526 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1527 		      !(sts & DMA_GSTS_QIES), sts);
1528 end:
1529 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1530 }
1531 
1532 /*
1533  * Enable queued invalidation.
1534  */
1535 static void __dmar_enable_qi(struct intel_iommu *iommu)
1536 {
1537 	u32 sts;
1538 	unsigned long flags;
1539 	struct q_inval *qi = iommu->qi;
1540 	u64 val = virt_to_phys(qi->desc);
1541 
1542 	qi->free_head = qi->free_tail = 0;
1543 	qi->free_cnt = QI_LENGTH;
1544 
1545 	/*
1546 	 * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1547 	 * is present.
1548 	 */
1549 	if (ecap_smts(iommu->ecap))
1550 		val |= (1 << 11) | 1;
1551 
1552 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1553 
1554 	/* write zero to the tail reg */
1555 	writel(0, iommu->reg + DMAR_IQT_REG);
1556 
1557 	dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1558 
1559 	iommu->gcmd |= DMA_GCMD_QIE;
1560 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1561 
1562 	/* Make sure hardware completes it */
1563 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1564 
1565 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1566 }
1567 
1568 /*
1569  * Enable the Queued Invalidation interface. This is a must to support
1570  * interrupt remapping. It is also used by DMA remapping, where it replaces
1571  * register-based IOTLB invalidation.
1572  */
1573 int dmar_enable_qi(struct intel_iommu *iommu)
1574 {
1575 	struct q_inval *qi;
1576 	struct page *desc_page;
1577 
1578 	if (!ecap_qis(iommu->ecap))
1579 		return -ENOENT;
1580 
1581 	/*
1582 	 * Queued invalidation is already set up and enabled.
1583 	 */
1584 	if (iommu->qi)
1585 		return 0;
1586 
1587 	iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1588 	if (!iommu->qi)
1589 		return -ENOMEM;
1590 
1591 	qi = iommu->qi;
1592 
1593 	/*
1594 	 * Need two pages to accommodate 256 descriptors of 256 bits each
1595 	 * if the remapping hardware supports scalable mode translation.
1596 	 */
1597 	desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1598 				     !!ecap_smts(iommu->ecap));
1599 	if (!desc_page) {
1600 		kfree(qi);
1601 		iommu->qi = NULL;
1602 		return -ENOMEM;
1603 	}
1604 
1605 	qi->desc = page_address(desc_page);
1606 
1607 	qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1608 	if (!qi->desc_status) {
1609 		free_page((unsigned long) qi->desc);
1610 		kfree(qi);
1611 		iommu->qi = NULL;
1612 		return -ENOMEM;
1613 	}
1614 
1615 	raw_spin_lock_init(&qi->q_lock);
1616 
1617 	__dmar_enable_qi(iommu);
1618 
1619 	return 0;
1620 }
1621 
1622 /* iommu interrupt handling. Most of it is MSI-like. */
1623 
1624 enum faulttype {
1625 	DMA_REMAP,
1626 	INTR_REMAP,
1627 	UNKNOWN,
1628 };
1629 
1630 static const char *dma_remap_fault_reasons[] =
1631 {
1632 	"Software",
1633 	"Present bit in root entry is clear",
1634 	"Present bit in context entry is clear",
1635 	"Invalid context entry",
1636 	"Access beyond MGAW",
1637 	"PTE Write access is not set",
1638 	"PTE Read access is not set",
1639 	"Next page table ptr is invalid",
1640 	"Root table address invalid",
1641 	"Context table ptr is invalid",
1642 	"non-zero reserved fields in RTP",
1643 	"non-zero reserved fields in CTP",
1644 	"non-zero reserved fields in PTE",
1645 	"PCE for translation request specifies blocking",
1646 };
1647 
1648 static const char * const dma_remap_sm_fault_reasons[] = {
1649 	"SM: Invalid Root Table Address",
1650 	"SM: TTM 0 for request with PASID",
1651 	"SM: TTM 0 for page group request",
1652 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1653 	"SM: Error attempting to access Root Entry",
1654 	"SM: Present bit in Root Entry is clear",
1655 	"SM: Non-zero reserved field set in Root Entry",
1656 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1657 	"SM: Error attempting to access Context Entry",
1658 	"SM: Present bit in Context Entry is clear",
1659 	"SM: Non-zero reserved field set in the Context Entry",
1660 	"SM: Invalid Context Entry",
1661 	"SM: DTE field in Context Entry is clear",
1662 	"SM: PASID Enable field in Context Entry is clear",
1663 	"SM: PASID is larger than the max in Context Entry",
1664 	"SM: PRE field in Context-Entry is clear",
1665 	"SM: RID_PASID field error in Context-Entry",
1666 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1667 	"SM: Error attempting to access the PASID Directory Entry",
1668 	"SM: Present bit in Directory Entry is clear",
1669 	"SM: Non-zero reserved field set in PASID Directory Entry",
1670 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1671 	"SM: Error attempting to access PASID Table Entry",
1672 	"SM: Present bit in PASID Table Entry is clear",
1673 	"SM: Non-zero reserved field set in PASID Table Entry",
1674 	"SM: Invalid Scalable-Mode PASID Table Entry",
1675 	"SM: ERE field is clear in PASID Table Entry",
1676 	"SM: SRE field is clear in PASID Table Entry",
1677 	"Unknown", "Unknown",/* 0x5E-0x5F */
1678 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1679 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1680 	"SM: Error attempting to access first-level paging entry",
1681 	"SM: Present bit in first-level paging entry is clear",
1682 	"SM: Non-zero reserved field set in first-level paging entry",
1683 	"SM: Error attempting to access FL-PML4 entry",
1684 	"SM: First-level entry address beyond MGAW in Nested translation",
1685 	"SM: Read permission error in FL-PML4 entry in Nested translation",
1686 	"SM: Read permission error in first-level paging entry in Nested translation",
1687 	"SM: Write permission error in first-level paging entry in Nested translation",
1688 	"SM: Error attempting to access second-level paging entry",
1689 	"SM: Read/Write permission error in second-level paging entry",
1690 	"SM: Non-zero reserved field set in second-level paging entry",
1691 	"SM: Invalid second-level page table pointer",
1692 	"SM: A/D bit update needed in second-level entry when set up in no snoop",
1693 	"Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1694 	"SM: Address in first-level translation is not canonical",
1695 	"SM: U/S set 0 for first-level translation with user privilege",
1696 	"SM: No execute permission for request with PASID and ER=1",
1697 	"SM: Address beyond the DMA hardware max",
1698 	"SM: Second-level entry address beyond the max",
1699 	"SM: No write permission for Write/AtomicOp request",
1700 	"SM: No read permission for Read/AtomicOp request",
1701 	"SM: Invalid address-interrupt address",
1702 	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1703 	"SM: A/D bit update needed in first-level entry when set up in no snoop",
1704 };
1705 
1706 static const char *irq_remap_fault_reasons[] =
1707 {
1708 	"Detected reserved fields in the decoded interrupt-remapped request",
1709 	"Interrupt index exceeded the interrupt-remapping table size",
1710 	"Present field in the IRTE entry is clear",
1711 	"Error accessing interrupt-remapping table pointed by IRTA_REG",
1712 	"Detected reserved fields in the IRTE entry",
1713 	"Blocked a compatibility format interrupt request",
1714 	"Blocked an interrupt request due to source-id verification failure",
1715 };
1716 
1717 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1718 {
1719 	if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1720 					ARRAY_SIZE(irq_remap_fault_reasons))) {
1721 		*fault_type = INTR_REMAP;
1722 		return irq_remap_fault_reasons[fault_reason - 0x20];
1723 	} else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1724 			ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1725 		*fault_type = DMA_REMAP;
1726 		return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1727 	} else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1728 		*fault_type = DMA_REMAP;
1729 		return dma_remap_fault_reasons[fault_reason];
1730 	} else {
1731 		*fault_type = UNKNOWN;
1732 		return "Unknown";
1733 	}
1734 }
1735 
1736 
1737 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1738 {
1739 	if (iommu->irq == irq)
1740 		return DMAR_FECTL_REG;
1741 	else if (iommu->pr_irq == irq)
1742 		return DMAR_PECTL_REG;
1743 	else
1744 		BUG();
1745 }
1746 
1747 void dmar_msi_unmask(struct irq_data *data)
1748 {
1749 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1750 	int reg = dmar_msi_reg(iommu, data->irq);
1751 	unsigned long flag;
1752 
1753 	/* unmask it */
1754 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1755 	writel(0, iommu->reg + reg);
1756 	/* Read a reg to force flush the post write */
1757 	readl(iommu->reg + reg);
1758 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1759 }
1760 
1761 void dmar_msi_mask(struct irq_data *data)
1762 {
1763 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1764 	int reg = dmar_msi_reg(iommu, data->irq);
1765 	unsigned long flag;
1766 
1767 	/* mask it */
1768 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1769 	writel(DMA_FECTL_IM, iommu->reg + reg);
1770 	/* Read a reg to force flush the post write */
1771 	readl(iommu->reg + reg);
1772 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1773 }
1774 
1775 void dmar_msi_write(int irq, struct msi_msg *msg)
1776 {
1777 	struct intel_iommu *iommu = irq_get_handler_data(irq);
1778 	int reg = dmar_msi_reg(iommu, irq);
1779 	unsigned long flag;
1780 
1781 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1782 	writel(msg->data, iommu->reg + reg + 4);
1783 	writel(msg->address_lo, iommu->reg + reg + 8);
1784 	writel(msg->address_hi, iommu->reg + reg + 12);
1785 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1786 }
1787 
1788 void dmar_msi_read(int irq, struct msi_msg *msg)
1789 {
1790 	struct intel_iommu *iommu = irq_get_handler_data(irq);
1791 	int reg = dmar_msi_reg(iommu, irq);
1792 	unsigned long flag;
1793 
1794 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1795 	msg->data = readl(iommu->reg + reg + 4);
1796 	msg->address_lo = readl(iommu->reg + reg + 8);
1797 	msg->address_hi = readl(iommu->reg + reg + 12);
1798 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1799 }
1800 
1801 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1802 		u8 fault_reason, u32 pasid, u16 source_id,
1803 		unsigned long long addr)
1804 {
1805 	const char *reason;
1806 	int fault_type;
1807 
1808 	reason = dmar_get_fault_reason(fault_reason, &fault_type);
1809 
1810 	if (fault_type == INTR_REMAP)
1811 		pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
1812 			source_id >> 8, PCI_SLOT(source_id & 0xFF),
1813 			PCI_FUNC(source_id & 0xFF), addr >> 48,
1814 			fault_reason, reason);
1815 	else
1816 		pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
1817 		       type ? "DMA Read" : "DMA Write",
1818 		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1819 		       PCI_FUNC(source_id & 0xFF), pasid, addr,
1820 		       fault_reason, reason);
1821 	return 0;
1822 }
1823 
1824 #define PRIMARY_FAULT_REG_LEN (16)
1825 irqreturn_t dmar_fault(int irq, void *dev_id)
1826 {
1827 	struct intel_iommu *iommu = dev_id;
1828 	int reg, fault_index;
1829 	u32 fault_status;
1830 	unsigned long flag;
1831 	static DEFINE_RATELIMIT_STATE(rs,
1832 				      DEFAULT_RATELIMIT_INTERVAL,
1833 				      DEFAULT_RATELIMIT_BURST);
1834 
1835 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1836 	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1837 	if (fault_status && __ratelimit(&rs))
1838 		pr_err("DRHD: handling fault status reg %x\n", fault_status);
1839 
1840 	/* TBD: ignore advanced fault log currently */
1841 	if (!(fault_status & DMA_FSTS_PPF))
1842 		goto unlock_exit;
1843 
1844 	fault_index = dma_fsts_fault_record_index(fault_status);
1845 	reg = cap_fault_reg_offset(iommu->cap);
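	/*
	 * Each primary fault record is 128 bits (PRIMARY_FAULT_REG_LEN
	 * bytes): the low qword holds the faulting page address, the
	 * third dword the source ID and PASID-present bit, and the
	 * highest dword the fault reason, request type, PASID value
	 * and the Fault (F) bit that is polled and cleared below.
	 */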
1846 	while (1) {
1847 		/* When ratelimited, skip printing and simply clear the fault */
1848 		bool ratelimited = !__ratelimit(&rs);
1849 		u8 fault_reason;
1850 		u16 source_id;
1851 		u64 guest_addr;
1852 		u32 pasid;
1853 		int type;
1854 		u32 data;
1855 		bool pasid_present;
1856 
1857 		/* highest 32 bits */
1858 		data = readl(iommu->reg + reg +
1859 				fault_index * PRIMARY_FAULT_REG_LEN + 12);
1860 		if (!(data & DMA_FRCD_F))
1861 			break;
1862 
1863 		if (!ratelimited) {
1864 			fault_reason = dma_frcd_fault_reason(data);
1865 			type = dma_frcd_type(data);
1866 
1867 			pasid = dma_frcd_pasid_value(data);
1868 			data = readl(iommu->reg + reg +
1869 				     fault_index * PRIMARY_FAULT_REG_LEN + 8);
1870 			source_id = dma_frcd_source_id(data);
1871 
1872 			pasid_present = dma_frcd_pasid_present(data);
1873 			guest_addr = dmar_readq(iommu->reg + reg +
1874 					fault_index * PRIMARY_FAULT_REG_LEN);
1875 			guest_addr = dma_frcd_page_addr(guest_addr);
1876 		}
1877 
1878 		/* clear the fault */
1879 		writel(DMA_FRCD_F, iommu->reg + reg +
1880 			fault_index * PRIMARY_FAULT_REG_LEN + 12);
1881 
1882 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1883 
1884 		if (!ratelimited)
1885 			/* Use PASID -1 when no PASID is present */
1886 			dmar_fault_do_one(iommu, type, fault_reason,
1887 					  pasid_present ? pasid : -1,
1888 					  source_id, guest_addr);
1889 
1890 		fault_index++;
1891 		if (fault_index >= cap_num_fault_regs(iommu->cap))
1892 			fault_index = 0;
1893 		raw_spin_lock_irqsave(&iommu->register_lock, flag);
1894 	}
1895 
1896 	writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
1897 	       iommu->reg + DMAR_FSTS_REG);
1898 
1899 unlock_exit:
1900 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1901 	return IRQ_HANDLED;
1902 }
1903 
1904 int dmar_set_interrupt(struct intel_iommu *iommu)
1905 {
1906 	int irq, ret;
1907 
1908 	/*
1909 	 * Check if the fault interrupt is already initialized.
1910 	 */
1911 	if (iommu->irq)
1912 		return 0;
1913 
1914 	irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
1915 	if (irq > 0) {
1916 		iommu->irq = irq;
1917 	} else {
1918 		pr_err("No free IRQ vectors\n");
1919 		return -EINVAL;
1920 	}
1921 
1922 	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
1923 	if (ret)
1924 		pr_err("Can't request irq\n");
1925 	return ret;
1926 }
1927 
1928 int __init enable_drhd_fault_handling(void)
1929 {
1930 	struct dmar_drhd_unit *drhd;
1931 	struct intel_iommu *iommu;
1932 
1933 	/*
1934 	 * Enable fault control interrupt.
1935 	 */
1936 	for_each_iommu(iommu, drhd) {
1937 		u32 fault_status;
1938 		int ret = dmar_set_interrupt(iommu);
1939 
1940 		if (ret) {
1941 			pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
1942 			       (unsigned long long)drhd->reg_base_addr, ret);
1943 			return -1;
1944 		}
1945 
1946 		/*
1947 		 * Clear any previous faults.
1948 		 */
1949 		dmar_fault(iommu->irq, iommu);
1950 		fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1951 		writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1952 	}
1953 
1954 	return 0;
1955 }
1956 
1957 /*
1958  * Re-enable Queued Invalidation interface.
1959  */
1960 int dmar_reenable_qi(struct intel_iommu *iommu)
1961 {
1962 	if (!ecap_qis(iommu->ecap))
1963 		return -ENOENT;
1964 
1965 	if (!iommu->qi)
1966 		return -ENOENT;
1967 
1968 	/*
1969 	 * First disable queued invalidation.
1970 	 */
1971 	dmar_disable_qi(iommu);
1972 	/*
1973 	 * Then enable queued invalidation again. Since there are no pending
1974 	 * invalidation requests now, it's safe to re-enable queued
1975 	 * invalidation.
1976 	 */
1977 	__dmar_enable_qi(iommu);
1978 
1979 	return 0;
1980 }
1981 
1982 /*
1983  * Check interrupt remapping support in DMAR table description.
1984  */
1985 int __init dmar_ir_support(void)
1986 {
1987 	struct acpi_table_dmar *dmar;
1988 	dmar = (struct acpi_table_dmar *)dmar_tbl;
1989 	if (!dmar)
1990 		return 0;
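	/* Bit 0 of the DMAR table flags is INTR_REMAP (interrupt remapping supported). */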
1991 	return dmar->flags & 0x1;
1992 }
1993 
1994 /* Check whether DMAR units are in use */
1995 static inline bool dmar_in_use(void)
1996 {
1997 	return irq_remapping_enabled || intel_iommu_enabled;
1998 }
1999 
2000 static int __init dmar_free_unused_resources(void)
2001 {
2002 	struct dmar_drhd_unit *dmaru, *dmaru_n;
2003 
2004 	if (dmar_in_use())
2005 		return 0;
2006 
2007 	if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2008 		bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2009 
2010 	down_write(&dmar_global_lock);
2011 	list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2012 		list_del(&dmaru->list);
2013 		dmar_free_drhd(dmaru);
2014 	}
2015 	up_write(&dmar_global_lock);
2016 
2017 	return 0;
2018 }
2019 
2020 late_initcall(dmar_free_unused_resources);
2021 IOMMU_INIT_POST(detect_intel_iommu);
2022 
2023 /*
2024  * DMAR Hotplug Support
2025  * For more details, please refer to Intel(R) Virtualization Technology
2026  * for Directed I/O Architecture Specification, Rev 2.2, Section 8.8
2027  * "Remapping Hardware Unit Hot Plug".
2028  */
2029 static guid_t dmar_hp_guid =
2030 	GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2031 		  0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2032 
2033 /*
2034  * Currently there is only one revision, and the BIOS does not check the
2035  * revision id, so use 0 for safety.
2036  */
2037 #define	DMAR_DSM_REV_ID			0
2038 #define	DMAR_DSM_FUNC_DRHD		1
2039 #define	DMAR_DSM_FUNC_ATSR		2
2040 #define	DMAR_DSM_FUNC_RHSA		3
2041 
2042 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2043 {
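	/* acpi_check_dsm() takes a bitmask of function indices, hence 1 << func. */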
2044 	return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2045 }
2046 
2047 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2048 				  dmar_res_handler_t handler, void *arg)
2049 {
2050 	int ret = -ENODEV;
2051 	union acpi_object *obj;
2052 	struct acpi_dmar_header *start;
2053 	struct dmar_res_callback callback;
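	/* Map each _DSM function to the DMAR structure type its returned buffer contains. */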
2054 	static int res_type[] = {
2055 		[DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2056 		[DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2057 		[DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2058 	};
2059 
2060 	if (!dmar_detect_dsm(handle, func))
2061 		return 0;
2062 
2063 	obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2064 				      func, NULL, ACPI_TYPE_BUFFER);
2065 	if (!obj)
2066 		return -ENODEV;
2067 
2068 	memset(&callback, 0, sizeof(callback));
2069 	callback.cb[res_type[func]] = handler;
2070 	callback.arg[res_type[func]] = arg;
2071 	start = (struct acpi_dmar_header *)obj->buffer.pointer;
2072 	ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2073 
2074 	ACPI_FREE(obj);
2075 
2076 	return ret;
2077 }
2078 
2079 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2080 {
2081 	int ret;
2082 	struct dmar_drhd_unit *dmaru;
2083 
2084 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2085 	if (!dmaru)
2086 		return -ENODEV;
2087 
2088 	ret = dmar_ir_hotplug(dmaru, true);
2089 	if (ret == 0)
2090 		ret = dmar_iommu_hotplug(dmaru, true);
2091 
2092 	return ret;
2093 }
2094 
2095 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2096 {
2097 	int i, ret;
2098 	struct device *dev;
2099 	struct dmar_drhd_unit *dmaru;
2100 
2101 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2102 	if (!dmaru)
2103 		return 0;
2104 
2105 	/*
2106 	 * All PCI devices managed by this unit should have been destroyed.
2107 	 */
2108 	if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2109 		for_each_active_dev_scope(dmaru->devices,
2110 					  dmaru->devices_cnt, i, dev)
2111 			return -EBUSY;
2112 	}
2113 
2114 	ret = dmar_ir_hotplug(dmaru, false);
2115 	if (ret == 0)
2116 		ret = dmar_iommu_hotplug(dmaru, false);
2117 
2118 	return ret;
2119 }
2120 
2121 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2122 {
2123 	struct dmar_drhd_unit *dmaru;
2124 
2125 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2126 	if (dmaru) {
2127 		list_del_rcu(&dmaru->list);
2128 		synchronize_rcu();
2129 		dmar_free_drhd(dmaru);
2130 	}
2131 
2132 	return 0;
2133 }
2134 
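/*
 * Hot-add flow: validate and then parse the DRHD structures returned by
 * the _DSM method, parse the associated RHSA and ATSR structures, and
 * finally bring the new remapping units online.  On failure, the labels
 * below unwind whatever has been set up so far.
 */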
2135 static int dmar_hotplug_insert(acpi_handle handle)
2136 {
2137 	int ret;
2138 	int drhd_count = 0;
2139 
2140 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2141 				     &dmar_validate_one_drhd, (void *)1);
2142 	if (ret)
2143 		goto out;
2144 
2145 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2146 				     &dmar_parse_one_drhd, (void *)&drhd_count);
2147 	if (ret == 0 && drhd_count == 0) {
2148 		pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2149 		goto out;
2150 	} else if (ret) {
2151 		goto release_drhd;
2152 	}
2153 
2154 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2155 				     &dmar_parse_one_rhsa, NULL);
2156 	if (ret)
2157 		goto release_drhd;
2158 
2159 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2160 				     &dmar_parse_one_atsr, NULL);
2161 	if (ret)
2162 		goto release_atsr;
2163 
2164 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2165 				     &dmar_hp_add_drhd, NULL);
2166 	if (!ret)
2167 		return 0;
2168 
2169 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2170 			       &dmar_hp_remove_drhd, NULL);
2171 release_atsr:
2172 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2173 			       &dmar_release_one_atsr, NULL);
2174 release_drhd:
2175 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2176 			       &dmar_hp_release_drhd, NULL);
2177 out:
2178 	return ret;
2179 }
2180 
2181 static int dmar_hotplug_remove(acpi_handle handle)
2182 {
2183 	int ret;
2184 
2185 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2186 				     &dmar_check_one_atsr, NULL);
2187 	if (ret)
2188 		return ret;
2189 
2190 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2191 				     &dmar_hp_remove_drhd, NULL);
2192 	if (ret == 0) {
2193 		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2194 					       &dmar_release_one_atsr, NULL));
2195 		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2196 					       &dmar_hp_release_drhd, NULL));
2197 	} else {
2198 		dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2199 				       &dmar_hp_add_drhd, NULL);
2200 	}
2201 
2202 	return ret;
2203 }
2204 
2205 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2206 				       void *context, void **retval)
2207 {
2208 	acpi_handle *phdl = retval;
2209 
2210 	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2211 		*phdl = handle;
2212 		return AE_CTRL_TERMINATE;
2213 	}
2214 
2215 	return AE_OK;
2216 }
2217 
2218 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2219 {
2220 	int ret;
2221 	acpi_handle tmp = NULL;
2222 	acpi_status status;
2223 
2224 	if (!dmar_in_use())
2225 		return 0;
2226 
2227 	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2228 		tmp = handle;
2229 	} else {
2230 		status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2231 					     ACPI_UINT32_MAX,
2232 					     dmar_get_dsm_handle,
2233 					     NULL, NULL, &tmp);
2234 		if (ACPI_FAILURE(status)) {
2235 			pr_warn("Failed to locate _DSM method.\n");
2236 			return -ENXIO;
2237 		}
2238 	}
2239 	if (tmp == NULL)
2240 		return 0;
2241 
2242 	down_write(&dmar_global_lock);
2243 	if (insert)
2244 		ret = dmar_hotplug_insert(tmp);
2245 	else
2246 		ret = dmar_hotplug_remove(tmp);
2247 	up_write(&dmar_global_lock);
2248 
2249 	return ret;
2250 }
2251 
2252 int dmar_device_add(acpi_handle handle)
2253 {
2254 	return dmar_device_hotplug(handle, true);
2255 }
2256 
2257 int dmar_device_remove(acpi_handle handle)
2258 {
2259 	return dmar_device_hotplug(handle, false);
2260 }
2261 
2262 /*
2263  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2264  *
2265  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2266  * the ACPI DMAR table. This means that the platform boot firmware has made
2267  * sure no device can issue DMA outside of RMRR regions.
2268  */
2269 bool dmar_platform_optin(void)
2270 {
2271 	struct acpi_table_dmar *dmar;
2272 	acpi_status status;
2273 	bool ret;
2274 
2275 	status = acpi_get_table(ACPI_SIG_DMAR, 0,
2276 				(struct acpi_table_header **)&dmar);
2277 	if (ACPI_FAILURE(status))
2278 		return false;
2279 
2280 	ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2281 	acpi_put_table((struct acpi_table_header *)dmar);
2282 
2283 	return ret;
2284 }
2285 EXPORT_SYMBOL_GPL(dmar_platform_optin);
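
/*
 * Illustrative use only (the caller below is hypothetical, not part of
 * this file): a consumer deciding whether to trust firmware-provided DMA
 * protection might do
 *
 *	if (dmar_platform_optin())
 *		pr_info("Firmware opted in to DMA protection\n");
 */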
2286