xref: /linux/drivers/iommu/intel/dmar.c (revision 5cfe477f6a3f9a4d9b2906d442964f2115b0403f)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
12  * tables.
13  *
14  * These routines are used by both DMA-remapping and Interrupt-remapping
15  */
16 
17 #define pr_fmt(fmt)     "DMAR: " fmt
18 
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/intel-iommu.h>
23 #include <linux/timer.h>
24 #include <linux/irq.h>
25 #include <linux/interrupt.h>
26 #include <linux/tboot.h>
27 #include <linux/dmi.h>
28 #include <linux/slab.h>
29 #include <linux/iommu.h>
30 #include <linux/numa.h>
31 #include <linux/limits.h>
32 #include <asm/irq_remapping.h>
33 #include <asm/iommu_table.h>
34 #include <trace/events/intel_iommu.h>
35 
36 #include "../irq_remapping.h"
37 #include "perf.h"
38 
39 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
40 struct dmar_res_callback {
41 	dmar_res_handler_t	cb[ACPI_DMAR_TYPE_RESERVED];
42 	void			*arg[ACPI_DMAR_TYPE_RESERVED];
43 	bool			ignore_unhandled;
44 	bool			print_entry;
45 };
46 
/*
 * Assumptions:
 * 1) The hotplug framework guarantees that DMAR unit will be hot-added
 *    before IO devices managed by that unit.
 * 2) The hotplug framework guarantees that DMAR unit will be hot-removed
 *    after IO devices managed by that unit.
 * 3) Hotplug events are rare.
 *
 * Locking rules for DMA and interrupt remapping related global data structures:
 * 1) Use dmar_global_lock in process context
 * 2) Use RCU in interrupt context
 */
DECLARE_RWSEM(dmar_global_lock);
/* All registered DRHD units; INCLUDE_ALL units are queued at the tail */
LIST_HEAD(dmar_drhd_units);

/* DMAR ACPI table used by the init-time detection and parsing code */
struct acpi_table_header * __initdata dmar_tbl;
/*
 * 1 until dmar_dev_scope_init() has run; afterwards 0, or the first
 * negative errno recorded while setting up device scopes.
 */
static int dmar_dev_scope_status = 1;
/* Bitmap of the sequence ids handed out by dmar_alloc_seq_id() */
static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];

/* Defined further down; needed by the DRHD parsing/teardown helpers */
static int alloc_iommu(struct dmar_drhd_unit *drhd);
static void free_iommu(struct intel_iommu *iommu);
68 
69 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
70 {
71 	/*
72 	 * add INCLUDE_ALL at the tail, so scan the list will find it at
73 	 * the very end.
74 	 */
75 	if (drhd->include_all)
76 		list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
77 	else
78 		list_add_rcu(&drhd->list, &dmar_drhd_units);
79 }
80 
81 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
82 {
83 	struct acpi_dmar_device_scope *scope;
84 
85 	*cnt = 0;
86 	while (start < end) {
87 		scope = start;
88 		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
89 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
90 		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
91 			(*cnt)++;
92 		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
93 			scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
94 			pr_warn("Unsupported device scope\n");
95 		}
96 		start += scope->length;
97 	}
98 	if (*cnt == 0)
99 		return NULL;
100 
101 	return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
102 }
103 
104 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
105 {
106 	int i;
107 	struct device *tmp_dev;
108 
109 	if (*devices && *cnt) {
110 		for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
111 			put_device(tmp_dev);
112 		kfree(*devices);
113 	}
114 
115 	*devices = NULL;
116 	*cnt = 0;
117 }
118 
119 /* Optimize out kzalloc()/kfree() for normal cases */
120 static char dmar_pci_notify_info_buf[64];
121 
122 static struct dmar_pci_notify_info *
123 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
124 {
125 	int level = 0;
126 	size_t size;
127 	struct pci_dev *tmp;
128 	struct dmar_pci_notify_info *info;
129 
130 	BUG_ON(dev->is_virtfn);
131 
132 	/*
133 	 * Ignore devices that have a domain number higher than what can
134 	 * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
135 	 */
136 	if (pci_domain_nr(dev->bus) > U16_MAX)
137 		return NULL;
138 
139 	/* Only generate path[] for device addition event */
140 	if (event == BUS_NOTIFY_ADD_DEVICE)
141 		for (tmp = dev; tmp; tmp = tmp->bus->self)
142 			level++;
143 
144 	size = struct_size(info, path, level);
145 	if (size <= sizeof(dmar_pci_notify_info_buf)) {
146 		info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
147 	} else {
148 		info = kzalloc(size, GFP_KERNEL);
149 		if (!info) {
150 			if (dmar_dev_scope_status == 0)
151 				dmar_dev_scope_status = -ENOMEM;
152 			return NULL;
153 		}
154 	}
155 
156 	info->event = event;
157 	info->dev = dev;
158 	info->seg = pci_domain_nr(dev->bus);
159 	info->level = level;
160 	if (event == BUS_NOTIFY_ADD_DEVICE) {
161 		for (tmp = dev; tmp; tmp = tmp->bus->self) {
162 			level--;
163 			info->path[level].bus = tmp->bus->number;
164 			info->path[level].device = PCI_SLOT(tmp->devfn);
165 			info->path[level].function = PCI_FUNC(tmp->devfn);
166 			if (pci_is_root_bus(tmp->bus))
167 				info->bus = tmp->bus->number;
168 		}
169 	}
170 
171 	return info;
172 }
173 
174 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
175 {
176 	if ((void *)info != dmar_pci_notify_info_buf)
177 		kfree(info);
178 }
179 
180 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
181 				struct acpi_dmar_pci_path *path, int count)
182 {
183 	int i;
184 
185 	if (info->bus != bus)
186 		goto fallback;
187 	if (info->level != count)
188 		goto fallback;
189 
190 	for (i = 0; i < count; i++) {
191 		if (path[i].device != info->path[i].device ||
192 		    path[i].function != info->path[i].function)
193 			goto fallback;
194 	}
195 
196 	return true;
197 
198 fallback:
199 
200 	if (count != 1)
201 		return false;
202 
203 	i = info->level - 1;
204 	if (bus              == info->path[i].bus &&
205 	    path[0].device   == info->path[i].device &&
206 	    path[0].function == info->path[i].function) {
207 		pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
208 			bus, path[0].device, path[0].function);
209 		return true;
210 	}
211 
212 	return false;
213 }
214 
215 /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
216 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
217 			  void *start, void*end, u16 segment,
218 			  struct dmar_dev_scope *devices,
219 			  int devices_cnt)
220 {
221 	int i, level;
222 	struct device *tmp, *dev = &info->dev->dev;
223 	struct acpi_dmar_device_scope *scope;
224 	struct acpi_dmar_pci_path *path;
225 
226 	if (segment != info->seg)
227 		return 0;
228 
229 	for (; start < end; start += scope->length) {
230 		scope = start;
231 		if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
232 		    scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
233 			continue;
234 
235 		path = (struct acpi_dmar_pci_path *)(scope + 1);
236 		level = (scope->length - sizeof(*scope)) / sizeof(*path);
237 		if (!dmar_match_pci_path(info, scope->bus, path, level))
238 			continue;
239 
240 		/*
241 		 * We expect devices with endpoint scope to have normal PCI
242 		 * headers, and devices with bridge scope to have bridge PCI
243 		 * headers.  However PCI NTB devices may be listed in the
244 		 * DMAR table with bridge scope, even though they have a
245 		 * normal PCI header.  NTB devices are identified by class
246 		 * "BRIDGE_OTHER" (0680h) - we don't declare a socpe mismatch
247 		 * for this special case.
248 		 */
249 		if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
250 		     info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
251 		    (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
252 		     (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
253 		      info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
254 			pr_warn("Device scope type does not match for %s\n",
255 				pci_name(info->dev));
256 			return -EINVAL;
257 		}
258 
259 		for_each_dev_scope(devices, devices_cnt, i, tmp)
260 			if (tmp == NULL) {
261 				devices[i].bus = info->dev->bus->number;
262 				devices[i].devfn = info->dev->devfn;
263 				rcu_assign_pointer(devices[i].dev,
264 						   get_device(dev));
265 				return 1;
266 			}
267 		BUG_ON(i >= devices_cnt);
268 	}
269 
270 	return 0;
271 }
272 
273 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
274 			  struct dmar_dev_scope *devices, int count)
275 {
276 	int index;
277 	struct device *tmp;
278 
279 	if (info->seg != segment)
280 		return 0;
281 
282 	for_each_active_dev_scope(devices, count, index, tmp)
283 		if (tmp == &info->dev->dev) {
284 			RCU_INIT_POINTER(devices[index].dev, NULL);
285 			synchronize_rcu();
286 			put_device(tmp);
287 			return 1;
288 		}
289 
290 	return 0;
291 }
292 
293 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
294 {
295 	int ret = 0;
296 	struct dmar_drhd_unit *dmaru;
297 	struct acpi_dmar_hardware_unit *drhd;
298 
299 	for_each_drhd_unit(dmaru) {
300 		if (dmaru->include_all)
301 			continue;
302 
303 		drhd = container_of(dmaru->hdr,
304 				    struct acpi_dmar_hardware_unit, header);
305 		ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
306 				((void *)drhd) + drhd->header.length,
307 				dmaru->segment,
308 				dmaru->devices, dmaru->devices_cnt);
309 		if (ret)
310 			break;
311 	}
312 	if (ret >= 0)
313 		ret = dmar_iommu_notify_scope_dev(info);
314 	if (ret < 0 && dmar_dev_scope_status == 0)
315 		dmar_dev_scope_status = ret;
316 
317 	if (ret >= 0)
318 		intel_irq_remap_add_device(info);
319 
320 	return ret;
321 }
322 
323 static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
324 {
325 	struct dmar_drhd_unit *dmaru;
326 
327 	for_each_drhd_unit(dmaru)
328 		if (dmar_remove_dev_scope(info, dmaru->segment,
329 			dmaru->devices, dmaru->devices_cnt))
330 			break;
331 	dmar_iommu_notify_scope_dev(info);
332 }
333 
334 static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
335 {
336 	struct pci_dev *physfn = pci_physfn(pdev);
337 
338 	dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev));
339 }
340 
341 static int dmar_pci_bus_notifier(struct notifier_block *nb,
342 				 unsigned long action, void *data)
343 {
344 	struct pci_dev *pdev = to_pci_dev(data);
345 	struct dmar_pci_notify_info *info;
346 
347 	/* Only care about add/remove events for physical functions.
348 	 * For VFs we actually do the lookup based on the corresponding
349 	 * PF in device_to_iommu() anyway. */
350 	if (pdev->is_virtfn) {
351 		/*
352 		 * Ensure that the VF device inherits the irq domain of the
353 		 * PF device. Ideally the device would inherit the domain
354 		 * from the bus, but DMAR can have multiple units per bus
355 		 * which makes this impossible. The VF 'bus' could inherit
356 		 * from the PF device, but that's yet another x86'sism to
357 		 * inflict on everybody else.
358 		 */
359 		if (action == BUS_NOTIFY_ADD_DEVICE)
360 			vf_inherit_msi_domain(pdev);
361 		return NOTIFY_DONE;
362 	}
363 
364 	if (action != BUS_NOTIFY_ADD_DEVICE &&
365 	    action != BUS_NOTIFY_REMOVED_DEVICE)
366 		return NOTIFY_DONE;
367 
368 	info = dmar_alloc_pci_notify_info(pdev, action);
369 	if (!info)
370 		return NOTIFY_DONE;
371 
372 	down_write(&dmar_global_lock);
373 	if (action == BUS_NOTIFY_ADD_DEVICE)
374 		dmar_pci_bus_add_dev(info);
375 	else if (action == BUS_NOTIFY_REMOVED_DEVICE)
376 		dmar_pci_bus_del_dev(info);
377 	up_write(&dmar_global_lock);
378 
379 	dmar_free_pci_notify_info(info);
380 
381 	return NOTIFY_OK;
382 }
383 
/*
 * Notifier block registered on the PCI bus type by
 * dmar_register_bus_notifier().  It is given the lowest possible
 * priority value (INT_MIN).
 */
static struct notifier_block dmar_pci_bus_nb = {
	.notifier_call = dmar_pci_bus_notifier,
	.priority = INT_MIN,
};
388 
389 static struct dmar_drhd_unit *
390 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
391 {
392 	struct dmar_drhd_unit *dmaru;
393 
394 	list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
395 				dmar_rcu_check())
396 		if (dmaru->segment == drhd->segment &&
397 		    dmaru->reg_base_addr == drhd->address)
398 			return dmaru;
399 
400 	return NULL;
401 }
402 
403 /*
404  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
405  * structure which uniquely represent one DMA remapping hardware unit
406  * present in the platform
407  */
408 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
409 {
410 	struct acpi_dmar_hardware_unit *drhd;
411 	struct dmar_drhd_unit *dmaru;
412 	int ret;
413 
414 	drhd = (struct acpi_dmar_hardware_unit *)header;
415 	dmaru = dmar_find_dmaru(drhd);
416 	if (dmaru)
417 		goto out;
418 
419 	dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
420 	if (!dmaru)
421 		return -ENOMEM;
422 
423 	/*
424 	 * If header is allocated from slab by ACPI _DSM method, we need to
425 	 * copy the content because the memory buffer will be freed on return.
426 	 */
427 	dmaru->hdr = (void *)(dmaru + 1);
428 	memcpy(dmaru->hdr, header, header->length);
429 	dmaru->reg_base_addr = drhd->address;
430 	dmaru->segment = drhd->segment;
431 	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
432 	dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
433 					      ((void *)drhd) + drhd->header.length,
434 					      &dmaru->devices_cnt);
435 	if (dmaru->devices_cnt && dmaru->devices == NULL) {
436 		kfree(dmaru);
437 		return -ENOMEM;
438 	}
439 
440 	ret = alloc_iommu(dmaru);
441 	if (ret) {
442 		dmar_free_dev_scope(&dmaru->devices,
443 				    &dmaru->devices_cnt);
444 		kfree(dmaru);
445 		return ret;
446 	}
447 	dmar_register_drhd_unit(dmaru);
448 
449 out:
450 	if (arg)
451 		(*(int *)arg)++;
452 
453 	return 0;
454 }
455 
456 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
457 {
458 	if (dmaru->devices && dmaru->devices_cnt)
459 		dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
460 	if (dmaru->iommu)
461 		free_iommu(dmaru->iommu);
462 	kfree(dmaru);
463 }
464 
465 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
466 				      void *arg)
467 {
468 	struct acpi_dmar_andd *andd = (void *)header;
469 
470 	/* Check for NUL termination within the designated length */
471 	if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
472 		pr_warn(FW_BUG
473 			   "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
474 			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
475 			   dmi_get_system_info(DMI_BIOS_VENDOR),
476 			   dmi_get_system_info(DMI_BIOS_VERSION),
477 			   dmi_get_system_info(DMI_PRODUCT_VERSION));
478 		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
479 		return -EINVAL;
480 	}
481 	pr_info("ANDD device: %x name: %s\n", andd->device_number,
482 		andd->device_name);
483 
484 	return 0;
485 }
486 
#ifdef CONFIG_ACPI_NUMA
/*
 * Apply one remapping hardware static affinity (RHSA) structure.
 * @header: the RHSA structure
 * @arg:    unused
 *
 * Sets the NUMA node of the iommu whose register base address matches the
 * structure.  The node falls back to NUMA_NO_NODE when the proximity
 * domain does not map to a node or the node is not online.  A structure
 * that refers to an unknown unit is reported as a firmware bug.
 *
 * Return: always 0.
 */
static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
{
	struct acpi_dmar_rhsa *rhsa;
	struct dmar_drhd_unit *drhd;

	rhsa = (struct acpi_dmar_rhsa *)header;
	for_each_drhd_unit(drhd) {
		if (drhd->reg_base_addr == rhsa->base_address) {
			int node = pxm_to_node(rhsa->proximity_domain);

			/*
			 * pxm_to_node() may already return NUMA_NO_NODE;
			 * never hand that negative value to node_online(),
			 * which would test a bit outside the node mask.
			 */
			if (node != NUMA_NO_NODE && !node_online(node))
				node = NUMA_NO_NODE;
			drhd->iommu->node = node;
			return 0;
		}
	}
	pr_warn(FW_BUG
		"Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		rhsa->base_address,
		dmi_get_system_info(DMI_BIOS_VENDOR),
		dmi_get_system_info(DMI_BIOS_VERSION),
		dmi_get_system_info(DMI_PRODUCT_VERSION));
	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);

	return 0;
}
#else
#define	dmar_parse_one_rhsa		dmar_res_noop
#endif
518 
519 static void
520 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
521 {
522 	struct acpi_dmar_hardware_unit *drhd;
523 	struct acpi_dmar_reserved_memory *rmrr;
524 	struct acpi_dmar_atsr *atsr;
525 	struct acpi_dmar_rhsa *rhsa;
526 	struct acpi_dmar_satc *satc;
527 
528 	switch (header->type) {
529 	case ACPI_DMAR_TYPE_HARDWARE_UNIT:
530 		drhd = container_of(header, struct acpi_dmar_hardware_unit,
531 				    header);
532 		pr_info("DRHD base: %#016Lx flags: %#x\n",
533 			(unsigned long long)drhd->address, drhd->flags);
534 		break;
535 	case ACPI_DMAR_TYPE_RESERVED_MEMORY:
536 		rmrr = container_of(header, struct acpi_dmar_reserved_memory,
537 				    header);
538 		pr_info("RMRR base: %#016Lx end: %#016Lx\n",
539 			(unsigned long long)rmrr->base_address,
540 			(unsigned long long)rmrr->end_address);
541 		break;
542 	case ACPI_DMAR_TYPE_ROOT_ATS:
543 		atsr = container_of(header, struct acpi_dmar_atsr, header);
544 		pr_info("ATSR flags: %#x\n", atsr->flags);
545 		break;
546 	case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
547 		rhsa = container_of(header, struct acpi_dmar_rhsa, header);
548 		pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
549 		       (unsigned long long)rhsa->base_address,
550 		       rhsa->proximity_domain);
551 		break;
552 	case ACPI_DMAR_TYPE_NAMESPACE:
553 		/* We don't print this here because we need to sanity-check
554 		   it first. So print it in dmar_parse_one_andd() instead. */
555 		break;
556 	case ACPI_DMAR_TYPE_SATC:
557 		satc = container_of(header, struct acpi_dmar_satc, header);
558 		pr_info("SATC flags: 0x%x\n", satc->flags);
559 		break;
560 	}
561 }
562 
563 /**
564  * dmar_table_detect - checks to see if the platform supports DMAR devices
565  */
566 static int __init dmar_table_detect(void)
567 {
568 	acpi_status status = AE_OK;
569 
570 	/* if we could find DMAR table, then there are DMAR devices */
571 	status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
572 
573 	if (ACPI_SUCCESS(status) && !dmar_tbl) {
574 		pr_warn("Unable to map DMAR\n");
575 		status = AE_NOT_FOUND;
576 	}
577 
578 	return ACPI_SUCCESS(status) ? 0 : -ENOENT;
579 }
580 
581 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
582 				       size_t len, struct dmar_res_callback *cb)
583 {
584 	struct acpi_dmar_header *iter, *next;
585 	struct acpi_dmar_header *end = ((void *)start) + len;
586 
587 	for (iter = start; iter < end; iter = next) {
588 		next = (void *)iter + iter->length;
589 		if (iter->length == 0) {
590 			/* Avoid looping forever on bad ACPI tables */
591 			pr_debug(FW_BUG "Invalid 0-length structure\n");
592 			break;
593 		} else if (next > end) {
594 			/* Avoid passing table end */
595 			pr_warn(FW_BUG "Record passes table end\n");
596 			return -EINVAL;
597 		}
598 
599 		if (cb->print_entry)
600 			dmar_table_print_dmar_entry(iter);
601 
602 		if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
603 			/* continue for forward compatibility */
604 			pr_debug("Unknown DMAR structure type %d\n",
605 				 iter->type);
606 		} else if (cb->cb[iter->type]) {
607 			int ret;
608 
609 			ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
610 			if (ret)
611 				return ret;
612 		} else if (!cb->ignore_unhandled) {
613 			pr_warn("No handler for DMAR structure type %d\n",
614 				iter->type);
615 			return -EINVAL;
616 		}
617 	}
618 
619 	return 0;
620 }
621 
622 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
623 				       struct dmar_res_callback *cb)
624 {
625 	return dmar_walk_remapping_entries((void *)(dmar + 1),
626 			dmar->header.length - sizeof(*dmar), cb);
627 }
628 
629 /**
630  * parse_dmar_table - parses the DMA reporting table
631  */
632 static int __init
633 parse_dmar_table(void)
634 {
635 	struct acpi_table_dmar *dmar;
636 	int drhd_count = 0;
637 	int ret;
638 	struct dmar_res_callback cb = {
639 		.print_entry = true,
640 		.ignore_unhandled = true,
641 		.arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
642 		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
643 		.cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
644 		.cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
645 		.cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
646 		.cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
647 		.cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc,
648 	};
649 
650 	/*
651 	 * Do it again, earlier dmar_tbl mapping could be mapped with
652 	 * fixed map.
653 	 */
654 	dmar_table_detect();
655 
656 	/*
657 	 * ACPI tables may not be DMA protected by tboot, so use DMAR copy
658 	 * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
659 	 */
660 	dmar_tbl = tboot_get_dmar_table(dmar_tbl);
661 
662 	dmar = (struct acpi_table_dmar *)dmar_tbl;
663 	if (!dmar)
664 		return -ENODEV;
665 
666 	if (dmar->width < PAGE_SHIFT - 1) {
667 		pr_warn("Invalid DMAR haw\n");
668 		return -EINVAL;
669 	}
670 
671 	pr_info("Host address width %d\n", dmar->width + 1);
672 	ret = dmar_walk_dmar_table(dmar, &cb);
673 	if (ret == 0 && drhd_count == 0)
674 		pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
675 
676 	return ret;
677 }
678 
679 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
680 				 int cnt, struct pci_dev *dev)
681 {
682 	int index;
683 	struct device *tmp;
684 
685 	while (dev) {
686 		for_each_active_dev_scope(devices, cnt, index, tmp)
687 			if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
688 				return 1;
689 
690 		/* Check our parent */
691 		dev = dev->bus->self;
692 	}
693 
694 	return 0;
695 }
696 
697 struct dmar_drhd_unit *
698 dmar_find_matched_drhd_unit(struct pci_dev *dev)
699 {
700 	struct dmar_drhd_unit *dmaru;
701 	struct acpi_dmar_hardware_unit *drhd;
702 
703 	dev = pci_physfn(dev);
704 
705 	rcu_read_lock();
706 	for_each_drhd_unit(dmaru) {
707 		drhd = container_of(dmaru->hdr,
708 				    struct acpi_dmar_hardware_unit,
709 				    header);
710 
711 		if (dmaru->include_all &&
712 		    drhd->segment == pci_domain_nr(dev->bus))
713 			goto out;
714 
715 		if (dmar_pci_device_match(dmaru->devices,
716 					  dmaru->devices_cnt, dev))
717 			goto out;
718 	}
719 	dmaru = NULL;
720 out:
721 	rcu_read_unlock();
722 
723 	return dmaru;
724 }
725 
726 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
727 					      struct acpi_device *adev)
728 {
729 	struct dmar_drhd_unit *dmaru;
730 	struct acpi_dmar_hardware_unit *drhd;
731 	struct acpi_dmar_device_scope *scope;
732 	struct device *tmp;
733 	int i;
734 	struct acpi_dmar_pci_path *path;
735 
736 	for_each_drhd_unit(dmaru) {
737 		drhd = container_of(dmaru->hdr,
738 				    struct acpi_dmar_hardware_unit,
739 				    header);
740 
741 		for (scope = (void *)(drhd + 1);
742 		     (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
743 		     scope = ((void *)scope) + scope->length) {
744 			if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
745 				continue;
746 			if (scope->enumeration_id != device_number)
747 				continue;
748 
749 			path = (void *)(scope + 1);
750 			pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
751 				dev_name(&adev->dev), dmaru->reg_base_addr,
752 				scope->bus, path->device, path->function);
753 			for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
754 				if (tmp == NULL) {
755 					dmaru->devices[i].bus = scope->bus;
756 					dmaru->devices[i].devfn = PCI_DEVFN(path->device,
757 									    path->function);
758 					rcu_assign_pointer(dmaru->devices[i].dev,
759 							   get_device(&adev->dev));
760 					return;
761 				}
762 			BUG_ON(i >= dmaru->devices_cnt);
763 		}
764 	}
765 	pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
766 		device_number, dev_name(&adev->dev));
767 }
768 
769 static int __init dmar_acpi_dev_scope_init(void)
770 {
771 	struct acpi_dmar_andd *andd;
772 
773 	if (dmar_tbl == NULL)
774 		return -ENODEV;
775 
776 	for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
777 	     ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
778 	     andd = ((void *)andd) + andd->header.length) {
779 		if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
780 			acpi_handle h;
781 			struct acpi_device *adev;
782 
783 			if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
784 							  andd->device_name,
785 							  &h))) {
786 				pr_err("Failed to find handle for ACPI object %s\n",
787 				       andd->device_name);
788 				continue;
789 			}
790 			adev = acpi_fetch_acpi_dev(h);
791 			if (!adev) {
792 				pr_err("Failed to get device for ACPI object %s\n",
793 				       andd->device_name);
794 				continue;
795 			}
796 			dmar_acpi_insert_dev_scope(andd->device_number, adev);
797 		}
798 	}
799 	return 0;
800 }
801 
802 int __init dmar_dev_scope_init(void)
803 {
804 	struct pci_dev *dev = NULL;
805 	struct dmar_pci_notify_info *info;
806 
807 	if (dmar_dev_scope_status != 1)
808 		return dmar_dev_scope_status;
809 
810 	if (list_empty(&dmar_drhd_units)) {
811 		dmar_dev_scope_status = -ENODEV;
812 	} else {
813 		dmar_dev_scope_status = 0;
814 
815 		dmar_acpi_dev_scope_init();
816 
817 		for_each_pci_dev(dev) {
818 			if (dev->is_virtfn)
819 				continue;
820 
821 			info = dmar_alloc_pci_notify_info(dev,
822 					BUS_NOTIFY_ADD_DEVICE);
823 			if (!info) {
824 				return dmar_dev_scope_status;
825 			} else {
826 				dmar_pci_bus_add_dev(info);
827 				dmar_free_pci_notify_info(info);
828 			}
829 		}
830 	}
831 
832 	return dmar_dev_scope_status;
833 }
834 
/* Register dmar_pci_bus_nb as a notifier on the PCI bus type. */
void __init dmar_register_bus_notifier(void)
{
	bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
}
839 
840 
841 int __init dmar_table_init(void)
842 {
843 	static int dmar_table_initialized;
844 	int ret;
845 
846 	if (dmar_table_initialized == 0) {
847 		ret = parse_dmar_table();
848 		if (ret < 0) {
849 			if (ret != -ENODEV)
850 				pr_info("Parse DMAR table failure.\n");
851 		} else  if (list_empty(&dmar_drhd_units)) {
852 			pr_info("No DMAR devices found\n");
853 			ret = -ENODEV;
854 		}
855 
856 		if (ret < 0)
857 			dmar_table_initialized = ret;
858 		else
859 			dmar_table_initialized = 1;
860 	}
861 
862 	return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
863 }
864 
/*
 * Report a broken DMAR entry as a firmware bug.
 * @addr:    register base address the firmware reported
 * @message: text appended right after the address in the warning
 *
 * The warning is printed only once (pr_warn_once); the kernel is tainted
 * with TAINT_FIRMWARE_WORKAROUND on every call.
 */
static void warn_invalid_dmar(u64 addr, const char *message)
{
	pr_warn_once(FW_BUG
		"Your BIOS is broken; DMAR reported at address %llx%s!\n"
		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		addr, message,
		dmi_get_system_info(DMI_BIOS_VENDOR),
		dmi_get_system_info(DMI_BIOS_VERSION),
		dmi_get_system_info(DMI_PRODUCT_VERSION));
	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
}
876 
877 static int __ref
878 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
879 {
880 	struct acpi_dmar_hardware_unit *drhd;
881 	void __iomem *addr;
882 	u64 cap, ecap;
883 
884 	drhd = (void *)entry;
885 	if (!drhd->address) {
886 		warn_invalid_dmar(0, "");
887 		return -EINVAL;
888 	}
889 
890 	if (arg)
891 		addr = ioremap(drhd->address, VTD_PAGE_SIZE);
892 	else
893 		addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
894 	if (!addr) {
895 		pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
896 		return -EINVAL;
897 	}
898 
899 	cap = dmar_readq(addr + DMAR_CAP_REG);
900 	ecap = dmar_readq(addr + DMAR_ECAP_REG);
901 
902 	if (arg)
903 		iounmap(addr);
904 	else
905 		early_iounmap(addr, VTD_PAGE_SIZE);
906 
907 	if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
908 		warn_invalid_dmar(drhd->address, " returns all ones");
909 		return -EINVAL;
910 	}
911 
912 	return 0;
913 }
914 
915 int __init detect_intel_iommu(void)
916 {
917 	int ret;
918 	struct dmar_res_callback validate_drhd_cb = {
919 		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
920 		.ignore_unhandled = true,
921 	};
922 
923 	down_write(&dmar_global_lock);
924 	ret = dmar_table_detect();
925 	if (!ret)
926 		ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
927 					   &validate_drhd_cb);
928 	if (!ret && !no_iommu && !iommu_detected &&
929 	    (!dmar_disabled || dmar_platform_optin())) {
930 		iommu_detected = 1;
931 		/* Make sure ACS will be enabled */
932 		pci_request_acs();
933 	}
934 
935 #ifdef CONFIG_X86
936 	if (!ret) {
937 		x86_init.iommu.iommu_init = intel_iommu_init;
938 		x86_platform.iommu_shutdown = intel_iommu_shutdown;
939 	}
940 
941 #endif
942 
943 	if (dmar_tbl) {
944 		acpi_put_table(dmar_tbl);
945 		dmar_tbl = NULL;
946 	}
947 	up_write(&dmar_global_lock);
948 
949 	return ret ? ret : 1;
950 }
951 
/*
 * Undo map_iommu(): unmap the iommu's registers and release the memory
 * region [reg_phys, reg_phys + reg_size) that was reserved for them.
 */
static void unmap_iommu(struct intel_iommu *iommu)
{
	iounmap(iommu->reg);
	release_mem_region(iommu->reg_phys, iommu->reg_size);
}
957 
958 /**
959  * map_iommu: map the iommu's registers
960  * @iommu: the iommu to map
961  * @phys_addr: the physical address of the base resgister
962  *
963  * Memory map the iommu's registers.  Start w/ a single page, and
964  * possibly expand if that turns out to be insufficent.
965  */
966 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
967 {
968 	int map_size, err=0;
969 
970 	iommu->reg_phys = phys_addr;
971 	iommu->reg_size = VTD_PAGE_SIZE;
972 
973 	if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
974 		pr_err("Can't reserve memory\n");
975 		err = -EBUSY;
976 		goto out;
977 	}
978 
979 	iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
980 	if (!iommu->reg) {
981 		pr_err("Can't map the region\n");
982 		err = -ENOMEM;
983 		goto release;
984 	}
985 
986 	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
987 	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
988 
989 	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
990 		err = -EINVAL;
991 		warn_invalid_dmar(phys_addr, " returns all ones");
992 		goto unmap;
993 	}
994 	if (ecap_vcs(iommu->ecap))
995 		iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
996 
997 	/* the registers might be more than one page */
998 	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
999 			 cap_max_fault_reg_offset(iommu->cap));
1000 	map_size = VTD_PAGE_ALIGN(map_size);
1001 	if (map_size > iommu->reg_size) {
1002 		iounmap(iommu->reg);
1003 		release_mem_region(iommu->reg_phys, iommu->reg_size);
1004 		iommu->reg_size = map_size;
1005 		if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
1006 					iommu->name)) {
1007 			pr_err("Can't reserve memory\n");
1008 			err = -EBUSY;
1009 			goto out;
1010 		}
1011 		iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
1012 		if (!iommu->reg) {
1013 			pr_err("Can't map the region\n");
1014 			err = -ENOMEM;
1015 			goto release;
1016 		}
1017 	}
1018 	err = 0;
1019 	goto out;
1020 
1021 unmap:
1022 	iounmap(iommu->reg);
1023 release:
1024 	release_mem_region(iommu->reg_phys, iommu->reg_size);
1025 out:
1026 	return err;
1027 }
1028 
1029 static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1030 {
1031 	iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1032 					    DMAR_UNITS_SUPPORTED);
1033 	if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1034 		iommu->seq_id = -1;
1035 	} else {
1036 		set_bit(iommu->seq_id, dmar_seq_ids);
1037 		sprintf(iommu->name, "dmar%d", iommu->seq_id);
1038 	}
1039 
1040 	return iommu->seq_id;
1041 }
1042 
1043 static void dmar_free_seq_id(struct intel_iommu *iommu)
1044 {
1045 	if (iommu->seq_id >= 0) {
1046 		clear_bit(iommu->seq_id, dmar_seq_ids);
1047 		iommu->seq_id = -1;
1048 	}
1049 }
1050 
/*
 * Allocate and initialise the intel_iommu instance for one DRHD unit.
 *
 * The function reserves a sequence id, maps the unit's registers with
 * map_iommu(), computes agaw/msagaw (marking the unit as ignored when no
 * usable value exists), mirrors the features already enabled in the global
 * status register into iommu->gcmd and, when intel_iommu_enabled is set,
 * registers the unit with sysfs and the IOMMU core.
 *
 * On success drhd->iommu and iommu->drhd point at each other and 0 is
 * returned. On failure everything acquired here is released again and an
 * error code is returned.
 */
static int alloc_iommu(struct dmar_drhd_unit *drhd)
{
	struct intel_iommu *iommu;
	u32 ver, sts;
	int agaw = -1;
	int msagaw = -1;
	int err;

	/* A DRHD without a register base address cannot be used. */
	if (!drhd->reg_base_addr) {
		warn_invalid_dmar(0, "");
		return -EINVAL;
	}

	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
	if (!iommu)
		return -ENOMEM;

	if (dmar_alloc_seq_id(iommu) < 0) {
		pr_err("Failed to allocate seq_id\n");
		err = -ENOSPC;
		goto error;
	}

	err = map_iommu(iommu, drhd->reg_base_addr);
	if (err) {
		pr_err("Failed to map %s\n", iommu->name);
		goto error_free_seq_id;
	}

	/*
	 * Note: this value is overwritten by the sysfs/register calls below
	 * before any later error exit is taken.
	 */
	err = -EINVAL;
	if (cap_sagaw(iommu->cap) == 0) {
		pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
			iommu->name);
		drhd->ignored = 1;
	}

	/*
	 * A unit without a valid agaw or max agaw is not a fatal error: it is
	 * marked as ignored and keeps agaw == msagaw == -1.
	 */
	if (!drhd->ignored) {
		agaw = iommu_calculate_agaw(iommu);
		if (agaw < 0) {
			pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
			       iommu->seq_id);
			drhd->ignored = 1;
		}
	}
	if (!drhd->ignored) {
		msagaw = iommu_calculate_max_sagaw(iommu);
		if (msagaw < 0) {
			pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
			       iommu->seq_id);
			drhd->ignored = 1;
			agaw = -1;
		}
	}
	iommu->agaw = agaw;
	iommu->msagaw = msagaw;
	iommu->segment = drhd->segment;

	iommu->node = NUMA_NO_NODE;

	ver = readl(iommu->reg + DMAR_VER_REG);
	pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
		iommu->name,
		(unsigned long long)drhd->reg_base_addr,
		DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
		(unsigned long long)iommu->cap,
		(unsigned long long)iommu->ecap);

	/* Reflect status in gcmd */
	sts = readl(iommu->reg + DMAR_GSTS_REG);
	if (sts & DMA_GSTS_IRES)
		iommu->gcmd |= DMA_GCMD_IRE;
	if (sts & DMA_GSTS_TES)
		iommu->gcmd |= DMA_GCMD_TE;
	if (sts & DMA_GSTS_QIES)
		iommu->gcmd |= DMA_GCMD_QIE;

	raw_spin_lock_init(&iommu->register_lock);

	/*
	 * This is only for hotplug; at boot time intel_iommu_enabled won't
	 * be set yet. When intel_iommu_init() runs, it registers the units
	 * present at boot time, then sets intel_iommu_enabled.
	 */
	if (intel_iommu_enabled && !drhd->ignored) {
		err = iommu_device_sysfs_add(&iommu->iommu, NULL,
					     intel_iommu_groups,
					     "%s", iommu->name);
		if (err)
			goto err_unmap;

		err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
		if (err)
			goto err_sysfs;
	}

	/* Link the DRHD and its IOMMU to each other. */
	drhd->iommu = iommu;
	iommu->drhd = drhd;

	return 0;

	/* Unwind in reverse order of acquisition. */
err_sysfs:
	iommu_device_sysfs_remove(&iommu->iommu);
err_unmap:
	unmap_iommu(iommu);
error_free_seq_id:
	dmar_free_seq_id(iommu);
error:
	kfree(iommu);
	return err;
}
1161 
1162 static void free_iommu(struct intel_iommu *iommu)
1163 {
1164 	if (intel_iommu_enabled && !iommu->drhd->ignored) {
1165 		iommu_device_unregister(&iommu->iommu);
1166 		iommu_device_sysfs_remove(&iommu->iommu);
1167 	}
1168 
1169 	if (iommu->irq) {
1170 		if (iommu->pr_irq) {
1171 			free_irq(iommu->pr_irq, iommu);
1172 			dmar_free_hwirq(iommu->pr_irq);
1173 			iommu->pr_irq = 0;
1174 		}
1175 		free_irq(iommu->irq, iommu);
1176 		dmar_free_hwirq(iommu->irq);
1177 		iommu->irq = 0;
1178 	}
1179 
1180 	if (iommu->qi) {
1181 		free_page((unsigned long)iommu->qi->desc);
1182 		kfree(iommu->qi->desc_status);
1183 		kfree(iommu->qi);
1184 	}
1185 
1186 	if (iommu->reg)
1187 		unmap_iommu(iommu);
1188 
1189 	dmar_free_seq_id(iommu);
1190 	kfree(iommu);
1191 }
1192 
1193 /*
1194  * Reclaim all the submitted descriptors which have completed its work.
1195  */
1196 static inline void reclaim_free_desc(struct q_inval *qi)
1197 {
1198 	while (qi->desc_status[qi->free_tail] == QI_DONE ||
1199 	       qi->desc_status[qi->free_tail] == QI_ABORT) {
1200 		qi->desc_status[qi->free_tail] = QI_FREE;
1201 		qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1202 		qi->free_cnt++;
1203 	}
1204 }
1205 
1206 static const char *qi_type_string(u8 type)
1207 {
1208 	switch (type) {
1209 	case QI_CC_TYPE:
1210 		return "Context-cache Invalidation";
1211 	case QI_IOTLB_TYPE:
1212 		return "IOTLB Invalidation";
1213 	case QI_DIOTLB_TYPE:
1214 		return "Device-TLB Invalidation";
1215 	case QI_IEC_TYPE:
1216 		return "Interrupt Entry Cache Invalidation";
1217 	case QI_IWD_TYPE:
1218 		return "Invalidation Wait";
1219 	case QI_EIOTLB_TYPE:
1220 		return "PASID-based IOTLB Invalidation";
1221 	case QI_PC_TYPE:
1222 		return "PASID-cache Invalidation";
1223 	case QI_DEIOTLB_TYPE:
1224 		return "PASID-based Device-TLB Invalidation";
1225 	case QI_PGRP_RESP_TYPE:
1226 		return "Page Group Response";
1227 	default:
1228 		return "UNKNOWN";
1229 	}
1230 }
1231 
1232 static void qi_dump_fault(struct intel_iommu *iommu, u32 fault)
1233 {
1234 	unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG);
1235 	u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
1236 	struct qi_desc *desc = iommu->qi->desc + head;
1237 
1238 	if (fault & DMA_FSTS_IQE)
1239 		pr_err("VT-d detected Invalidation Queue Error: Reason %llx",
1240 		       DMAR_IQER_REG_IQEI(iqe_err));
1241 	if (fault & DMA_FSTS_ITE)
1242 		pr_err("VT-d detected Invalidation Time-out Error: SID %llx",
1243 		       DMAR_IQER_REG_ITESID(iqe_err));
1244 	if (fault & DMA_FSTS_ICE)
1245 		pr_err("VT-d detected Invalidation Completion Error: SID %llx",
1246 		       DMAR_IQER_REG_ICESID(iqe_err));
1247 
1248 	pr_err("QI HEAD: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1249 	       qi_type_string(desc->qw0 & 0xf),
1250 	       (unsigned long long)desc->qw0,
1251 	       (unsigned long long)desc->qw1);
1252 
1253 	head = ((head >> qi_shift(iommu)) + QI_LENGTH - 1) % QI_LENGTH;
1254 	head <<= qi_shift(iommu);
1255 	desc = iommu->qi->desc + head;
1256 
1257 	pr_err("QI PRIOR: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1258 	       qi_type_string(desc->qw0 & 0xf),
1259 	       (unsigned long long)desc->qw0,
1260 	       (unsigned long long)desc->qw1);
1261 }
1262 
/*
 * Check the fault status register for invalidation-queue errors while a
 * submission is being waited for.
 *
 * @iommu:      unit whose queue is checked
 * @index:      slot index of the first descriptor of the submission
 * @wait_index: slot index of the submission's wait descriptor
 *
 * Returns -EAGAIN when the wait descriptor has been marked QI_ABORT,
 * -EINVAL when an IQE was raised on the descriptor at @index, and 0
 * otherwise. IQE/ITE/ICE conditions that are handled here are cleared by
 * writing the corresponding bit back to the fault status register.
 */
static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
{
	u32 fault;
	int head, tail;
	struct q_inval *qi = iommu->qi;
	int shift = qi_shift(iommu);

	if (qi->desc_status[wait_index] == QI_ABORT)
		return -EAGAIN;

	fault = readl(iommu->reg + DMAR_FSTS_REG);
	if (fault & (DMA_FSTS_IQE | DMA_FSTS_ITE | DMA_FSTS_ICE))
		qi_dump_fault(iommu, fault);

	/*
	 * If IQE happens, the head points to the descriptor associated
	 * with the error. No new descriptors are fetched until the IQE
	 * is cleared.
	 */
	if (fault & DMA_FSTS_IQE) {
		head = readl(iommu->reg + DMAR_IQH_REG);
		if ((head >> shift) == index) {
			struct qi_desc *desc = qi->desc + head;

			/*
			 * desc->qw2 and desc->qw3 are either reserved or
			 * used by software as private data. We won't print
			 * out these two qw's for security consideration.
			 */
			/* Replace the offending descriptor with the wait descriptor. */
			memcpy(desc, qi->desc + (wait_index << shift),
			       1 << shift);
			writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
			pr_info("Invalidation Queue Error (IQE) cleared\n");
			return -EINVAL;
		}
	}

	/*
	 * If ITE happens, all pending wait_desc commands are aborted.
	 * No new descriptors are fetched until the ITE is cleared.
	 */
	if (fault & DMA_FSTS_ITE) {
		/* Slot just before the hardware head, forced to an odd index. */
		head = readl(iommu->reg + DMAR_IQH_REG);
		head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
		head |= 1;
		/* Slot just before the hardware tail. */
		tail = readl(iommu->reg + DMAR_IQT_REG);
		tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;

		writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
		pr_info("Invalidation Time-out Error (ITE) cleared\n");

		/*
		 * Walk backwards two slots at a time and abort every slot
		 * that is still in use.
		 * NOTE(review): stepping by two from an odd index appears to
		 * assume wait descriptors sit at odd slots - confirm.
		 */
		do {
			if (qi->desc_status[head] == QI_IN_USE)
				qi->desc_status[head] = QI_ABORT;
			head = (head - 2 + QI_LENGTH) % QI_LENGTH;
		} while (head != tail);

		if (qi->desc_status[wait_index] == QI_ABORT)
			return -EAGAIN;
	}

	if (fault & DMA_FSTS_ICE) {
		writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
		pr_info("Invalidation Completion Error (ICE) cleared\n");
	}

	return 0;
}
1331 
/*
 * Function to submit invalidation descriptors of all types to the queued
 * invalidation interface(QI). Multiple descriptors can be submitted at a
 * time, a wait descriptor will be appended to each submission to ensure
 * hardware has completed the invalidation before return. Wait descriptors
 * can be part of the submission but it will not be polled for completion.
 *
 * @iommu:   unit to submit to
 * @desc:    array of @count descriptors to submit
 * @count:   number of descriptors in @desc
 * @options: QI_OPT_WAIT_DRAIN adds the PRQ drain flag to the wait descriptor
 *
 * Returns 0 when the unit has no invalidation queue or the submission
 * completed, otherwise the error reported by qi_check_fault(). A result of
 * -EAGAIN is not returned; the whole submission is retried instead.
 */
int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
		   unsigned int count, unsigned long options)
{
	struct q_inval *qi = iommu->qi;
	s64 devtlb_start_ktime = 0;
	s64 iotlb_start_ktime = 0;
	s64 iec_start_ktime = 0;
	struct qi_desc wait_desc;
	int wait_index, index;
	unsigned long flags;
	int offset, shift;
	int rc, i;
	u64 type;

	if (!qi)
		return 0;

	/* The low four bits of qw0 of the first descriptor hold its type. */
	type = desc->qw0 & GENMASK_ULL(3, 0);

	/*
	 * Record a start timestamp only for the latency category that matches
	 * the descriptor type and is currently being sampled.
	 */
	if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) &&
	    dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
		iotlb_start_ktime = ktime_to_ns(ktime_get());

	if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) &&
	    dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB))
		devtlb_start_ktime = ktime_to_ns(ktime_get());

	if (type == QI_IEC_TYPE &&
	    dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC))
		iec_start_ktime = ktime_to_ns(ktime_get());

restart:
	rc = 0;

	raw_spin_lock_irqsave(&qi->q_lock, flags);
	/*
	 * Check if we have enough empty slots in the queue to submit,
	 * the calculation is based on:
	 * # of desc + 1 wait desc + 1 space between head and tail
	 */
	while (qi->free_cnt < count + 2) {
		raw_spin_unlock_irqrestore(&qi->q_lock, flags);
		cpu_relax();
		raw_spin_lock_irqsave(&qi->q_lock, flags);
	}

	index = qi->free_head;
	wait_index = (index + count) % QI_LENGTH;
	shift = qi_shift(iommu);

	/* Copy the caller's descriptors into consecutive slots of the ring. */
	for (i = 0; i < count; i++) {
		offset = ((index + i) % QI_LENGTH) << shift;
		memcpy(qi->desc + offset, &desc[i], 1 << shift);
		qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
		trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1,
				desc[i].qw2, desc[i].qw3);
	}
	qi->desc_status[wait_index] = QI_IN_USE;

	/*
	 * Build the wait descriptor: it asks for QI_DONE to be written to
	 * the status word of its own slot, which is what the loop below polls.
	 */
	wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
			QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
	if (options & QI_OPT_WAIT_DRAIN)
		wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
	wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
	wait_desc.qw2 = 0;
	wait_desc.qw3 = 0;

	offset = wait_index << shift;
	memcpy(qi->desc + offset, &wait_desc, 1 << shift);

	qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
	qi->free_cnt -= count + 1;

	/*
	 * update the HW tail register indicating the presence of
	 * new descriptors.
	 */
	writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);

	while (qi->desc_status[wait_index] != QI_DONE) {
		/*
		 * We will leave the interrupts disabled, to prevent interrupt
		 * context to queue another cmd while a cmd is already submitted
		 * and waiting for completion on this cpu. This is to avoid
		 * a deadlock where the interrupt context can wait indefinitely
		 * for free slots in the queue.
		 */
		rc = qi_check_fault(iommu, index, wait_index);
		if (rc)
			break;

		raw_spin_unlock(&qi->q_lock);
		cpu_relax();
		raw_spin_lock(&qi->q_lock);
	}

	/* Mark the submitted slots done so reclaim_free_desc() can free them. */
	for (i = 0; i < count; i++)
		qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;

	reclaim_free_desc(qi);
	raw_spin_unlock_irqrestore(&qi->q_lock, flags);

	/* The wait descriptor was aborted: submit everything again. */
	if (rc == -EAGAIN)
		goto restart;

	if (iotlb_start_ktime)
		dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
				ktime_to_ns(ktime_get()) - iotlb_start_ktime);

	if (devtlb_start_ktime)
		dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB,
				ktime_to_ns(ktime_get()) - devtlb_start_ktime);

	if (iec_start_ktime)
		dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC,
				ktime_to_ns(ktime_get()) - iec_start_ktime);

	return rc;
}
1458 
1459 /*
1460  * Flush the global interrupt entry cache.
1461  */
1462 void qi_global_iec(struct intel_iommu *iommu)
1463 {
1464 	struct qi_desc desc;
1465 
1466 	desc.qw0 = QI_IEC_TYPE;
1467 	desc.qw1 = 0;
1468 	desc.qw2 = 0;
1469 	desc.qw3 = 0;
1470 
1471 	/* should never fail */
1472 	qi_submit_sync(iommu, &desc, 1, 0);
1473 }
1474 
1475 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1476 		      u64 type)
1477 {
1478 	struct qi_desc desc;
1479 
1480 	desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1481 			| QI_CC_GRAN(type) | QI_CC_TYPE;
1482 	desc.qw1 = 0;
1483 	desc.qw2 = 0;
1484 	desc.qw3 = 0;
1485 
1486 	qi_submit_sync(iommu, &desc, 1, 0);
1487 }
1488 
1489 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1490 		    unsigned int size_order, u64 type)
1491 {
1492 	u8 dw = 0, dr = 0;
1493 
1494 	struct qi_desc desc;
1495 	int ih = 0;
1496 
1497 	if (cap_write_drain(iommu->cap))
1498 		dw = 1;
1499 
1500 	if (cap_read_drain(iommu->cap))
1501 		dr = 1;
1502 
1503 	desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1504 		| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1505 	desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1506 		| QI_IOTLB_AM(size_order);
1507 	desc.qw2 = 0;
1508 	desc.qw3 = 0;
1509 
1510 	qi_submit_sync(iommu, &desc, 1, 0);
1511 }
1512 
1513 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1514 			u16 qdep, u64 addr, unsigned mask)
1515 {
1516 	struct qi_desc desc;
1517 
1518 	if (mask) {
1519 		addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1520 		desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1521 	} else
1522 		desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1523 
1524 	if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1525 		qdep = 0;
1526 
1527 	desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1528 		   QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1529 	desc.qw2 = 0;
1530 	desc.qw3 = 0;
1531 
1532 	qi_submit_sync(iommu, &desc, 1, 0);
1533 }
1534 
1535 /* PASID-based IOTLB invalidation */
1536 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1537 		     unsigned long npages, bool ih)
1538 {
1539 	struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1540 
1541 	/*
1542 	 * npages == -1 means a PASID-selective invalidation, otherwise,
1543 	 * a positive value for Page-selective-within-PASID invalidation.
1544 	 * 0 is not a valid input.
1545 	 */
1546 	if (WARN_ON(!npages)) {
1547 		pr_err("Invalid input npages = %ld\n", npages);
1548 		return;
1549 	}
1550 
1551 	if (npages == -1) {
1552 		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1553 				QI_EIOTLB_DID(did) |
1554 				QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1555 				QI_EIOTLB_TYPE;
1556 		desc.qw1 = 0;
1557 	} else {
1558 		int mask = ilog2(__roundup_pow_of_two(npages));
1559 		unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1560 
1561 		if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1562 			addr = ALIGN_DOWN(addr, align);
1563 
1564 		desc.qw0 = QI_EIOTLB_PASID(pasid) |
1565 				QI_EIOTLB_DID(did) |
1566 				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1567 				QI_EIOTLB_TYPE;
1568 		desc.qw1 = QI_EIOTLB_ADDR(addr) |
1569 				QI_EIOTLB_IH(ih) |
1570 				QI_EIOTLB_AM(mask);
1571 	}
1572 
1573 	qi_submit_sync(iommu, &desc, 1, 0);
1574 }
1575 
1576 /* PASID-based device IOTLB Invalidate */
1577 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1578 			      u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1579 {
1580 	unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1581 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1582 
1583 	desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1584 		QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1585 		QI_DEV_IOTLB_PFSID(pfsid);
1586 
1587 	/*
1588 	 * If S bit is 0, we only flush a single page. If S bit is set,
1589 	 * The least significant zero bit indicates the invalidation address
1590 	 * range. VT-d spec 6.5.2.6.
1591 	 * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
1592 	 * size order = 0 is PAGE_SIZE 4KB
1593 	 * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1594 	 * ECAP.
1595 	 */
1596 	if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
1597 		pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1598 				    addr, size_order);
1599 
1600 	/* Take page address */
1601 	desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1602 
1603 	if (size_order) {
1604 		/*
1605 		 * Existing 0s in address below size_order may be the least
1606 		 * significant bit, we must set them to 1s to avoid having
1607 		 * smaller size than desired.
1608 		 */
1609 		desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1610 					VTD_PAGE_SHIFT);
1611 		/* Clear size_order bit to indicate size */
1612 		desc.qw1 &= ~mask;
1613 		/* Set the S bit to indicate flushing more than 1 page */
1614 		desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1615 	}
1616 
1617 	qi_submit_sync(iommu, &desc, 1, 0);
1618 }
1619 
1620 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1621 			  u64 granu, u32 pasid)
1622 {
1623 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1624 
1625 	desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1626 			QI_PC_GRAN(granu) | QI_PC_TYPE;
1627 	qi_submit_sync(iommu, &desc, 1, 0);
1628 }
1629 
1630 /*
1631  * Disable Queued Invalidation interface.
1632  */
1633 void dmar_disable_qi(struct intel_iommu *iommu)
1634 {
1635 	unsigned long flags;
1636 	u32 sts;
1637 	cycles_t start_time = get_cycles();
1638 
1639 	if (!ecap_qis(iommu->ecap))
1640 		return;
1641 
1642 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1643 
1644 	sts =  readl(iommu->reg + DMAR_GSTS_REG);
1645 	if (!(sts & DMA_GSTS_QIES))
1646 		goto end;
1647 
1648 	/*
1649 	 * Give a chance to HW to complete the pending invalidation requests.
1650 	 */
1651 	while ((readl(iommu->reg + DMAR_IQT_REG) !=
1652 		readl(iommu->reg + DMAR_IQH_REG)) &&
1653 		(DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1654 		cpu_relax();
1655 
1656 	iommu->gcmd &= ~DMA_GCMD_QIE;
1657 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1658 
1659 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1660 		      !(sts & DMA_GSTS_QIES), sts);
1661 end:
1662 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1663 }
1664 
1665 /*
1666  * Enable queued invalidation.
1667  */
1668 static void __dmar_enable_qi(struct intel_iommu *iommu)
1669 {
1670 	u32 sts;
1671 	unsigned long flags;
1672 	struct q_inval *qi = iommu->qi;
1673 	u64 val = virt_to_phys(qi->desc);
1674 
1675 	qi->free_head = qi->free_tail = 0;
1676 	qi->free_cnt = QI_LENGTH;
1677 
1678 	/*
1679 	 * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1680 	 * is present.
1681 	 */
1682 	if (ecap_smts(iommu->ecap))
1683 		val |= (1 << 11) | 1;
1684 
1685 	raw_spin_lock_irqsave(&iommu->register_lock, flags);
1686 
1687 	/* write zero to the tail reg */
1688 	writel(0, iommu->reg + DMAR_IQT_REG);
1689 
1690 	dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1691 
1692 	iommu->gcmd |= DMA_GCMD_QIE;
1693 	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1694 
1695 	/* Make sure hardware complete it */
1696 	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1697 
1698 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1699 }
1700 
1701 /*
1702  * Enable Queued Invalidation interface. This is a must to support
1703  * interrupt-remapping. Also used by DMA-remapping, which replaces
1704  * register based IOTLB invalidation.
1705  */
1706 int dmar_enable_qi(struct intel_iommu *iommu)
1707 {
1708 	struct q_inval *qi;
1709 	struct page *desc_page;
1710 
1711 	if (!ecap_qis(iommu->ecap))
1712 		return -ENOENT;
1713 
1714 	/*
1715 	 * queued invalidation is already setup and enabled.
1716 	 */
1717 	if (iommu->qi)
1718 		return 0;
1719 
1720 	iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1721 	if (!iommu->qi)
1722 		return -ENOMEM;
1723 
1724 	qi = iommu->qi;
1725 
1726 	/*
1727 	 * Need two pages to accommodate 256 descriptors of 256 bits each
1728 	 * if the remapping hardware supports scalable mode translation.
1729 	 */
1730 	desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1731 				     !!ecap_smts(iommu->ecap));
1732 	if (!desc_page) {
1733 		kfree(qi);
1734 		iommu->qi = NULL;
1735 		return -ENOMEM;
1736 	}
1737 
1738 	qi->desc = page_address(desc_page);
1739 
1740 	qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1741 	if (!qi->desc_status) {
1742 		free_page((unsigned long) qi->desc);
1743 		kfree(qi);
1744 		iommu->qi = NULL;
1745 		return -ENOMEM;
1746 	}
1747 
1748 	raw_spin_lock_init(&qi->q_lock);
1749 
1750 	__dmar_enable_qi(iommu);
1751 
1752 	return 0;
1753 }
1754 
/* IOMMU interrupt handling. Most of it is MSI-like. */
1756 
/* Fault class reported by dmar_get_fault_reason() through *fault_type. */
enum faulttype {
	DMA_REMAP,	/* reason code belongs to a DMA remapping table */
	INTR_REMAP,	/* reason code belongs to the interrupt remapping table */
	UNKNOWN,	/* reason code matches none of the tables */
};
1762 
/*
 * Descriptions of the DMA remapping fault reasons, indexed directly by
 * the fault reason code (see dmar_get_fault_reason()). The pointer array
 * itself is const as well, like dma_remap_sm_fault_reasons, so the table
 * cannot be modified at run time.
 */
static const char * const dma_remap_fault_reasons[] = {
	"Software",
	"Present bit in root entry is clear",
	"Present bit in context entry is clear",
	"Invalid context entry",
	"Access beyond MGAW",
	"PTE Write access is not set",
	"PTE Read access is not set",
	"Next page table ptr is invalid",
	"Root table address invalid",
	"Context table ptr is invalid",
	"non-zero reserved fields in RTP",
	"non-zero reserved fields in CTP",
	"non-zero reserved fields in PTE",
	"PCE for translation request specifies blocking",
};
1780 
/*
 * Descriptions of the scalable-mode ("SM") DMA remapping fault reasons.
 * The table is indexed by (fault reason code - 0x30); see
 * dmar_get_fault_reason(). Codes without a description are filled with
 * "Unknown" so that every later entry stays at the index matching its
 * code; the trailing comments give the code range each filler run covers.
 */
static const char * const dma_remap_sm_fault_reasons[] = {
	"SM: Invalid Root Table Address",
	"SM: TTM 0 for request with PASID",
	"SM: TTM 0 for page group request",
	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
	"SM: Error attempting to access Root Entry",
	"SM: Present bit in Root Entry is clear",
	"SM: Non-zero reserved field set in Root Entry",
	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
	"SM: Error attempting to access Context Entry",
	"SM: Present bit in Context Entry is clear",
	"SM: Non-zero reserved field set in the Context Entry",
	"SM: Invalid Context Entry",
	"SM: DTE field in Context Entry is clear",
	"SM: PASID Enable field in Context Entry is clear",
	"SM: PASID is larger than the max in Context Entry",
	"SM: PRE field in Context-Entry is clear",
	"SM: RID_PASID field error in Context-Entry",
	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
	"SM: Error attempting to access the PASID Directory Entry",
	"SM: Present bit in Directory Entry is clear",
	"SM: Non-zero reserved field set in PASID Directory Entry",
	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
	"SM: Error attempting to access PASID Table Entry",
	"SM: Present bit in PASID Table Entry is clear",
	"SM: Non-zero reserved field set in PASID Table Entry",
	"SM: Invalid Scalable-Mode PASID Table Entry",
	"SM: ERE field is clear in PASID Table Entry",
	"SM: SRE field is clear in PASID Table Entry",
	"Unknown", "Unknown",/* 0x5E-0x5F */
	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
	"SM: Error attempting to access first-level paging entry",
	"SM: Present bit in first-level paging entry is clear",
	"SM: Non-zero reserved field set in first-level paging entry",
	"SM: Error attempting to access FL-PML4 entry",
	"SM: First-level entry address beyond MGAW in Nested translation",
	"SM: Read permission error in FL-PML4 entry in Nested translation",
	"SM: Read permission error in first-level paging entry in Nested translation",
	"SM: Write permission error in first-level paging entry in Nested translation",
	"SM: Error attempting to access second-level paging entry",
	"SM: Read/Write permission error in second-level paging entry",
	"SM: Non-zero reserved field set in second-level paging entry",
	"SM: Invalid second-level page table pointer",
	"SM: A/D bit update needed in second-level entry when set up in no snoop",
	"Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
	"SM: Address in first-level translation is not canonical",
	"SM: U/S set 0 for first-level translation with user privilege",
	"SM: No execute permission for request with PASID and ER=1",
	"SM: Address beyond the DMA hardware max",
	"SM: Second-level entry address beyond the max",
	"SM: No write permission for Write/AtomicOp request",
	"SM: No read permission for Read/AtomicOp request",
	"SM: Invalid address-interrupt address",
	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
	"SM: A/D bit update needed in first-level entry when set up in no snoop",
};
1838 
/*
 * Descriptions of the interrupt remapping fault reasons, indexed by
 * (fault reason code - 0x20); see dmar_get_fault_reason(). The pointer
 * array itself is const as well, like dma_remap_sm_fault_reasons, so the
 * table cannot be modified at run time.
 */
static const char * const irq_remap_fault_reasons[] = {
	"Detected reserved fields in the decoded interrupt-remapped request",
	"Interrupt index exceeded the interrupt-remapping table size",
	"Present field in the IRTE entry is clear",
	"Error accessing interrupt-remapping table pointed by IRTA_REG",
	"Detected reserved fields in the IRTE entry",
	"Blocked a compatibility format interrupt request",
	"Blocked an interrupt request due to source-id verification failure",
};
1849 
1850 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1851 {
1852 	if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1853 					ARRAY_SIZE(irq_remap_fault_reasons))) {
1854 		*fault_type = INTR_REMAP;
1855 		return irq_remap_fault_reasons[fault_reason - 0x20];
1856 	} else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1857 			ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1858 		*fault_type = DMA_REMAP;
1859 		return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1860 	} else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1861 		*fault_type = DMA_REMAP;
1862 		return dma_remap_fault_reasons[fault_reason];
1863 	} else {
1864 		*fault_type = UNKNOWN;
1865 		return "Unknown";
1866 	}
1867 }
1868 
1869 
1870 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1871 {
1872 	if (iommu->irq == irq)
1873 		return DMAR_FECTL_REG;
1874 	else if (iommu->pr_irq == irq)
1875 		return DMAR_PECTL_REG;
1876 	else
1877 		BUG();
1878 }
1879 
1880 void dmar_msi_unmask(struct irq_data *data)
1881 {
1882 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1883 	int reg = dmar_msi_reg(iommu, data->irq);
1884 	unsigned long flag;
1885 
1886 	/* unmask it */
1887 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1888 	writel(0, iommu->reg + reg);
1889 	/* Read a reg to force flush the post write */
1890 	readl(iommu->reg + reg);
1891 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1892 }
1893 
1894 void dmar_msi_mask(struct irq_data *data)
1895 {
1896 	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1897 	int reg = dmar_msi_reg(iommu, data->irq);
1898 	unsigned long flag;
1899 
1900 	/* mask it */
1901 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1902 	writel(DMA_FECTL_IM, iommu->reg + reg);
1903 	/* Read a reg to force flush the post write */
1904 	readl(iommu->reg + reg);
1905 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1906 }
1907 
1908 void dmar_msi_write(int irq, struct msi_msg *msg)
1909 {
1910 	struct intel_iommu *iommu = irq_get_handler_data(irq);
1911 	int reg = dmar_msi_reg(iommu, irq);
1912 	unsigned long flag;
1913 
1914 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1915 	writel(msg->data, iommu->reg + reg + 4);
1916 	writel(msg->address_lo, iommu->reg + reg + 8);
1917 	writel(msg->address_hi, iommu->reg + reg + 12);
1918 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1919 }
1920 
1921 void dmar_msi_read(int irq, struct msi_msg *msg)
1922 {
1923 	struct intel_iommu *iommu = irq_get_handler_data(irq);
1924 	int reg = dmar_msi_reg(iommu, irq);
1925 	unsigned long flag;
1926 
1927 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
1928 	msg->data = readl(iommu->reg + reg + 4);
1929 	msg->address_lo = readl(iommu->reg + reg + 8);
1930 	msg->address_hi = readl(iommu->reg + reg + 12);
1931 	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1932 }
1933 
1934 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1935 		u8 fault_reason, u32 pasid, u16 source_id,
1936 		unsigned long long addr)
1937 {
1938 	const char *reason;
1939 	int fault_type;
1940 
1941 	reason = dmar_get_fault_reason(fault_reason, &fault_type);
1942 
1943 	if (fault_type == INTR_REMAP) {
1944 		pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
1945 		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1946 		       PCI_FUNC(source_id & 0xFF), addr >> 48,
1947 		       fault_reason, reason);
1948 
1949 		return 0;
1950 	}
1951 
1952 	if (pasid == INVALID_IOASID)
1953 		pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1954 		       type ? "DMA Read" : "DMA Write",
1955 		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1956 		       PCI_FUNC(source_id & 0xFF), addr,
1957 		       fault_reason, reason);
1958 	else
1959 		pr_err("[%s PASID 0x%x] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1960 		       type ? "DMA Read" : "DMA Write", pasid,
1961 		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1962 		       PCI_FUNC(source_id & 0xFF), addr,
1963 		       fault_reason, reason);
1964 
1965 	dmar_fault_dump_ptes(iommu, source_id, addr, pasid);
1966 
1967 	return 0;
1968 }
1969 
/* Size in bytes of one primary fault recording register. */
#define PRIMARY_FAULT_REG_LEN (16)
/*
 * Fault interrupt handler.
 *
 * @irq:    interrupt number (not used by the handler body)
 * @dev_id: the struct intel_iommu the interrupt was requested for
 *
 * Reads the fault status register and, when a primary fault is pending,
 * walks the fault recording registers starting at the index reported by
 * the status register. Each record with the F bit set is decoded (unless
 * rate limited), cleared, and reported through dmar_fault_do_one(). The
 * walk ends at the first record without the F bit, after which the
 * PFO/PPF/PRO status bits are cleared. Always returns IRQ_HANDLED.
 */
irqreturn_t dmar_fault(int irq, void *dev_id)
{
	struct intel_iommu *iommu = dev_id;
	int reg, fault_index;
	u32 fault_status;
	unsigned long flag;
	static DEFINE_RATELIMIT_STATE(rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
	if (fault_status && __ratelimit(&rs))
		pr_err("DRHD: handling fault status reg %x\n", fault_status);

	/* TBD: ignore advanced fault log currently */
	if (!(fault_status & DMA_FSTS_PPF))
		goto unlock_exit;

	fault_index = dma_fsts_fault_record_index(fault_status);
	reg = cap_fault_reg_offset(iommu->cap);
	while (1) {
		/* Disable printing, simply clear the fault when ratelimited */
		bool ratelimited = !__ratelimit(&rs);
		u8 fault_reason;
		u16 source_id;
		u64 guest_addr;
		u32 pasid;
		int type;
		u32 data;
		bool pasid_present;

		/* highest 32 bits */
		data = readl(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN + 12);
		/* No fault recorded in this slot: the walk is finished. */
		if (!(data & DMA_FRCD_F))
			break;

		/*
		 * The decoded fields are only consumed by the report below,
		 * which is skipped as well when rate limited.
		 */
		if (!ratelimited) {
			fault_reason = dma_frcd_fault_reason(data);
			type = dma_frcd_type(data);

			pasid = dma_frcd_pasid_value(data);
			data = readl(iommu->reg + reg +
				     fault_index * PRIMARY_FAULT_REG_LEN + 8);
			source_id = dma_frcd_source_id(data);

			pasid_present = dma_frcd_pasid_present(data);
			guest_addr = dmar_readq(iommu->reg + reg +
					fault_index * PRIMARY_FAULT_REG_LEN);
			guest_addr = dma_frcd_page_addr(guest_addr);
		}

		/* clear the fault */
		writel(DMA_FRCD_F, iommu->reg + reg +
			fault_index * PRIMARY_FAULT_REG_LEN + 12);

		/* The register lock is dropped while the fault is reported. */
		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

		if (!ratelimited)
			/* Using pasid -1 if pasid is not present */
			dmar_fault_do_one(iommu, type, fault_reason,
					  pasid_present ? pasid : INVALID_IOASID,
					  source_id, guest_addr);

		/* Advance to the next record, wrapping around at the end. */
		fault_index++;
		if (fault_index >= cap_num_fault_regs(iommu->cap))
			fault_index = 0;
		raw_spin_lock_irqsave(&iommu->register_lock, flag);
	}

	writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
	       iommu->reg + DMAR_FSTS_REG);

unlock_exit:
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	return IRQ_HANDLED;
}
2049 
2050 int dmar_set_interrupt(struct intel_iommu *iommu)
2051 {
2052 	int irq, ret;
2053 
2054 	/*
2055 	 * Check if the fault interrupt is already initialized.
2056 	 */
2057 	if (iommu->irq)
2058 		return 0;
2059 
2060 	irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
2061 	if (irq > 0) {
2062 		iommu->irq = irq;
2063 	} else {
2064 		pr_err("No free IRQ vectors\n");
2065 		return -EINVAL;
2066 	}
2067 
2068 	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
2069 	if (ret)
2070 		pr_err("Can't request irq\n");
2071 	return ret;
2072 }
2073 
2074 int __init enable_drhd_fault_handling(void)
2075 {
2076 	struct dmar_drhd_unit *drhd;
2077 	struct intel_iommu *iommu;
2078 
2079 	/*
2080 	 * Enable fault control interrupt.
2081 	 */
2082 	for_each_iommu(iommu, drhd) {
2083 		u32 fault_status;
2084 		int ret = dmar_set_interrupt(iommu);
2085 
2086 		if (ret) {
2087 			pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
2088 			       (unsigned long long)drhd->reg_base_addr, ret);
2089 			return -1;
2090 		}
2091 
2092 		/*
2093 		 * Clear any previous faults.
2094 		 */
2095 		dmar_fault(iommu->irq, iommu);
2096 		fault_status = readl(iommu->reg + DMAR_FSTS_REG);
2097 		writel(fault_status, iommu->reg + DMAR_FSTS_REG);
2098 	}
2099 
2100 	return 0;
2101 }
2102 
2103 /*
2104  * Re-enable Queued Invalidation interface.
2105  */
2106 int dmar_reenable_qi(struct intel_iommu *iommu)
2107 {
2108 	if (!ecap_qis(iommu->ecap))
2109 		return -ENOENT;
2110 
2111 	if (!iommu->qi)
2112 		return -ENOENT;
2113 
2114 	/*
2115 	 * First disable queued invalidation.
2116 	 */
2117 	dmar_disable_qi(iommu);
2118 	/*
2119 	 * Then enable queued invalidation again. Since there is no pending
2120 	 * invalidation requests now, it's safe to re-enable queued
2121 	 * invalidation.
2122 	 */
2123 	__dmar_enable_qi(iommu);
2124 
2125 	return 0;
2126 }
2127 
2128 /*
2129  * Check interrupt remapping support in DMAR table description.
2130  */
2131 int __init dmar_ir_support(void)
2132 {
2133 	struct acpi_table_dmar *dmar;
2134 	dmar = (struct acpi_table_dmar *)dmar_tbl;
2135 	if (!dmar)
2136 		return 0;
2137 	return dmar->flags & 0x1;
2138 }
2139 
2140 /* Check whether DMAR units are in use */
2141 static inline bool dmar_in_use(void)
2142 {
2143 	return irq_remapping_enabled || intel_iommu_enabled;
2144 }
2145 
2146 static int __init dmar_free_unused_resources(void)
2147 {
2148 	struct dmar_drhd_unit *dmaru, *dmaru_n;
2149 
2150 	if (dmar_in_use())
2151 		return 0;
2152 
2153 	if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2154 		bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2155 
2156 	down_write(&dmar_global_lock);
2157 	list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2158 		list_del(&dmaru->list);
2159 		dmar_free_drhd(dmaru);
2160 	}
2161 	up_write(&dmar_global_lock);
2162 
2163 	return 0;
2164 }
2165 
2166 late_initcall(dmar_free_unused_resources);
2167 IOMMU_INIT_POST(detect_intel_iommu);
2168 
/*
 * DMAR Hotplug Support
 * For more details, please refer to Intel(R) Virtualization Technology
 * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
 * "Remapping Hardware Unit Hot Plug".
 */
2175 static guid_t dmar_hp_guid =
2176 	GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2177 		  0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2178 
2179 /*
2180  * Currently there's only one revision and BIOS will not check the revision id,
2181  * so use 0 for safety.
2182  */
2183 #define	DMAR_DSM_REV_ID			0
2184 #define	DMAR_DSM_FUNC_DRHD		1
2185 #define	DMAR_DSM_FUNC_ATSR		2
2186 #define	DMAR_DSM_FUNC_RHSA		3
2187 #define	DMAR_DSM_FUNC_SATC		4
2188 
2189 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2190 {
2191 	return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2192 }
2193 
2194 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2195 				  dmar_res_handler_t handler, void *arg)
2196 {
2197 	int ret = -ENODEV;
2198 	union acpi_object *obj;
2199 	struct acpi_dmar_header *start;
2200 	struct dmar_res_callback callback;
2201 	static int res_type[] = {
2202 		[DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2203 		[DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2204 		[DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2205 		[DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC,
2206 	};
2207 
2208 	if (!dmar_detect_dsm(handle, func))
2209 		return 0;
2210 
2211 	obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2212 				      func, NULL, ACPI_TYPE_BUFFER);
2213 	if (!obj)
2214 		return -ENODEV;
2215 
2216 	memset(&callback, 0, sizeof(callback));
2217 	callback.cb[res_type[func]] = handler;
2218 	callback.arg[res_type[func]] = arg;
2219 	start = (struct acpi_dmar_header *)obj->buffer.pointer;
2220 	ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2221 
2222 	ACPI_FREE(obj);
2223 
2224 	return ret;
2225 }
2226 
2227 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2228 {
2229 	int ret;
2230 	struct dmar_drhd_unit *dmaru;
2231 
2232 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2233 	if (!dmaru)
2234 		return -ENODEV;
2235 
2236 	ret = dmar_ir_hotplug(dmaru, true);
2237 	if (ret == 0)
2238 		ret = dmar_iommu_hotplug(dmaru, true);
2239 
2240 	return ret;
2241 }
2242 
2243 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2244 {
2245 	int i, ret;
2246 	struct device *dev;
2247 	struct dmar_drhd_unit *dmaru;
2248 
2249 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2250 	if (!dmaru)
2251 		return 0;
2252 
2253 	/*
2254 	 * All PCI devices managed by this unit should have been destroyed.
2255 	 */
2256 	if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2257 		for_each_active_dev_scope(dmaru->devices,
2258 					  dmaru->devices_cnt, i, dev)
2259 			return -EBUSY;
2260 	}
2261 
2262 	ret = dmar_ir_hotplug(dmaru, false);
2263 	if (ret == 0)
2264 		ret = dmar_iommu_hotplug(dmaru, false);
2265 
2266 	return ret;
2267 }
2268 
2269 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2270 {
2271 	struct dmar_drhd_unit *dmaru;
2272 
2273 	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2274 	if (dmaru) {
2275 		list_del_rcu(&dmaru->list);
2276 		synchronize_rcu();
2277 		dmar_free_drhd(dmaru);
2278 	}
2279 
2280 	return 0;
2281 }
2282 
2283 static int dmar_hotplug_insert(acpi_handle handle)
2284 {
2285 	int ret;
2286 	int drhd_count = 0;
2287 
2288 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2289 				     &dmar_validate_one_drhd, (void *)1);
2290 	if (ret)
2291 		goto out;
2292 
2293 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2294 				     &dmar_parse_one_drhd, (void *)&drhd_count);
2295 	if (ret == 0 && drhd_count == 0) {
2296 		pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2297 		goto out;
2298 	} else if (ret) {
2299 		goto release_drhd;
2300 	}
2301 
2302 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2303 				     &dmar_parse_one_rhsa, NULL);
2304 	if (ret)
2305 		goto release_drhd;
2306 
2307 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2308 				     &dmar_parse_one_atsr, NULL);
2309 	if (ret)
2310 		goto release_atsr;
2311 
2312 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2313 				     &dmar_hp_add_drhd, NULL);
2314 	if (!ret)
2315 		return 0;
2316 
2317 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2318 			       &dmar_hp_remove_drhd, NULL);
2319 release_atsr:
2320 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2321 			       &dmar_release_one_atsr, NULL);
2322 release_drhd:
2323 	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2324 			       &dmar_hp_release_drhd, NULL);
2325 out:
2326 	return ret;
2327 }
2328 
2329 static int dmar_hotplug_remove(acpi_handle handle)
2330 {
2331 	int ret;
2332 
2333 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2334 				     &dmar_check_one_atsr, NULL);
2335 	if (ret)
2336 		return ret;
2337 
2338 	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2339 				     &dmar_hp_remove_drhd, NULL);
2340 	if (ret == 0) {
2341 		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2342 					       &dmar_release_one_atsr, NULL));
2343 		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2344 					       &dmar_hp_release_drhd, NULL));
2345 	} else {
2346 		dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2347 				       &dmar_hp_add_drhd, NULL);
2348 	}
2349 
2350 	return ret;
2351 }
2352 
2353 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2354 				       void *context, void **retval)
2355 {
2356 	acpi_handle *phdl = retval;
2357 
2358 	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2359 		*phdl = handle;
2360 		return AE_CTRL_TERMINATE;
2361 	}
2362 
2363 	return AE_OK;
2364 }
2365 
2366 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2367 {
2368 	int ret;
2369 	acpi_handle tmp = NULL;
2370 	acpi_status status;
2371 
2372 	if (!dmar_in_use())
2373 		return 0;
2374 
2375 	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2376 		tmp = handle;
2377 	} else {
2378 		status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2379 					     ACPI_UINT32_MAX,
2380 					     dmar_get_dsm_handle,
2381 					     NULL, NULL, &tmp);
2382 		if (ACPI_FAILURE(status)) {
2383 			pr_warn("Failed to locate _DSM method.\n");
2384 			return -ENXIO;
2385 		}
2386 	}
2387 	if (tmp == NULL)
2388 		return 0;
2389 
2390 	down_write(&dmar_global_lock);
2391 	if (insert)
2392 		ret = dmar_hotplug_insert(tmp);
2393 	else
2394 		ret = dmar_hotplug_remove(tmp);
2395 	up_write(&dmar_global_lock);
2396 
2397 	return ret;
2398 }
2399 
2400 int dmar_device_add(acpi_handle handle)
2401 {
2402 	return dmar_device_hotplug(handle, true);
2403 }
2404 
2405 int dmar_device_remove(acpi_handle handle)
2406 {
2407 	return dmar_device_hotplug(handle, false);
2408 }
2409 
2410 /*
2411  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2412  *
2413  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2414  * the ACPI DMAR table. This means that the platform boot firmware has made
2415  * sure no device can issue DMA outside of RMRR regions.
2416  */
2417 bool dmar_platform_optin(void)
2418 {
2419 	struct acpi_table_dmar *dmar;
2420 	acpi_status status;
2421 	bool ret;
2422 
2423 	status = acpi_get_table(ACPI_SIG_DMAR, 0,
2424 				(struct acpi_table_header **)&dmar);
2425 	if (ACPI_FAILURE(status))
2426 		return false;
2427 
2428 	ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2429 	acpi_put_table((struct acpi_table_header *)dmar);
2430 
2431 	return ret;
2432 }
2433 EXPORT_SYMBOL_GPL(dmar_platform_optin);
2434