// SPDX-License-Identifier: GPL-2.0
/*
 * Handle device page faults
 *
 * Copyright (C) 2020 ARM Ltd.
 */

#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

#include "iommu-priv.h"

/*
 * Return the fault parameter of a device if it exists. Otherwise, return NULL.
 * On a successful return, the caller takes a reference of this parameter and
 * should put it after use by calling iopf_put_dev_fault_param().
 */
static struct iommu_fault_param *iopf_get_dev_fault_param(struct device *dev)
{
	struct dev_iommu *param = dev->iommu;
	struct iommu_fault_param *fault_param;

	rcu_read_lock();
	fault_param = rcu_dereference(param->fault_param);
	if (fault_param && !refcount_inc_not_zero(&fault_param->users))
		fault_param = NULL;
	rcu_read_unlock();

	return fault_param;
}

/* Caller must hold a reference of the fault parameter. */
static void iopf_put_dev_fault_param(struct iommu_fault_param *fault_param)
{
	if (refcount_dec_and_test(&fault_param->users))
		kfree_rcu(fault_param, rcu);
}
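/*
 * The get/put pair above follows the common RCU + refcount lookup pattern:
 * a reader dereferences the pointer under rcu_read_lock() and only wins a
 * reference if the count is still non-zero, while the last put frees the
 * object after a grace period via kfree_rcu(). A minimal, self-contained
 * sketch of the same pattern (struct my_obj, my_obj_ptr and the my_obj_*()
 * helpers are hypothetical, not part of this file):
 *
 *	struct my_obj {
 *		refcount_t users;
 *		struct rcu_head rcu;
 *	};
 *
 *	static struct my_obj __rcu *my_obj_ptr;
 *
 *	static struct my_obj *my_obj_get(void)
 *	{
 *		struct my_obj *obj;
 *
 *		rcu_read_lock();
 *		obj = rcu_dereference(my_obj_ptr);
 *		if (obj && !refcount_inc_not_zero(&obj->users))
 *			obj = NULL;	// lost the race against the last put
 *		rcu_read_unlock();
 *
 *		return obj;
 *	}
 *
 *	static void my_obj_put(struct my_obj *obj)
 *	{
 *		if (refcount_dec_and_test(&obj->users))
 *			kfree_rcu(obj, rcu);	// freed after a grace period
 *	}
 */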
static void __iopf_free_group(struct iopf_group *group)
{
	struct iopf_fault *iopf, *next;

	list_for_each_entry_safe(iopf, next, &group->faults, list) {
		if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
			kfree(iopf);
	}

	/* Pair with iommu_report_device_fault(). */
	iopf_put_dev_fault_param(group->fault_param);
}

void iopf_free_group(struct iopf_group *group)
{
	__iopf_free_group(group);
	kfree(group);
}
EXPORT_SYMBOL_GPL(iopf_free_group);

static struct iommu_domain *get_domain_for_iopf(struct device *dev,
						struct iommu_fault *fault)
{
	struct iommu_domain *domain;

	if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
		domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0);
		if (IS_ERR(domain))
			domain = NULL;
	} else {
		domain = iommu_get_domain_for_dev(dev);
	}

	if (!domain || !domain->iopf_handler) {
		dev_warn_ratelimited(dev,
			"iopf (pasid %d) without domain attached or handler installed\n",
			fault->prm.pasid);

		return NULL;
	}

	return domain;
}

/* Non-last request of a group. Postpone until the last one. */
static int report_partial_fault(struct iommu_fault_param *fault_param,
				struct iommu_fault *fault)
{
	struct iopf_fault *iopf;

	iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
	if (!iopf)
		return -ENOMEM;

	iopf->fault = *fault;

	mutex_lock(&fault_param->lock);
	list_add(&iopf->list, &fault_param->partial);
	mutex_unlock(&fault_param->lock);

	return 0;
}

static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
					   struct iopf_fault *evt,
					   struct iopf_group *abort_group)
{
	struct iopf_fault *iopf, *next;
	struct iopf_group *group;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group) {
		/*
		 * We always need to construct the group as we need it to abort
		 * the request at the driver if it can't be handled.
		 */
		group = abort_group;
	}

	group->fault_param = iopf_param;
	group->last_fault.fault = evt->fault;
	INIT_LIST_HEAD(&group->faults);
	INIT_LIST_HEAD(&group->pending_node);
	list_add(&group->last_fault.list, &group->faults);

	/* See if we have partial faults for this group */
	mutex_lock(&iopf_param->lock);
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
		if (iopf->fault.prm.grpid == evt->fault.prm.grpid)
			/* Insert *before* the last fault */
			list_move(&iopf->list, &group->faults);
	}
	list_add(&group->pending_node, &iopf_param->faults);
	mutex_unlock(&iopf_param->lock);

	return group;
}

/**
 * iommu_report_device_fault() - Report fault event to device driver
 * @dev: the device
 * @evt: fault event data
 *
 * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
 * handler. If this function fails then ops->page_response() was called to
 * complete evt if required.
 *
 * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard
 * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't
 * expect a response. It may be generated when disabling a PASID (issuing a
 * PASID stop request) by some PCI devices.
 *
 * The PASID stop request is issued by the device driver before unbind(). Once
 * it completes, no page request is generated for this PASID anymore and
 * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
 * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
 * for all outstanding page requests to come back with a response before
 * completing the PASID stop request. Others do not wait for page responses, and
 * instead issue this Stop Marker that tells us when the PASID can be
 * reallocated.
 *
 * It is safe to discard the Stop Marker because it is an optimization.
 * a. Page requests, which are posted requests, have been flushed to the IOMMU
 *    when the stop request completes.
 * b. The IOMMU driver flushes all fault queues on unbind() before freeing the
 *    PASID.
 *
 * So even though the Stop Marker might be issued by the device *after* the stop
 * request completes, outstanding faults will have been dealt with by the time
 * the PASID is freed.
 *
 * Any valid page fault will eventually be routed to an iommu domain and the
 * page fault handler installed there will get called. The users of this
 * handling framework should guarantee that the iommu domain can only be
 * freed after the device has stopped generating page faults (or the iommu
 * hardware has been set to block the page faults) and the pending page faults
 * have been flushed.
 */
void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
{
	struct iommu_fault *fault = &evt->fault;
	struct iommu_fault_param *iopf_param;
	struct iopf_group abort_group = {};
	struct iopf_group *group;

	iopf_param = iopf_get_dev_fault_param(dev);
	if (WARN_ON(!iopf_param))
		return;

	if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
		report_partial_fault(iopf_param, fault);
		iopf_put_dev_fault_param(iopf_param);
		/* A request that is not the last does not need to be ack'd */
		return;
	}

	/*
	 * This is the last page fault of a group. Allocate an iopf group and
	 * pass it to the domain's page fault handler. The group holds a
	 * reference count on the fault parameter, which is released after the
	 * response or on the error path of this function. If the handler
	 * returns an error, this function sends the response to the hardware.
	 * We need to clean up before leaving, otherwise partial faults will be
	 * stuck.
	 */
	group = iopf_group_alloc(iopf_param, evt, &abort_group);
	if (group == &abort_group)
		goto err_abort;

	group->domain = get_domain_for_iopf(dev, fault);
	if (!group->domain)
		goto err_abort;

	/*
	 * On success iopf_handler must call iopf_group_response() and
	 * iopf_free_group()
	 */
	if (group->domain->iopf_handler(group))
		goto err_abort;

	return;

err_abort:
	iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE);
	if (group == &abort_group)
		__iopf_free_group(group);
	else
		iopf_free_group(group);
}
EXPORT_SYMBOL_GPL(iommu_report_device_fault);
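/*
 * Illustrative sketch of the producer side (not part of this file): a
 * hypothetical IOMMU driver's threaded IRQ handler translating one hardware
 * page request descriptor into a struct iopf_fault and reporting it. The
 * struct my_prq_desc layout and its fields are made up; only the
 * iopf_fault/iommu_fault fields and flags are real.
 *
 *	static void my_iommu_handle_prq_desc(struct device *dev,
 *					     struct my_prq_desc *desc)
 *	{
 *		struct iopf_fault evt = {
 *			.fault = {
 *				.type = IOMMU_FAULT_PAGE_REQ,
 *				.prm = {
 *					.pasid = desc->pasid,
 *					.grpid = desc->prgi,
 *					.addr  = desc->addr,
 *					.perm  = desc->write ?
 *						 IOMMU_FAULT_PERM_WRITE :
 *						 IOMMU_FAULT_PERM_READ,
 *				},
 *			},
 *		};
 *
 *		if (desc->pasid_valid)
 *			evt.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
 *		if (desc->last)
 *			evt.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
 *
 *		iommu_report_device_fault(dev, &evt);
 *	}
 *
 * Non-last requests of a group are queued as partial faults; the request
 * carrying IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE triggers delivery of the whole
 * group to the domain's fault handler.
 */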
/**
 * iopf_queue_flush_dev - Ensure that all queued faults have been processed
 * @dev: the endpoint whose faults need to be flushed.
 *
 * The IOMMU driver calls this before releasing a PASID, to ensure that all
 * pending faults for this PASID have been handled, and won't hit the address
 * space of the next process that uses this PASID. The driver must make sure
 * that no new fault is added to the queue. In particular it must flush its
 * low-level queue before calling this function.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_flush_dev(struct device *dev)
{
	struct iommu_fault_param *iopf_param;

	/*
	 * It's a driver bug to be here after iopf_queue_remove_device().
	 * Therefore, it's safe to dereference the fault parameter without
	 * holding the lock.
	 */
	iopf_param = rcu_dereference_check(dev->iommu->fault_param, true);
	if (WARN_ON(!iopf_param))
		return -ENODEV;

	flush_workqueue(iopf_param->queue->wq);

	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
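/*
 * Illustrative ordering sketch (not part of this file): how a hypothetical
 * IOMMU driver might use iopf_queue_flush_dev() when tearing down a PASID.
 * The my_iommu_*() helpers are made up; the ordering requirements come from
 * the kernel-doc above.
 *
 *	static void my_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
 *	{
 *		// 1. Stop the hardware from queueing new page requests for
 *		//    this PASID and drain the low-level (hardware) queue.
 *		my_iommu_block_pasid(dev, pasid);
 *		my_iommu_drain_prq(dev);
 *
 *		// 2. Wait for faults already handed to the IOPF workqueue.
 *		iopf_queue_flush_dev(dev);
 *
 *		// 3. Only now is it safe to free or reuse the PASID.
 *		my_iommu_free_pasid(dev, pasid);
 *	}
 */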
/**
 * iopf_group_response - Respond to a group of page faults
 * @group: the group of faults with the same group id
 * @status: the response code
 */
void iopf_group_response(struct iopf_group *group,
			 enum iommu_page_response_code status)
{
	struct iommu_fault_param *fault_param = group->fault_param;
	struct iopf_fault *iopf = &group->last_fault;
	struct device *dev = group->fault_param->dev;
	const struct iommu_ops *ops = dev_iommu_ops(dev);
	struct iommu_page_response resp = {
		.pasid = iopf->fault.prm.pasid,
		.grpid = iopf->fault.prm.grpid,
		.code = status,
	};

	/* Only send response if there is a fault report pending */
	mutex_lock(&fault_param->lock);
	if (!list_empty(&group->pending_node)) {
		ops->page_response(dev, &group->last_fault, &resp);
		list_del_init(&group->pending_node);
	}
	mutex_unlock(&fault_param->lock);
}
EXPORT_SYMBOL_GPL(iopf_group_response);

/**
 * iopf_queue_discard_partial - Remove all pending partial faults
 * @queue: the queue whose partial faults need to be discarded
 *
 * When the hardware queue overflows, last page faults in a group may have been
 * lost and the IOMMU driver calls this to discard all partial faults. The
 * driver shouldn't be adding new faults to this queue concurrently.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
	struct iopf_fault *iopf, *next;
	struct iommu_fault_param *iopf_param;

	if (!queue)
		return -EINVAL;

	mutex_lock(&queue->lock);
	list_for_each_entry(iopf_param, &queue->devices, queue_list) {
		mutex_lock(&iopf_param->lock);
		list_for_each_entry_safe(iopf, next, &iopf_param->partial,
					 list) {
			list_del(&iopf->list);
			kfree(iopf);
		}
		mutex_unlock(&iopf_param->lock);
	}
	mutex_unlock(&queue->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);
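/*
 * Illustrative consumer sketch (not part of this file): the shape of a
 * domain->iopf_handler that honours the contract spelled out in
 * iommu_report_device_fault(): on success it must eventually call
 * iopf_group_response() and iopf_free_group(). my_resolve_fault() is
 * hypothetical; the real SVA path defers this work to the queue's
 * workqueue rather than resolving faults inline.
 *
 *	static int my_domain_iopf_handler(struct iopf_group *group)
 *	{
 *		struct iopf_fault *iopf;
 *		enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
 *
 *		list_for_each_entry(iopf, &group->faults, list) {
 *			if (my_resolve_fault(group->domain, &iopf->fault)) {
 *				status = IOMMU_PAGE_RESP_INVALID;
 *				break;
 *			}
 *		}
 *
 *		iopf_group_response(group, status);
 *		iopf_free_group(group);
 *		return 0;
 *	}
 */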
/**
 * iopf_queue_add_device - Add producer to the fault queue
 * @queue: IOPF queue
 * @dev: device to add
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
	int ret = 0;
	struct dev_iommu *param = dev->iommu;
	struct iommu_fault_param *fault_param;
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	if (!ops->page_response)
		return -ENODEV;

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	if (rcu_dereference_check(param->fault_param,
				  lockdep_is_held(&param->lock))) {
		ret = -EBUSY;
		goto done_unlock;
	}

	fault_param = kzalloc(sizeof(*fault_param), GFP_KERNEL);
	if (!fault_param) {
		ret = -ENOMEM;
		goto done_unlock;
	}

	mutex_init(&fault_param->lock);
	INIT_LIST_HEAD(&fault_param->faults);
	INIT_LIST_HEAD(&fault_param->partial);
	fault_param->dev = dev;
	refcount_set(&fault_param->users, 1);
	list_add(&fault_param->queue_list, &queue->devices);
	fault_param->queue = queue;

	rcu_assign_pointer(param->fault_param, fault_param);

done_unlock:
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);

/**
 * iopf_queue_remove_device - Remove producer from fault queue
 * @queue: IOPF queue
 * @dev: device to remove
 *
 * Removing a device from an iopf_queue. It's recommended to follow these
 * steps when removing a device:
 *
 * - Disable new PRI reception: Turn off PRI generation in the IOMMU hardware
 *   and flush any hardware page request queues. This should be done before
 *   calling into this helper.
 * - Acknowledge all outstanding PRQs to the device: Respond to all outstanding
 *   page requests with IOMMU_PAGE_RESP_INVALID, indicating the device should
 *   not retry. This helper function handles this.
 * - Disable PRI on the device: After calling this helper, the caller could
 *   then disable PRI on the device.
 *
 * Calling iopf_queue_remove_device() essentially disassociates the device.
 * The fault_param might still exist, but iommu_page_response() will do
 * nothing. The device fault parameter reference count has been properly
 * passed from iommu_report_device_fault() to the fault handling work, and
 * will eventually be released after iommu_page_response().
 */
void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
	struct iopf_fault *partial_iopf;
	struct iopf_fault *next;
	struct iopf_group *group, *temp;
	struct dev_iommu *param = dev->iommu;
	struct iommu_fault_param *fault_param;
	const struct iommu_ops *ops = dev_iommu_ops(dev);

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	fault_param = rcu_dereference_check(param->fault_param,
					    lockdep_is_held(&param->lock));

	if (WARN_ON(!fault_param || fault_param->queue != queue))
		goto unlock;

	mutex_lock(&fault_param->lock);
	list_for_each_entry_safe(partial_iopf, next, &fault_param->partial, list)
		kfree(partial_iopf);

	list_for_each_entry_safe(group, temp, &fault_param->faults, pending_node) {
		struct iopf_fault *iopf = &group->last_fault;
		struct iommu_page_response resp = {
			.pasid = iopf->fault.prm.pasid,
			.grpid = iopf->fault.prm.grpid,
			.code = IOMMU_PAGE_RESP_INVALID
		};

		ops->page_response(dev, iopf, &resp);
		list_del_init(&group->pending_node);
	}
	mutex_unlock(&fault_param->lock);

	list_del(&fault_param->queue_list);

	/* dec the ref owned by iopf_queue_add_device() */
	rcu_assign_pointer(param->fault_param, NULL);
	iopf_put_dev_fault_param(fault_param);
unlock:
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);

/**
 * iopf_queue_alloc - Allocate and initialize a fault queue
 * @name: a unique string identifying the queue (for workqueue)
 *
 * Return: the queue on success and NULL on error.
 */
struct iopf_queue *iopf_queue_alloc(const char *name)
{
	struct iopf_queue *queue;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return NULL;

	/*
	 * The WQ is unordered because the low-level handler enqueues faults by
	 * group. PRI requests within a group have to be ordered, but once
	 * that's dealt with, the high-level function can handle groups out of
	 * order.
	 */
	queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name);
	if (!queue->wq) {
		kfree(queue);
		return NULL;
	}

	INIT_LIST_HEAD(&queue->devices);
	mutex_init(&queue->lock);

	return queue;
}
EXPORT_SYMBOL_GPL(iopf_queue_alloc);

/**
 * iopf_queue_free - Free IOPF queue
 * @queue: queue to free
 *
 * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or
 * adding/removing devices on this queue anymore.
 */
void iopf_queue_free(struct iopf_queue *queue)
{
	struct iommu_fault_param *iopf_param, *next;

	if (!queue)
		return;

	list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list)
		iopf_queue_remove_device(queue, iopf_param->dev);

	destroy_workqueue(queue->wq);
	kfree(queue);
}
EXPORT_SYMBOL_GPL(iopf_queue_free);
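/*
 * Illustrative lifecycle sketch (not part of this file): how a hypothetical
 * IOMMU driver might tie the queue helpers together. The iommu->iopf_queue
 * member and the my_*() PRI helpers are made up; the ordering on removal
 * follows the iopf_queue_remove_device() kernel-doc above.
 *
 *	// Probe: one queue per IOMMU instance, shared by all of its devices.
 *	iommu->iopf_queue = iopf_queue_alloc(dev_name(iommu->dev));
 *	if (!iommu->iopf_queue)
 *		return -ENOMEM;
 *
 *	// Enable IOPF for one device, before enabling PRI in hardware.
 *	ret = iopf_queue_add_device(iommu->iopf_queue, dev);
 *	if (ret)
 *		return ret;
 *	my_enable_pri(dev);
 *
 *	// Disable IOPF for one device, in the documented order.
 *	my_disable_pri_in_iommu(dev);		// stop and drain new PRQs
 *	iopf_queue_remove_device(iommu->iopf_queue, dev);
 *	my_disable_pri_on_device(dev);		// PRI may now be turned off
 *
 *	// Tear down the IOMMU instance.
 *	iopf_queue_free(iommu->iopf_queue);
 */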