// SPDX-License-Identifier: GPL-2.0
/*
 * Handle device page faults
 *
 * Copyright (C) 2020 ARM Ltd.
 */

#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

#include "iommu-priv.h"

/*
 * Return the fault parameter of a device if it exists. Otherwise, return NULL.
 * On a successful return, the caller takes a reference of this parameter and
 * should put it after use by calling iopf_put_dev_fault_param().
 */
static struct iommu_fault_param *iopf_get_dev_fault_param(struct device *dev)
{
        struct dev_iommu *param = dev->iommu;
        struct iommu_fault_param *fault_param;

        rcu_read_lock();
        fault_param = rcu_dereference(param->fault_param);
        if (fault_param && !refcount_inc_not_zero(&fault_param->users))
                fault_param = NULL;
        rcu_read_unlock();

        return fault_param;
}

/* Caller must hold a reference of the fault parameter. */
static void iopf_put_dev_fault_param(struct iommu_fault_param *fault_param)
{
        if (refcount_dec_and_test(&fault_param->users))
                kfree_rcu(fault_param, rcu);
}

static void __iopf_free_group(struct iopf_group *group)
{
        struct iopf_fault *iopf, *next;

        list_for_each_entry_safe(iopf, next, &group->faults, list) {
                if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
                        kfree(iopf);
        }

        /* Pair with iommu_report_device_fault(). */
        iopf_put_dev_fault_param(group->fault_param);
}

void iopf_free_group(struct iopf_group *group)
{
        __iopf_free_group(group);
        kfree(group);
}
EXPORT_SYMBOL_GPL(iopf_free_group);

/* Non-last request of a group. Postpone until the last one. */
static int report_partial_fault(struct iommu_fault_param *fault_param,
                                struct iommu_fault *fault)
{
        struct iopf_fault *iopf;

        iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
        if (!iopf)
                return -ENOMEM;

        iopf->fault = *fault;

        mutex_lock(&fault_param->lock);
        list_add(&iopf->list, &fault_param->partial);
        mutex_unlock(&fault_param->lock);

        return 0;
}

static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
                                           struct iopf_fault *evt,
                                           struct iopf_group *abort_group)
{
        struct iopf_fault *iopf, *next;
        struct iopf_group *group;

        group = kzalloc(sizeof(*group), GFP_KERNEL);
        if (!group) {
                /*
                 * We always need to construct the group as we need it to abort
                 * the request at the driver if it can't be handled.
                 */
                group = abort_group;
        }

        group->fault_param = iopf_param;
        group->last_fault.fault = evt->fault;
        INIT_LIST_HEAD(&group->faults);
        INIT_LIST_HEAD(&group->pending_node);
        list_add(&group->last_fault.list, &group->faults);

        /* See if we have partial faults for this group */
        mutex_lock(&iopf_param->lock);
        list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
                if (iopf->fault.prm.grpid == evt->fault.prm.grpid)
                        /* Insert *before* the last fault */
                        list_move(&iopf->list, &group->faults);
        }
        list_add(&group->pending_node, &iopf_param->faults);
        mutex_unlock(&iopf_param->lock);

        group->fault_count = list_count_nodes(&group->faults);

        return group;
}
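
/*
 * Illustrative sketch (not compiled): the shape of the events an IOMMU driver
 * is expected to feed into this file for a two-request page request group.
 * The variables dev, pasid, grpid and iova are placeholders; the field and
 * flag names are the ones consumed by report_partial_fault() and
 * iopf_group_alloc() above.
 *
 *      struct iopf_fault evt = {
 *              .fault = {
 *                      .type = IOMMU_FAULT_PAGE_REQ,
 *                      .prm = {
 *                              .flags = IOMMU_FAULT_PAGE_REQUEST_PASID_VALID,
 *                              .pasid = pasid,
 *                              .grpid = grpid,
 *                              .perm  = IOMMU_FAULT_PERM_READ,
 *                              .addr  = iova,
 *                      },
 *              },
 *      };
 *
 *      // First request: LAST_PAGE is clear, so the fault is copied onto the
 *      // device's partial list and no response is sent for it yet.
 *      iommu_report_device_fault(dev, &evt);
 *
 *      // Last request: LAST_PAGE is set, so all partial faults carrying the
 *      // same grpid are pulled into one iopf_group and handed to the
 *      // domain's iopf_handler.
 *      evt.fault.prm.addr = iova + PAGE_SIZE;
 *      evt.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
 *      iommu_report_device_fault(dev, &evt);
 */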

/**
 * iommu_report_device_fault() - Report fault event to device driver
 * @dev: the device
 * @evt: fault event data
 *
 * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
 * handler. If this function fails then ops->page_response() was called to
 * complete evt if required.
 *
 * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard
 * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't
 * expect a response. It may be generated when disabling a PASID (issuing a
 * PASID stop request) by some PCI devices.
 *
 * The PASID stop request is issued by the device driver before unbind(). Once
 * it completes, no page request is generated for this PASID anymore and
 * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
 * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
 * for all outstanding page requests to come back with a response before
 * completing the PASID stop request. Others do not wait for page responses, and
 * instead issue this Stop Marker that tells us when the PASID can be
 * reallocated.
 *
 * It is safe to discard the Stop Marker because it is an optimization.
 * a. Page requests, which are posted requests, have been flushed to the IOMMU
 *    when the stop request completes.
 * b. The IOMMU driver flushes all fault queues on unbind() before freeing the
 *    PASID.
 *
 * So even though the Stop Marker might be issued by the device *after* the stop
 * request completes, outstanding faults will have been dealt with by the time
 * the PASID is freed.
 *
 * Any valid page fault will eventually be routed to an iommu domain and the
 * page fault handler installed there will get called. The users of this
 * handling framework must guarantee that the iommu domain can only be freed
 * after the device has stopped generating page faults (or the iommu hardware
 * has been set to block the page faults) and the pending page faults have
 * been flushed.
 */
void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
{
        struct iommu_fault *fault = &evt->fault;
        struct iommu_fault_param *iopf_param;
        struct iopf_group abort_group = {};
        struct iopf_group *group;

        iopf_param = iopf_get_dev_fault_param(dev);
        if (WARN_ON(!iopf_param))
                return;

        if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
                report_partial_fault(iopf_param, fault);
                iopf_put_dev_fault_param(iopf_param);
                /* A request that is not the last does not need to be ack'd */
                return;
        }

        /*
         * This is the last page fault of a group. Allocate an iopf group and
         * pass it to the domain's page fault handler. The group holds a
         * reference count of the fault parameter. It will be released on the
         * response or error path of this function. If an error is returned,
         * the caller will send a response to the hardware. We need to clean
         * up before leaving, otherwise partial faults will be stuck.
         */
        group = iopf_group_alloc(iopf_param, evt, &abort_group);
        if (group == &abort_group)
                goto err_abort;

        if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
                group->attach_handle = iommu_attach_handle_get(dev->iommu_group,
                                                               fault->prm.pasid,
                                                               0);
                if (IS_ERR(group->attach_handle)) {
                        const struct iommu_ops *ops = dev_iommu_ops(dev);

                        if (!ops->user_pasid_table)
                                goto err_abort;

                        /*
                         * The iommu driver for this device supports user-
                         * managed PASID table. Therefore page faults for
                         * any PASID should go through the NESTING domain
                         * attached to the device RID.
                         */
                        group->attach_handle =
                                iommu_attach_handle_get(dev->iommu_group,
                                                        IOMMU_NO_PASID,
                                                        IOMMU_DOMAIN_NESTED);
                        if (IS_ERR(group->attach_handle))
                                goto err_abort;
                }
        } else {
                group->attach_handle =
                        iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0);
                if (IS_ERR(group->attach_handle))
                        goto err_abort;
        }

        if (!group->attach_handle->domain->iopf_handler)
                goto err_abort;

        /*
         * On success iopf_handler must call iopf_group_response() and
         * iopf_free_group()
         */
        if (group->attach_handle->domain->iopf_handler(group))
                goto err_abort;

        return;

err_abort:
        dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n",
                             fault->prm.pasid);
        iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE);
        if (group == &abort_group)
                __iopf_free_group(group);
        else
                iopf_free_group(group);
}
EXPORT_SYMBOL_GPL(iommu_report_device_fault);
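
/*
 * Illustrative sketch (not compiled): a minimal domain iopf_handler honouring
 * the contract above. handle_one_fault() is a hypothetical resolver; the list
 * walk, the response and the group release are what this file expects from a
 * real handler such as the SVA one.
 *
 *      static int example_iopf_handler(struct iopf_group *group)
 *      {
 *              struct iopf_fault *iopf;
 *              enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
 *
 *              list_for_each_entry(iopf, &group->faults, list) {
 *                      if (handle_one_fault(group->attach_handle->domain,
 *                                           &iopf->fault)) {
 *                              status = IOMMU_PAGE_RESP_INVALID;
 *                              break;
 *                      }
 *              }
 *
 *              iopf_group_response(group, status);
 *              iopf_free_group(group);
 *              return 0;
 *      }
 *
 * A real handler would typically bounce the group to a workqueue (for example
 * group->fault_param->queue->wq) and run the above in process context rather
 * than in the reporting thread.
 */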

/**
 * iopf_queue_flush_dev - Ensure that all queued faults have been processed
 * @dev: the endpoint whose faults need to be flushed.
 *
 * The IOMMU driver calls this before releasing a PASID, to ensure that all
 * pending faults for this PASID have been handled, and won't hit the address
 * space of the next process that uses this PASID. The driver must make sure
 * that no new fault is added to the queue. In particular it must flush its
 * low-level queue before calling this function.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_flush_dev(struct device *dev)
{
        struct iommu_fault_param *iopf_param;

        /*
         * It's a driver bug to be here after iopf_queue_remove_device().
         * Therefore, it's safe to dereference the fault parameter without
         * holding the lock.
         */
        iopf_param = rcu_dereference_check(dev->iommu->fault_param, true);
        if (WARN_ON(!iopf_param))
                return -ENODEV;

        flush_workqueue(iopf_param->queue->wq);

        return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);

/**
 * iopf_group_response - Respond to a group of page faults
 * @group: the group of faults with the same group id
 * @status: the response code
 */
void iopf_group_response(struct iopf_group *group,
                         enum iommu_page_response_code status)
{
        struct iommu_fault_param *fault_param = group->fault_param;
        struct iopf_fault *iopf = &group->last_fault;
        struct device *dev = group->fault_param->dev;
        const struct iommu_ops *ops = dev_iommu_ops(dev);
        struct iommu_page_response resp = {
                .pasid = iopf->fault.prm.pasid,
                .grpid = iopf->fault.prm.grpid,
                .code = status,
        };

        /* Only send response if there is a fault report pending */
        mutex_lock(&fault_param->lock);
        if (!list_empty(&group->pending_node)) {
                ops->page_response(dev, &group->last_fault, &resp);
                list_del_init(&group->pending_node);
        }
        mutex_unlock(&fault_param->lock);
}
EXPORT_SYMBOL_GPL(iopf_group_response);
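
/*
 * Illustrative sketch (not compiled): the ordering an IOMMU driver is expected
 * to follow before releasing a PASID, per the iopf_queue_flush_dev()
 * documentation above. The example_*_hw() helpers are hypothetical driver
 * internals; only iopf_queue_flush_dev() is provided by this file.
 *
 *      static void example_release_pasid(struct example_iommu *iommu,
 *                                        struct device *dev, ioasid_t pasid)
 *      {
 *              // Stop new page requests: block the PASID in hardware.
 *              example_block_pasid_hw(iommu, dev, pasid);
 *
 *              // Drain the hardware page request queue so every pending
 *              // request has been pushed through iommu_report_device_fault().
 *              example_flush_prq_hw(iommu);
 *
 *              // Wait for the fault workqueue to finish the reported faults.
 *              iopf_queue_flush_dev(dev);
 *
 *              // The PASID can now be reused safely.
 *      }
 */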

/**
 * iopf_queue_discard_partial - Remove all pending partial faults
 * @queue: the queue whose partial faults need to be discarded
 *
 * When the hardware queue overflows, the last page faults in a group may have
 * been lost and the IOMMU driver calls this to discard all partial faults. The
 * driver shouldn't be adding new faults to this queue concurrently.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
        struct iopf_fault *iopf, *next;
        struct iommu_fault_param *iopf_param;

        if (!queue)
                return -EINVAL;

        mutex_lock(&queue->lock);
        list_for_each_entry(iopf_param, &queue->devices, queue_list) {
                mutex_lock(&iopf_param->lock);
                list_for_each_entry_safe(iopf, next, &iopf_param->partial,
                                         list) {
                        list_del(&iopf->list);
                        kfree(iopf);
                }
                mutex_unlock(&iopf_param->lock);
        }
        mutex_unlock(&queue->lock);
        return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);

/**
 * iopf_queue_add_device - Add producer to the fault queue
 * @queue: IOPF queue
 * @dev: device to add
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
        int ret = 0;
        struct dev_iommu *param = dev->iommu;
        struct iommu_fault_param *fault_param;
        const struct iommu_ops *ops = dev_iommu_ops(dev);

        if (!ops->page_response)
                return -ENODEV;

        mutex_lock(&queue->lock);
        mutex_lock(&param->lock);
        if (rcu_dereference_check(param->fault_param,
                                  lockdep_is_held(&param->lock))) {
                ret = -EBUSY;
                goto done_unlock;
        }

        fault_param = kzalloc(sizeof(*fault_param), GFP_KERNEL);
        if (!fault_param) {
                ret = -ENOMEM;
                goto done_unlock;
        }

        mutex_init(&fault_param->lock);
        INIT_LIST_HEAD(&fault_param->faults);
        INIT_LIST_HEAD(&fault_param->partial);
        fault_param->dev = dev;
        refcount_set(&fault_param->users, 1);
        list_add(&fault_param->queue_list, &queue->devices);
        fault_param->queue = queue;

        rcu_assign_pointer(param->fault_param, fault_param);

done_unlock:
        mutex_unlock(&param->lock);
        mutex_unlock(&queue->lock);

        return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);
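
/*
 * Illustrative sketch (not compiled): typical producer setup. An IOMMU driver
 * usually allocates one iopf_queue per driver or per IOMMU instance and adds a
 * device when page faults are enabled for it. The example_* names are
 * hypothetical; the iopf_queue_*() calls are the ones defined in this file.
 *
 *      static int example_enable_iopf(struct example_iommu *iommu,
 *                                     struct device *dev)
 *      {
 *              int ret;
 *
 *              if (!iommu->iopf_queue) {
 *                      iommu->iopf_queue = iopf_queue_alloc(dev_name(iommu->dev));
 *                      if (!iommu->iopf_queue)
 *                              return -ENOMEM;
 *              }
 *
 *              // Register the device as a fault producer on the queue.
 *              ret = iopf_queue_add_device(iommu->iopf_queue, dev);
 *              if (ret)
 *                      return ret;
 *
 *              // Only now enable PRI/stall in hardware, so that faults can be
 *              // delivered through iommu_report_device_fault().
 *              return example_enable_pri_hw(iommu, dev);
 *      }
 */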

/**
 * iopf_queue_remove_device - Remove producer from fault queue
 * @queue: IOPF queue
 * @dev: device to remove
 *
 * Removes a device from an iopf_queue. It's recommended to follow these
 * steps when removing a device:
 *
 * - Disable new PRI reception: Turn off PRI generation in the IOMMU hardware
 *   and flush any hardware page request queues. This should be done before
 *   calling into this helper.
 * - Acknowledge all outstanding PRQs to the device: Respond to all outstanding
 *   page requests with IOMMU_PAGE_RESP_INVALID, indicating the device should
 *   not retry. This helper function handles this.
 * - Disable PRI on the device: After calling this helper, the caller could
 *   then disable PRI on the device.
 *
 * Calling iopf_queue_remove_device() essentially disassociates the device.
 * The fault_param might still exist, but iommu_page_response() will do
 * nothing. The device fault parameter reference count has been properly
 * passed from iommu_report_device_fault() to the fault handling work, and
 * will eventually be released after iommu_page_response().
 */
void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
        struct iopf_fault *partial_iopf;
        struct iopf_fault *next;
        struct iopf_group *group, *temp;
        struct dev_iommu *param = dev->iommu;
        struct iommu_fault_param *fault_param;
        const struct iommu_ops *ops = dev_iommu_ops(dev);

        mutex_lock(&queue->lock);
        mutex_lock(&param->lock);
        fault_param = rcu_dereference_check(param->fault_param,
                                            lockdep_is_held(&param->lock));

        if (WARN_ON(!fault_param || fault_param->queue != queue))
                goto unlock;

        mutex_lock(&fault_param->lock);
        list_for_each_entry_safe(partial_iopf, next, &fault_param->partial, list)
                kfree(partial_iopf);

        list_for_each_entry_safe(group, temp, &fault_param->faults, pending_node) {
                struct iopf_fault *iopf = &group->last_fault;
                struct iommu_page_response resp = {
                        .pasid = iopf->fault.prm.pasid,
                        .grpid = iopf->fault.prm.grpid,
                        .code = IOMMU_PAGE_RESP_INVALID
                };

                ops->page_response(dev, iopf, &resp);
                list_del_init(&group->pending_node);
        }
        mutex_unlock(&fault_param->lock);

        list_del(&fault_param->queue_list);

        /* dec the ref owned by iopf_queue_add_device() */
        rcu_assign_pointer(param->fault_param, NULL);
        iopf_put_dev_fault_param(fault_param);
unlock:
        mutex_unlock(&param->lock);
        mutex_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);

/**
 * iopf_queue_alloc - Allocate and initialize a fault queue
 * @name: a unique string identifying the queue (for workqueue)
 *
 * Return: the queue on success and NULL on error.
 */
struct iopf_queue *iopf_queue_alloc(const char *name)
{
        struct iopf_queue *queue;

        queue = kzalloc(sizeof(*queue), GFP_KERNEL);
        if (!queue)
                return NULL;

        /*
         * The WQ is unordered because the low-level handler enqueues faults by
         * group. PRI requests within a group have to be ordered, but once
         * that's dealt with, the high-level function can handle groups out of
         * order.
         */
        queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name);
        if (!queue->wq) {
                kfree(queue);
                return NULL;
        }

        INIT_LIST_HEAD(&queue->devices);
        mutex_init(&queue->lock);

        return queue;
}
EXPORT_SYMBOL_GPL(iopf_queue_alloc);

/**
 * iopf_queue_free - Free IOPF queue
 * @queue: queue to free
 *
 * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or
 * adding/removing devices on this queue anymore.
 */
void iopf_queue_free(struct iopf_queue *queue)
{
        struct iommu_fault_param *iopf_param, *next;

        if (!queue)
                return;

        list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list)
                iopf_queue_remove_device(queue, iopf_param->dev);

        destroy_workqueue(queue->wq);
        kfree(queue);
}
EXPORT_SYMBOL_GPL(iopf_queue_free);
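
/*
 * Illustrative sketch (not compiled): typical producer teardown, mirroring the
 * steps documented above iopf_queue_remove_device(). The example_* helpers are
 * hypothetical driver internals.
 *
 *      static void example_disable_iopf(struct example_iommu *iommu,
 *                                       struct device *dev)
 *      {
 *              // 1. Stop the IOMMU from generating new fault reports for the
 *              //    device and drain the hardware page request queue.
 *              example_disable_pri_hw(iommu, dev);
 *
 *              // 2. Respond to everything still outstanding and drop the
 *              //    device's fault parameter.
 *              iopf_queue_remove_device(iommu->iopf_queue, dev);
 *
 *              // 3. Only now disable PRI on the endpoint itself.
 *              example_disable_pri_on_device(dev);
 *      }
 *
 * At driver teardown, once no device is expected to fault anymore, the queue
 * itself is released with iopf_queue_free(), which removes any devices still
 * attached to it.
 */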