// SPDX-License-Identifier: GPL-2.0
/*
 * Handle device page faults
 *
 * Copyright (C) 2020 ARM Ltd.
 */

#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

#include "iommu-priv.h"

/*
 * Return the fault parameter of a device if it exists. Otherwise, return NULL.
 * On a successful return, the caller takes a reference of this parameter and
 * should put it after use by calling iopf_put_dev_fault_param().
 */
static struct iommu_fault_param *iopf_get_dev_fault_param(struct device *dev)
{
        struct dev_iommu *param = dev->iommu;
        struct iommu_fault_param *fault_param;

        rcu_read_lock();
        fault_param = rcu_dereference(param->fault_param);
        if (fault_param && !refcount_inc_not_zero(&fault_param->users))
                fault_param = NULL;
        rcu_read_unlock();

        return fault_param;
}

/* Caller must hold a reference of the fault parameter. */
static void iopf_put_dev_fault_param(struct iommu_fault_param *fault_param)
{
        if (refcount_dec_and_test(&fault_param->users))
                kfree_rcu(fault_param, rcu);
}

static void __iopf_free_group(struct iopf_group *group)
{
        struct iopf_fault *iopf, *next;

        list_for_each_entry_safe(iopf, next, &group->faults, list) {
                if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
                        kfree(iopf);
        }

        /* Pair with iommu_report_device_fault(). */
        iopf_put_dev_fault_param(group->fault_param);
}

void iopf_free_group(struct iopf_group *group)
{
        __iopf_free_group(group);
        kfree(group);
}
EXPORT_SYMBOL_GPL(iopf_free_group);

/* Non-last request of a group. Postpone until the last one. */
static int report_partial_fault(struct iommu_fault_param *fault_param,
                                struct iommu_fault *fault)
{
        struct iopf_fault *iopf;

        iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
        if (!iopf)
                return -ENOMEM;

        iopf->fault = *fault;

        mutex_lock(&fault_param->lock);
        list_add(&iopf->list, &fault_param->partial);
        mutex_unlock(&fault_param->lock);

        return 0;
}

static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
                                           struct iopf_fault *evt,
                                           struct iopf_group *abort_group)
{
        struct iopf_fault *iopf, *next;
        struct iopf_group *group;

        group = kzalloc(sizeof(*group), GFP_KERNEL);
        if (!group) {
                /*
                 * We always need to construct the group as we need it to abort
                 * the request at the driver if it can't be handled.
                 */
                group = abort_group;
        }

        group->fault_param = iopf_param;
        group->last_fault.fault = evt->fault;
        INIT_LIST_HEAD(&group->faults);
        INIT_LIST_HEAD(&group->pending_node);
        list_add(&group->last_fault.list, &group->faults);

        /* See if we have partial faults for this group */
        mutex_lock(&iopf_param->lock);
        list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
                if (iopf->fault.prm.grpid == evt->fault.prm.grpid)
                        /* Insert *before* the last fault */
                        list_move(&iopf->list, &group->faults);
        }
        list_add(&group->pending_node, &iopf_param->faults);
        mutex_unlock(&iopf_param->lock);

        group->fault_count = list_count_nodes(&group->faults);

        return group;
}

/**
 * iommu_report_device_fault() - Report fault event to device driver
 * @dev: the device
 * @evt: fault event data
 *
 * Called by IOMMU drivers when a fault is detected, typically in a threaded IRQ
 * handler. If the fault cannot be delivered to a domain fault handler, it is
 * aborted here and ops->page_response() is called to complete evt where a
 * response is required.
 *
 * This module doesn't handle PCI PASID Stop Markers; IOMMU drivers must discard
 * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't
 * expect a response. Some PCI devices may generate it when disabling a PASID
 * (issuing a PASID stop request).
 *
 * The PASID stop request is issued by the device driver before unbind(). Once
 * it completes, no page request is generated for this PASID anymore and
 * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
 * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
 * for all outstanding page requests to come back with a response before
 * completing the PASID stop request. Others do not wait for page responses, and
 * instead issue this Stop Marker that tells us when the PASID can be
 * reallocated.
 *
 * It is safe to discard the Stop Marker because it is an optimization.
 * a. Page requests, which are posted requests, have been flushed to the IOMMU
 *    when the stop request completes.
 * b. The IOMMU driver flushes all fault queues on unbind() before freeing the
 *    PASID.
 *
 * So even though the Stop Marker might be issued by the device *after* the stop
 * request completes, outstanding faults will have been dealt with by the time
 * the PASID is freed.
 *
 * Any valid page fault will eventually be routed to an iommu domain and the
 * page fault handler installed there will get called. The users of this
 * handling framework should guarantee that the iommu domain can only be
 * freed after the device has stopped generating page faults (or the iommu
 * hardware has been set to block the page faults) and the pending page faults
 * have been flushed.
 */
void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt)
{
        struct iommu_fault *fault = &evt->fault;
        struct iommu_fault_param *iopf_param;
        struct iopf_group abort_group = {};
        struct iopf_group *group;

        iopf_param = iopf_get_dev_fault_param(dev);
        if (WARN_ON(!iopf_param))
                return;

        if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
                report_partial_fault(iopf_param, fault);
                iopf_put_dev_fault_param(iopf_param);
                /* A request that is not the last does not need to be ack'd */
                return;
        }

        /*
         * This is the last page fault of a group. Allocate an iopf group and
         * pass it to the domain's page fault handler. The group holds a
         * reference count of the fault parameter, which is released on the
         * response or error path of this function. If the fault cannot be
         * handled, respond with failure and clean up here, otherwise partial
         * faults would be stuck.
         */
        group = iopf_group_alloc(iopf_param, evt, &abort_group);
        if (group == &abort_group)
                goto err_abort;

        if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) {
                group->attach_handle = iommu_attach_handle_get(dev->iommu_group,
                                                               fault->prm.pasid,
                                                               0);
                if (IS_ERR(group->attach_handle)) {
                        const struct iommu_ops *ops = dev_iommu_ops(dev);

                        if (!ops->user_pasid_table)
                                goto err_abort;

                        /*
                         * The iommu driver for this device supports a user-
                         * managed PASID table. Therefore page faults for
                         * any PASID should go through the NESTING domain
                         * attached to the device RID.
                         */
                        group->attach_handle =
                                iommu_attach_handle_get(dev->iommu_group,
                                                        IOMMU_NO_PASID,
                                                        IOMMU_DOMAIN_NESTED);
                        if (IS_ERR(group->attach_handle))
                                goto err_abort;
                }
        } else {
                group->attach_handle =
                        iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0);
                if (IS_ERR(group->attach_handle))
                        goto err_abort;
        }

        if (!group->attach_handle->domain->iopf_handler)
                goto err_abort;

        /*
         * On success iopf_handler must call iopf_group_response() and
         * iopf_free_group()
         */
        if (group->attach_handle->domain->iopf_handler(group))
                goto err_abort;

        return;

err_abort:
        dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n",
                             fault->prm.pasid);
        iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE);
        if (group == &abort_group)
                __iopf_free_group(group);
        else
                iopf_free_group(group);
}
EXPORT_SYMBOL_GPL(iommu_report_device_fault);
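
/*
 * Illustrative sketch, not part of this file: how a hypothetical IOMMU driver
 * (the my_iommu_* name and the flat argument list are assumptions) might turn
 * one entry of its hardware page-request queue into an iopf_fault and hand it
 * to iommu_report_device_fault() from its threaded IRQ handler. Only the
 * iopf_fault layout and the call itself reflect the API implemented above.
 */
static void __maybe_unused my_iommu_report_prq_entry(struct device *dev,
                                                     u32 pasid, u32 grpid,
                                                     u64 addr, bool last)
{
        struct iopf_fault evt = {
                .fault = {
                        .type = IOMMU_FAULT_PAGE_REQ,
                        .prm = {
                                .flags = IOMMU_FAULT_PAGE_REQUEST_PASID_VALID |
                                         (last ? IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE : 0),
                                .pasid = pasid,
                                .grpid = grpid,
                                /* Assume a read fault for this sketch. */
                                .perm = IOMMU_FAULT_PERM_READ,
                                .addr = addr,
                        },
                },
        };

        /*
         * Non-last requests are queued as partial faults; the last request
         * of the group triggers delivery to the domain's iopf_handler.
         */
        iommu_report_device_fault(dev, &evt);
}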

/**
 * iopf_queue_flush_dev - Ensure that all queued faults have been processed
 * @dev: the endpoint whose faults need to be flushed.
 *
 * The IOMMU driver calls this before releasing a PASID, to ensure that all
 * pending faults for this PASID have been handled, and won't hit the address
 * space of the next process that uses this PASID. The driver must make sure
 * that no new fault is added to the queue. In particular it must flush its
 * low-level queue before calling this function.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_flush_dev(struct device *dev)
{
        struct iommu_fault_param *iopf_param;

        /*
         * It's a driver bug to be here after iopf_queue_remove_device().
         * Therefore, it's safe to dereference the fault parameter without
         * holding the lock.
         */
        iopf_param = rcu_dereference_check(dev->iommu->fault_param, true);
        if (WARN_ON(!iopf_param))
                return -ENODEV;

        flush_workqueue(iopf_param->queue->wq);

        return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);

/**
 * iopf_group_response - Respond to a group of page faults
 * @group: the group of faults with the same group id
 * @status: the response code
 */
void iopf_group_response(struct iopf_group *group,
                         enum iommu_page_response_code status)
{
        struct iommu_fault_param *fault_param = group->fault_param;
        struct iopf_fault *iopf = &group->last_fault;
        struct device *dev = group->fault_param->dev;
        const struct iommu_ops *ops = dev_iommu_ops(dev);
        struct iommu_page_response resp = {
                .pasid = iopf->fault.prm.pasid,
                .grpid = iopf->fault.prm.grpid,
                .code = status,
        };

        /* Only send response if there is a fault report pending */
        mutex_lock(&fault_param->lock);
        if (!list_empty(&group->pending_node)) {
                ops->page_response(dev, &group->last_fault, &resp);
                list_del_init(&group->pending_node);
        }
        mutex_unlock(&fault_param->lock);
}
EXPORT_SYMBOL_GPL(iopf_group_response);
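
/*
 * Illustrative sketch, not part of this file: a minimal domain->iopf_handler
 * for a hypothetical fault consumer. Whether the faults are resolved here or
 * deferred to the IOPF workqueue, the handler owns the group once it returns
 * 0 and must finish with iopf_group_response() followed by iopf_free_group().
 */
static int __maybe_unused my_domain_iopf_handler(struct iopf_group *group)
{
        struct iopf_fault *iopf;
        enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;

        list_for_each_entry(iopf, &group->faults, list) {
                /*
                 * Resolve iopf->fault.prm.addr against
                 * group->attach_handle->domain here; on failure, set
                 * status to IOMMU_PAGE_RESP_INVALID instead.
                 */
        }

        /* Ack the whole group with one response, then release it. */
        iopf_group_response(group, status);
        iopf_free_group(group);

        return 0;
}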

/**
 * iopf_queue_discard_partial - Remove all pending partial faults
 * @queue: the queue whose partial faults need to be discarded
 *
 * When the hardware queue overflows, the last page faults of a group may have
 * been lost and the IOMMU driver calls this to discard all partial faults. The
 * driver shouldn't be adding new faults to this queue concurrently.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
        struct iopf_fault *iopf, *next;
        struct iommu_fault_param *iopf_param;

        if (!queue)
                return -EINVAL;

        mutex_lock(&queue->lock);
        list_for_each_entry(iopf_param, &queue->devices, queue_list) {
                mutex_lock(&iopf_param->lock);
                list_for_each_entry_safe(iopf, next, &iopf_param->partial,
                                         list) {
                        list_del(&iopf->list);
                        kfree(iopf);
                }
                mutex_unlock(&iopf_param->lock);
        }
        mutex_unlock(&queue->lock);
        return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);

/**
 * iopf_queue_add_device - Add producer to the fault queue
 * @queue: IOPF queue
 * @dev: device to add
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
        int ret = 0;
        struct dev_iommu *param = dev->iommu;
        struct iommu_fault_param *fault_param;
        const struct iommu_ops *ops = dev_iommu_ops(dev);

        if (!ops->page_response)
                return -ENODEV;

        mutex_lock(&queue->lock);
        mutex_lock(&param->lock);
        if (rcu_dereference_check(param->fault_param,
                                  lockdep_is_held(&param->lock))) {
                ret = -EBUSY;
                goto done_unlock;
        }

        fault_param = kzalloc(sizeof(*fault_param), GFP_KERNEL);
        if (!fault_param) {
                ret = -ENOMEM;
                goto done_unlock;
        }

        mutex_init(&fault_param->lock);
        INIT_LIST_HEAD(&fault_param->faults);
        INIT_LIST_HEAD(&fault_param->partial);
        fault_param->dev = dev;
        refcount_set(&fault_param->users, 1);
        list_add(&fault_param->queue_list, &queue->devices);
        fault_param->queue = queue;

        rcu_assign_pointer(param->fault_param, fault_param);

done_unlock:
        mutex_unlock(&param->lock);
        mutex_unlock(&queue->lock);

        return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);

/**
 * iopf_queue_remove_device - Remove producer from fault queue
 * @queue: IOPF queue
 * @dev: device to remove
 *
 * Removes a device from an iopf_queue. It's recommended to follow these
 * steps when removing a device:
 *
 * - Disable new PRI reception: Turn off PRI generation in the IOMMU hardware
 *   and flush any hardware page request queues. This should be done before
 *   calling into this helper.
 * - Acknowledge all outstanding PRQs to the device: Respond to all outstanding
 *   page requests with IOMMU_PAGE_RESP_INVALID, indicating the device should
 *   not retry. This helper function handles this.
 * - Disable PRI on the device: After calling this helper, the caller could
 *   then disable PRI on the device.
 *
 * Calling iopf_queue_remove_device() essentially disassociates the device.
 * The fault_param might still exist, but iopf_group_response() will do
 * nothing. The device fault parameter reference count has been properly
 * passed from iommu_report_device_fault() to the fault handling work, and
 * will eventually be released by iopf_free_group().
 */
void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
        struct iopf_fault *partial_iopf;
        struct iopf_fault *next;
        struct iopf_group *group, *temp;
        struct dev_iommu *param = dev->iommu;
        struct iommu_fault_param *fault_param;
        const struct iommu_ops *ops = dev_iommu_ops(dev);

        mutex_lock(&queue->lock);
        mutex_lock(&param->lock);
        fault_param = rcu_dereference_check(param->fault_param,
                                            lockdep_is_held(&param->lock));

        if (WARN_ON(!fault_param || fault_param->queue != queue))
                goto unlock;

        mutex_lock(&fault_param->lock);
        list_for_each_entry_safe(partial_iopf, next, &fault_param->partial, list)
                kfree(partial_iopf);

        list_for_each_entry_safe(group, temp, &fault_param->faults, pending_node) {
                struct iopf_fault *iopf = &group->last_fault;
                struct iommu_page_response resp = {
                        .pasid = iopf->fault.prm.pasid,
                        .grpid = iopf->fault.prm.grpid,
                        .code = IOMMU_PAGE_RESP_INVALID
                };

                ops->page_response(dev, iopf, &resp);
                list_del_init(&group->pending_node);
        }
        mutex_unlock(&fault_param->lock);

        list_del(&fault_param->queue_list);

        /* dec the ref owned by iopf_queue_add_device() */
        rcu_assign_pointer(param->fault_param, NULL);
        iopf_put_dev_fault_param(fault_param);
unlock:
        mutex_unlock(&param->lock);
        mutex_unlock(&queue->lock);
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);
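
/*
 * Illustrative sketch, not part of this file: the removal sequence the
 * iopf_queue_remove_device() documentation above recommends, for a
 * hypothetical driver (the my_iommu_* name is an assumption). The
 * hardware-specific steps are left as comments.
 */
static void __maybe_unused my_iommu_shutdown_iopf(struct iopf_queue *queue,
                                                  struct device *dev)
{
        /*
         * 1. Quiesce the hardware: stop PRI generation for @dev and drain
         *    the IOMMU's low-level page request queue (driver specific).
         */

        /*
         * 2. Detach @dev from the IOPF framework; every group still pending
         *    here is answered with IOMMU_PAGE_RESP_INVALID on our behalf.
         */
        iopf_queue_remove_device(queue, dev);

        /* 3. Only now disable PRI on the endpoint, e.g. pci_disable_pri(). */
}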

/**
 * iopf_queue_alloc - Allocate and initialize a fault queue
 * @name: a unique string identifying the queue (for workqueue)
 *
 * Return: the queue on success and NULL on error.
 */
struct iopf_queue *iopf_queue_alloc(const char *name)
{
        struct iopf_queue *queue;

        queue = kzalloc(sizeof(*queue), GFP_KERNEL);
        if (!queue)
                return NULL;

        /*
         * The WQ is unordered because the low-level handler enqueues faults by
         * group. PRI requests within a group have to be ordered, but once
         * that's dealt with, the high-level function can handle groups out of
         * order.
         */
        queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name);
        if (!queue->wq) {
                kfree(queue);
                return NULL;
        }

        INIT_LIST_HEAD(&queue->devices);
        mutex_init(&queue->lock);

        return queue;
}
EXPORT_SYMBOL_GPL(iopf_queue_alloc);

/**
 * iopf_queue_free - Free IOPF queue
 * @queue: queue to free
 *
 * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or
 * adding/removing devices on this queue anymore.
 */
void iopf_queue_free(struct iopf_queue *queue)
{
        struct iommu_fault_param *iopf_param, *next;

        if (!queue)
                return;

        list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list)
                iopf_queue_remove_device(queue, iopf_param->dev);

        destroy_workqueue(queue->wq);
        kfree(queue);
}
EXPORT_SYMBOL_GPL(iopf_queue_free);
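
/*
 * Illustrative sketch, not part of this file: one IOPF queue shared by all
 * devices behind a hypothetical IOMMU instance (struct my_iommu and the
 * my_iommu_* names are assumptions). The queue is created once at driver
 * init, devices join it when they enable IOPF, and iopf_queue_free() detaches
 * any stragglers on exit.
 */
struct my_iommu {
        struct iopf_queue *iopf_queue;
};

static int __maybe_unused my_iommu_init_iopf(struct my_iommu *iommu)
{
        iommu->iopf_queue = iopf_queue_alloc("my-iommu");

        return iommu->iopf_queue ? 0 : -ENOMEM;
}

static int __maybe_unused my_iommu_dev_enable_iopf(struct my_iommu *iommu,
                                                   struct device *dev)
{
        /* Fails with -ENODEV if the IOMMU driver lacks ops->page_response(). */
        return iopf_queue_add_device(iommu->iopf_queue, dev);
}

static void __maybe_unused my_iommu_exit_iopf(struct my_iommu *iommu)
{
        iopf_queue_free(iommu->iopf_queue);
}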