// SPDX-License-Identifier: GPL-2.0
/*
 * Handle device page faults
 *
 * Copyright (C) 2020 ARM Ltd.
 */

#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

#include "iommu-sva.h"

/**
 * struct iopf_queue - IO Page Fault queue
 * @wq: the fault workqueue
 * @devices: devices attached to this queue
 * @lock: protects the device list
 */
struct iopf_queue {
	struct workqueue_struct *wq;
	struct list_head devices;
	struct mutex lock;
};

/**
 * struct iopf_device_param - IO Page Fault data attached to a device
 * @dev: the device that owns this param
 * @queue: IOPF queue
 * @queue_list: entry in the queue->devices list
 * @partial: faults that are part of a Page Request Group for which the last
 *           request hasn't been submitted yet.
 */
struct iopf_device_param {
	struct device *dev;
	struct iopf_queue *queue;
	struct list_head queue_list;
	struct list_head partial;
};

struct iopf_fault {
	struct iommu_fault fault;
	struct list_head list;
};

struct iopf_group {
	struct iopf_fault last_fault;
	struct list_head faults;
	struct work_struct work;
	struct device *dev;
};

static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
			       enum iommu_page_response_code status)
{
	struct iommu_page_response resp = {
		.version = IOMMU_PAGE_RESP_VERSION_1,
		.pasid = iopf->fault.prm.pasid,
		.grpid = iopf->fault.prm.grpid,
		.code = status,
	};

	if ((iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) &&
	    (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID))
		resp.flags = IOMMU_PAGE_RESP_PASID_VALID;

	return iommu_page_response(dev, &resp);
}

static void iopf_handler(struct work_struct *work)
{
	struct iopf_group *group;
	struct iommu_domain *domain;
	struct iopf_fault *iopf, *next;
	enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;

	group = container_of(work, struct iopf_group, work);
	domain = iommu_get_domain_for_dev_pasid(group->dev,
				group->last_fault.fault.prm.pasid, 0);
	if (!domain || !domain->iopf_handler)
		status = IOMMU_PAGE_RESP_INVALID;

	list_for_each_entry_safe(iopf, next, &group->faults, list) {
		/*
		 * For the moment, errors are sticky: don't handle subsequent
		 * faults in the group if there is an error.
		 */
		if (status == IOMMU_PAGE_RESP_SUCCESS)
			status = domain->iopf_handler(&iopf->fault,
						      domain->fault_data);

		if (!(iopf->fault.prm.flags &
		      IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
			kfree(iopf);
	}

	iopf_complete_group(group->dev, &group->last_fault, status);
	kfree(group);
}
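
/*
 * Illustrative sketch only (not built, hence the #if 0): the shape of the
 * domain->iopf_handler callback that iopf_handler() above invokes for each
 * fault in a group. The "example_fault_data" structure and the range check
 * are hypothetical stand-ins; a real handler, such as the SVA one, would
 * resolve the faulting address against an mm instead.
 */
#if 0
struct example_fault_data {
	unsigned long start;	/* hypothetical range the driver can fault in */
	unsigned long end;
};

static enum iommu_page_response_code
example_iopf_handler(struct iommu_fault *fault, void *data)
{
	struct example_fault_data *range = data;

	/* Only recoverable page requests are expected here */
	if (fault->type != IOMMU_FAULT_PAGE_REQ)
		return IOMMU_PAGE_RESP_INVALID;

	/* SUCCESS tells the device to retry the access once the response is sent */
	if (fault->prm.addr >= range->start && fault->prm.addr < range->end)
		return IOMMU_PAGE_RESP_SUCCESS;

	/* Unresolvable fault: the device gets an error response */
	return IOMMU_PAGE_RESP_INVALID;
}
#endif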

/**
 * iommu_queue_iopf - IO Page Fault handler
 * @fault: fault event
 * @cookie: struct device, passed to iommu_register_device_fault_handler.
 *
 * Add a fault to the device workqueue, to be handled by mm.
 *
 * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard
 * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't
 * expect a response. Some PCI devices may generate it when disabling a PASID
 * (issuing a PASID stop request).
 *
 * The PASID stop request is issued by the device driver before unbind(). Once
 * it completes, no page request is generated for this PASID anymore and
 * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
 * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
 * for all outstanding page requests to come back with a response before
 * completing the PASID stop request. Others do not wait for page responses,
 * and instead issue this Stop Marker that tells us when the PASID can be
 * reallocated.
 *
 * It is safe to discard the Stop Marker because it is an optimization.
 * a. Page requests, which are posted requests, have been flushed to the IOMMU
 *    when the stop request completes.
 * b. The IOMMU driver flushes all fault queues on unbind() before freeing the
 *    PASID.
 *
 * So even though the Stop Marker might be issued by the device *after* the
 * stop request completes, outstanding faults will have been dealt with by the
 * time the PASID is freed.
 *
 * Any valid page fault will eventually be routed to an iommu domain, and the
 * page fault handler installed there will be called. Users of this handling
 * framework must ensure that the iommu domain is only freed after the device
 * has stopped generating page faults (or the iommu hardware has been
 * configured to block them) and all pending page faults have been flushed.
 *
 * Return: 0 on success and <0 on error.
 */
int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
{
	int ret;
	struct iopf_group *group;
	struct iopf_fault *iopf, *next;
	struct iopf_device_param *iopf_param;

	struct device *dev = cookie;
	struct dev_iommu *param = dev->iommu;

	lockdep_assert_held(&param->lock);

	if (fault->type != IOMMU_FAULT_PAGE_REQ)
		/* Not a recoverable page fault */
		return -EOPNOTSUPP;

	/*
	 * As long as we're holding param->lock, the queue can't be unlinked
	 * from the device and therefore cannot disappear.
	 */
	iopf_param = param->iopf_param;
	if (!iopf_param)
		return -ENODEV;

	if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
		iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
		if (!iopf)
			return -ENOMEM;

		iopf->fault = *fault;

		/* Non-last request of a group. Postpone until the last one */
		list_add(&iopf->list, &iopf_param->partial);

		return 0;
	}

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group) {
		/*
		 * The caller will send a response to the hardware. But we do
		 * need to clean up before leaving, otherwise partial faults
		 * will be stuck.
		 */
		ret = -ENOMEM;
		goto cleanup_partial;
	}

	group->dev = dev;
	group->last_fault.fault = *fault;
	INIT_LIST_HEAD(&group->faults);
	list_add(&group->last_fault.list, &group->faults);
	INIT_WORK(&group->work, iopf_handler);

	/* See if we have partial faults for this group */
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
		if (iopf->fault.prm.grpid == fault->prm.grpid)
			/* Insert *before* the last fault */
			list_move(&iopf->list, &group->faults);
	}

	queue_work(iopf_param->queue->wq, &group->work);
	return 0;

cleanup_partial:
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
		if (iopf->fault.prm.grpid == fault->prm.grpid) {
			list_del(&iopf->list);
			kfree(iopf);
		}
	}
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_queue_iopf);
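
/*
 * Illustrative sketch only (not built): how page requests are expected to
 * reach iommu_queue_iopf(). The IOMMU driver registers it as the device
 * fault handler (with the device itself as cookie, see the lifecycle sketch
 * at the end of this file) and then reports each hardware Page Request via
 * iommu_report_device_fault(), which invokes the registered handler under
 * the locking documented by the lockdep assertion above. The "example_"
 * name and the decoded fields are hypothetical; a real driver fills them
 * from its hardware event queue.
 */
#if 0
static void example_report_page_request(struct device *dev, u32 pasid,
					u32 grpid, u64 addr, bool last)
{
	struct iommu_fault_event evt = {
		.fault = {
			.type = IOMMU_FAULT_PAGE_REQ,
			.prm = {
				.flags = IOMMU_FAULT_PAGE_REQUEST_PASID_VALID |
					 (last ? IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE : 0),
				.pasid = pasid,
				.grpid = grpid,
				.perm = IOMMU_FAULT_PERM_READ,
				.addr = addr,
			},
		},
	};

	/* Ends up in iommu_queue_iopf() above via the registered handler */
	iommu_report_device_fault(dev, &evt);
}
#endif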

/**
 * iopf_queue_flush_dev - Ensure that all queued faults have been processed
 * @dev: the endpoint whose faults need to be flushed.
 *
 * The IOMMU driver calls this before releasing a PASID, to ensure that all
 * pending faults for this PASID have been handled, and won't hit the address
 * space of the next process that uses this PASID. The driver must make sure
 * that no new fault is added to the queue. In particular, it must flush its
 * low-level queue before calling this function.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_flush_dev(struct device *dev)
{
	int ret = 0;
	struct iopf_device_param *iopf_param;
	struct dev_iommu *param = dev->iommu;

	if (!param)
		return -ENODEV;

	mutex_lock(&param->lock);
	iopf_param = param->iopf_param;
	if (iopf_param)
		flush_workqueue(iopf_param->queue->wq);
	else
		ret = -ENODEV;
	mutex_unlock(&param->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);

/**
 * iopf_queue_discard_partial - Remove all pending partial faults
 * @queue: the queue whose partial faults need to be discarded
 *
 * When the hardware queue overflows, the last page faults in a group may have
 * been lost, and the IOMMU driver calls this to discard all partial faults.
 * The driver shouldn't be adding new faults to this queue concurrently.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
	struct iopf_fault *iopf, *next;
	struct iopf_device_param *iopf_param;

	if (!queue)
		return -EINVAL;

	mutex_lock(&queue->lock);
	list_for_each_entry(iopf_param, &queue->devices, queue_list) {
		list_for_each_entry_safe(iopf, next, &iopf_param->partial,
					 list) {
			list_del(&iopf->list);
			kfree(iopf);
		}
	}
	mutex_unlock(&queue->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);
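
/*
 * Illustrative sketch only (not built): the ordering that the
 * iopf_queue_flush_dev() kernel-doc above asks for when a PASID is released.
 * example_stop_pasid_and_drain_prq() is a hypothetical placeholder for
 * whatever mechanism the IOMMU driver uses to stop new page requests and
 * drain its low-level hardware queue; it is not a real API.
 */
#if 0
static void example_release_pasid(struct device *dev, ioasid_t pasid)
{
	/*
	 * 1. Make sure the device generates no new page requests for this
	 *    PASID and that everything already in the hardware queue has
	 *    been reported (and therefore queued here) or discarded.
	 */
	example_stop_pasid_and_drain_prq(dev, pasid);

	/* 2. Wait for the workqueue to finish the faults already queued. */
	iopf_queue_flush_dev(dev);

	/* 3. Only now is it safe to free or reallocate the PASID. */
}
#endif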

/**
 * iopf_queue_add_device - Add producer to the fault queue
 * @queue: IOPF queue
 * @dev: device to add
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
	int ret = -EBUSY;
	struct iopf_device_param *iopf_param;
	struct dev_iommu *param = dev->iommu;

	if (!param)
		return -ENODEV;

	iopf_param = kzalloc(sizeof(*iopf_param), GFP_KERNEL);
	if (!iopf_param)
		return -ENOMEM;

	INIT_LIST_HEAD(&iopf_param->partial);
	iopf_param->queue = queue;
	iopf_param->dev = dev;

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	if (!param->iopf_param) {
		list_add(&iopf_param->queue_list, &queue->devices);
		param->iopf_param = iopf_param;
		ret = 0;
	}
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);

	if (ret)
		kfree(iopf_param);

	return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);

/**
 * iopf_queue_remove_device - Remove producer from fault queue
 * @queue: IOPF queue
 * @dev: device to remove
 *
 * The caller must make sure that no more faults are reported for this device.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
	int ret = -EINVAL;
	struct iopf_fault *iopf, *next;
	struct iopf_device_param *iopf_param;
	struct dev_iommu *param = dev->iommu;

	if (!param || !queue)
		return -EINVAL;

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	iopf_param = param->iopf_param;
	if (iopf_param && iopf_param->queue == queue) {
		list_del(&iopf_param->queue_list);
		param->iopf_param = NULL;
		ret = 0;
	}
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);
	if (ret)
		return ret;

	/* Just in case some faults are still stuck */
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list)
		kfree(iopf);

	kfree(iopf_param);

	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);

/**
 * iopf_queue_alloc - Allocate and initialize a fault queue
 * @name: a unique string identifying the queue (for workqueue)
 *
 * Return: the queue on success and NULL on error.
 */
struct iopf_queue *iopf_queue_alloc(const char *name)
{
	struct iopf_queue *queue;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return NULL;

	/*
	 * The WQ is unordered because the low-level handler enqueues faults by
	 * group. PRI requests within a group have to be ordered, but once
	 * that's dealt with, the high-level function can handle groups out of
	 * order.
	 */
	queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name);
	if (!queue->wq) {
		kfree(queue);
		return NULL;
	}

	INIT_LIST_HEAD(&queue->devices);
	mutex_init(&queue->lock);

	return queue;
}
EXPORT_SYMBOL_GPL(iopf_queue_alloc);

/**
 * iopf_queue_free - Free IOPF queue
 * @queue: queue to free
 *
 * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or
 * adding/removing devices on this queue anymore.
 */
void iopf_queue_free(struct iopf_queue *queue)
{
	struct iopf_device_param *iopf_param, *next;

	if (!queue)
		return;

	list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list)
		iopf_queue_remove_device(queue, iopf_param->dev);

	destroy_workqueue(queue->wq);
	kfree(queue);
}
EXPORT_SYMBOL_GPL(iopf_queue_free);
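
/*
 * Illustrative sketch only (not built): the overall lifecycle an IOMMU
 * driver is expected to follow with this queue. The "example_" names are
 * hypothetical; the iopf_queue_*() helpers are defined above and the
 * iommu_*_device_fault_handler() calls are provided by the IOMMU core.
 */
#if 0
static int example_driver_init(struct iopf_queue **queue)
{
	/* One shared fault workqueue for the whole IOMMU driver */
	*queue = iopf_queue_alloc("example-iopf");
	return *queue ? 0 : -ENOMEM;
}

static int example_enable_iopf(struct iopf_queue *queue, struct device *dev)
{
	int ret;

	/* Attach the endpoint as a fault producer... */
	ret = iopf_queue_add_device(queue, dev);
	if (ret)
		return ret;

	/* ...and route reported faults into iommu_queue_iopf() */
	ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
	if (ret)
		iopf_queue_remove_device(queue, dev);

	return ret;
}

static void example_disable_iopf(struct iopf_queue *queue, struct device *dev)
{
	/* The device must have stopped generating page requests by now */
	iommu_unregister_device_fault_handler(dev);

	/* Frees any partial faults still attached to the device */
	iopf_queue_remove_device(queue, dev);
}

static void example_driver_exit(struct iopf_queue *queue)
{
	/* Removes any devices still attached, then destroys the workqueue */
	iopf_queue_free(queue);
}
#endif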