// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2024 Intel Corporation
 */
#define pr_fmt(fmt) "iommufd: " fmt

#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/iommufd.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/poll.h>
#include <uapi/linux/iommufd.h>

#include "../iommu-priv.h"
#include "iommufd_private.h"

/* IOMMUFD_OBJ_FAULT Functions */

int iommufd_fault_iopf_enable(struct iommufd_device *idev)
{
	struct device *dev = idev->dev;
	int ret;

	/*
	 * Once we turn on PCI/PRI support for VF, the response failure code
	 * should not be forwarded to the hardware due to PRI being a shared
	 * resource between PF and VFs. There is no coordination for this
	 * shared capability. This waits for a vPRI reset to recover.
	 */
	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (pdev->is_virtfn && pci_pri_supported(pdev))
			return -EINVAL;
	}

	mutex_lock(&idev->iopf_lock);
	/* Device iopf is already on. */
	if (++idev->iopf_enabled > 1) {
		mutex_unlock(&idev->iopf_lock);
		return 0;
	}

	ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
	if (ret)
		--idev->iopf_enabled;
	mutex_unlock(&idev->iopf_lock);

	return ret;
}

void iommufd_fault_iopf_disable(struct iommufd_device *idev)
{
	mutex_lock(&idev->iopf_lock);
	if (!WARN_ON(idev->iopf_enabled == 0)) {
		if (--idev->iopf_enabled == 0)
			iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
	}
	mutex_unlock(&idev->iopf_lock);
}
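
/*
 * Illustrative pairing of the two helpers above (a sketch, not a real call
 * site; do_attach() is a placeholder for the attach path in device.c). The
 * iopf_enabled counter makes the pair safe to nest across multiple handles:
 *
 *	if (hwpt->fault) {
 *		rc = iommufd_fault_iopf_enable(idev);
 *		if (rc)
 *			return rc;
 *	}
 *	rc = do_attach(idev, hwpt);
 *	if (rc && hwpt->fault)
 *		iommufd_fault_iopf_disable(idev);
 */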
void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
				  struct iommufd_attach_handle *handle)
{
	struct iommufd_fault *fault = hwpt->fault;
	struct iopf_group *group, *next;
	struct list_head free_list;
	unsigned long index;

	if (!fault)
		return;
	INIT_LIST_HEAD(&free_list);

	mutex_lock(&fault->mutex);
	spin_lock(&fault->common.lock);
	list_for_each_entry_safe(group, next, &fault->common.deliver, node) {
		if (group->attach_handle != &handle->handle)
			continue;
		list_move(&group->node, &free_list);
	}
	spin_unlock(&fault->common.lock);

	list_for_each_entry_safe(group, next, &free_list, node) {
		list_del(&group->node);
		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
		iopf_free_group(group);
	}

	xa_for_each(&fault->response, index, group) {
		if (group->attach_handle != &handle->handle)
			continue;
		xa_erase(&fault->response, index);
		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
		iopf_free_group(group);
	}
	mutex_unlock(&fault->mutex);
}

void iommufd_fault_destroy(struct iommufd_object *obj)
{
	struct iommufd_eventq *eventq =
		container_of(obj, struct iommufd_eventq, obj);
	struct iommufd_fault *fault = eventq_to_fault(eventq);
	struct iopf_group *group, *next;
	unsigned long index;

	/*
	 * The iommufd object's reference count is zero at this point.
	 * We can be confident that no other threads are currently
	 * accessing this pointer. Therefore, acquiring the mutex here
	 * is unnecessary.
	 */
	list_for_each_entry_safe(group, next, &fault->common.deliver, node) {
		list_del(&group->node);
		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
		iopf_free_group(group);
	}
	xa_for_each(&fault->response, index, group) {
		xa_erase(&fault->response, index);
		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
		iopf_free_group(group);
	}
	xa_destroy(&fault->response);
	mutex_destroy(&fault->mutex);
}

static void iommufd_compose_fault_message(struct iommu_fault *fault,
					  struct iommu_hwpt_pgfault *hwpt_fault,
					  struct iommufd_device *idev,
					  u32 cookie)
{
	hwpt_fault->flags = fault->prm.flags;
	hwpt_fault->dev_id = idev->obj.id;
	hwpt_fault->pasid = fault->prm.pasid;
	hwpt_fault->grpid = fault->prm.grpid;
	hwpt_fault->perm = fault->prm.perm;
	hwpt_fault->addr = fault->prm.addr;
	hwpt_fault->length = 0;
	hwpt_fault->cookie = cookie;
}

/* Fetch the first node out of the fault->deliver list */
static struct iopf_group *
iommufd_fault_deliver_fetch(struct iommufd_fault *fault)
{
	struct list_head *list = &fault->common.deliver;
	struct iopf_group *group = NULL;

	spin_lock(&fault->common.lock);
	if (!list_empty(list)) {
		group = list_first_entry(list, struct iopf_group, node);
		list_del(&group->node);
	}
	spin_unlock(&fault->common.lock);
	return group;
}

/* Restore a node back to the head of the fault->deliver list */
static void iommufd_fault_deliver_restore(struct iommufd_fault *fault,
					  struct iopf_group *group)
{
	spin_lock(&fault->common.lock);
	list_add(&group->node, &fault->common.deliver);
	spin_unlock(&fault->common.lock);
}

static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
				       size_t count, loff_t *ppos)
{
	size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
	struct iommufd_eventq *eventq = filep->private_data;
	struct iommufd_fault *fault = eventq_to_fault(eventq);
	struct iommu_hwpt_pgfault data = {};
	struct iommufd_device *idev;
	struct iopf_group *group;
	struct iopf_fault *iopf;
	size_t done = 0;
	int rc = 0;

	if (*ppos || count % fault_size)
		return -ESPIPE;

	mutex_lock(&fault->mutex);
	while ((group = iommufd_fault_deliver_fetch(fault))) {
		if (done >= count ||
		    group->fault_count * fault_size > count - done) {
			iommufd_fault_deliver_restore(fault, group);
			break;
		}

		rc = xa_alloc(&fault->response, &group->cookie, group,
			      xa_limit_32b, GFP_KERNEL);
		if (rc) {
			iommufd_fault_deliver_restore(fault, group);
			break;
		}

		idev = to_iommufd_handle(group->attach_handle)->idev;
		list_for_each_entry(iopf, &group->faults, list) {
			iommufd_compose_fault_message(&iopf->fault,
						      &data, idev,
						      group->cookie);
			if (copy_to_user(buf + done, &data, fault_size)) {
				xa_erase(&fault->response, group->cookie);
				iommufd_fault_deliver_restore(fault, group);
				rc = -EFAULT;
				break;
			}
			done += fault_size;
		}
	}
	mutex_unlock(&fault->mutex);

	return done == 0 ? rc : done;
}

static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf,
					size_t count, loff_t *ppos)
{
	size_t response_size = sizeof(struct iommu_hwpt_page_response);
	struct iommufd_eventq *eventq = filep->private_data;
	struct iommufd_fault *fault = eventq_to_fault(eventq);
	struct iommu_hwpt_page_response response;
	struct iopf_group *group;
	size_t done = 0;
	int rc = 0;

	if (*ppos || count % response_size)
		return -ESPIPE;

	mutex_lock(&fault->mutex);
	while (count > done) {
		rc = copy_from_user(&response, buf + done, response_size);
		if (rc)
			break;

		static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS ==
			      (int)IOMMU_PAGE_RESP_SUCCESS);
		static_assert((int)IOMMUFD_PAGE_RESP_INVALID ==
			      (int)IOMMU_PAGE_RESP_INVALID);
		if (response.code != IOMMUFD_PAGE_RESP_SUCCESS &&
		    response.code != IOMMUFD_PAGE_RESP_INVALID) {
			rc = -EINVAL;
			break;
		}

		group = xa_erase(&fault->response, response.cookie);
		if (!group) {
			rc = -EINVAL;
			break;
		}

		iopf_group_response(group, response.code);
		iopf_free_group(group);
		done += response_size;
	}
	mutex_unlock(&fault->mutex);

	return done == 0 ? rc : done;
}
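
/*
 * A minimal userspace sketch of the fault fd protocol served by the
 * read()/write() handlers above. Field and constant names follow the uAPI
 * in include/uapi/linux/iommufd.h; handle_fault() is a placeholder for the
 * VMM's own fault handling and error handling is omitted. One response is
 * expected per fault group (cookie), after the record flagged as the last
 * page in the group:
 *
 *	struct iommu_hwpt_pgfault pgfault;
 *	struct iommu_hwpt_page_response resp = {};
 *
 *	while (read(fault_fd, &pgfault, sizeof(pgfault)) == sizeof(pgfault)) {
 *		if (!(pgfault.flags & IOMMU_PGFAULT_FLAGS_LAST_PAGE))
 *			continue;
 *		resp.cookie = pgfault.cookie;
 *		resp.code = handle_fault(&pgfault) ?
 *			    IOMMUFD_PAGE_RESP_INVALID :
 *			    IOMMUFD_PAGE_RESP_SUCCESS;
 *		write(fault_fd, &resp, sizeof(resp));
 *	}
 *
 * Reads and writes must be multiples of the record size, or the handlers
 * return -ESPIPE.
 */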
/* IOMMUFD_OBJ_VEVENTQ Functions */

void iommufd_veventq_abort(struct iommufd_object *obj)
{
	struct iommufd_eventq *eventq =
		container_of(obj, struct iommufd_eventq, obj);
	struct iommufd_veventq *veventq = eventq_to_veventq(eventq);
	struct iommufd_viommu *viommu = veventq->viommu;
	struct iommufd_vevent *cur, *next;

	lockdep_assert_held_write(&viommu->veventqs_rwsem);

	list_for_each_entry_safe(cur, next, &eventq->deliver, node) {
		list_del(&cur->node);
		if (cur != &veventq->lost_events_header)
			kfree(cur);
	}

	refcount_dec(&viommu->obj.users);
	list_del(&veventq->node);
}

void iommufd_veventq_destroy(struct iommufd_object *obj)
{
	struct iommufd_veventq *veventq = eventq_to_veventq(
		container_of(obj, struct iommufd_eventq, obj));

	down_write(&veventq->viommu->veventqs_rwsem);
	iommufd_veventq_abort(obj);
	up_write(&veventq->viommu->veventqs_rwsem);
}

static struct iommufd_vevent *
iommufd_veventq_deliver_fetch(struct iommufd_veventq *veventq)
{
	struct iommufd_eventq *eventq = &veventq->common;
	struct list_head *list = &eventq->deliver;
	struct iommufd_vevent *vevent = NULL;

	spin_lock(&eventq->lock);
	if (!list_empty(list)) {
		struct iommufd_vevent *next;

		next = list_first_entry(list, struct iommufd_vevent, node);
		/* Make a copy of the lost_events_header for copy_to_user */
		if (next == &veventq->lost_events_header) {
			vevent = kzalloc(sizeof(*vevent), GFP_ATOMIC);
			if (!vevent)
				goto out_unlock;
		}
		list_del(&next->node);
		if (vevent)
			memcpy(vevent, next, sizeof(*vevent));
		else
			vevent = next;
	}
out_unlock:
	spin_unlock(&eventq->lock);
	return vevent;
}

static void iommufd_veventq_deliver_restore(struct iommufd_veventq *veventq,
					    struct iommufd_vevent *vevent)
{
	struct iommufd_eventq *eventq = &veventq->common;
	struct list_head *list = &eventq->deliver;

	spin_lock(&eventq->lock);
	if (vevent_for_lost_events_header(vevent)) {
		/* Remove the copy of the lost_events_header */
		kfree(vevent);
		vevent = NULL;
		/* An empty list needs the lost_events_header back */
		if (list_empty(list))
			vevent = &veventq->lost_events_header;
	}
	if (vevent)
		list_add(&vevent->node, list);
	spin_unlock(&eventq->lock);
}

static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf,
					 size_t count, loff_t *ppos)
{
	struct iommufd_eventq *eventq = filep->private_data;
	struct iommufd_veventq *veventq = eventq_to_veventq(eventq);
	struct iommufd_vevent_header *hdr;
	struct iommufd_vevent *cur;
	size_t done = 0;
	int rc = 0;

	if (*ppos)
		return -ESPIPE;

	while ((cur = iommufd_veventq_deliver_fetch(veventq))) {
		/* Validate the remaining bytes against the header size */
		if (done >= count || sizeof(*hdr) > count - done) {
			iommufd_veventq_deliver_restore(veventq, cur);
			break;
		}
		hdr = &cur->header;

		/* For a normal vEVENT, validate against the full size */
		if (!vevent_for_lost_events_header(cur) &&
		    sizeof(*hdr) + cur->data_len > count - done) {
			iommufd_veventq_deliver_restore(veventq, cur);
			break;
		}

		if (copy_to_user(buf + done, hdr, sizeof(*hdr))) {
			iommufd_veventq_deliver_restore(veventq, cur);
			rc = -EFAULT;
			break;
		}
		done += sizeof(*hdr);

		if (cur->data_len &&
		    copy_to_user(buf + done, cur->event_data, cur->data_len)) {
			iommufd_veventq_deliver_restore(veventq, cur);
			rc = -EFAULT;
			break;
		}
		spin_lock(&eventq->lock);
		if (!vevent_for_lost_events_header(cur))
			veventq->num_events--;
		spin_unlock(&eventq->lock);
		done += cur->data_len;
		kfree(cur);
	}

	return done == 0 ? rc : done;
}
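
/*
 * A rough sketch of how userspace consumes the vEVENTQ fd served by the
 * read() handler above. Types and flags are per the uAPI header; the
 * payload layout depends on the vEVENTQ type, so payload_size below is a
 * placeholder that userspace derives from the type it allocated:
 *
 *	char buf[4096];
 *	ssize_t len = read(veventq_fd, buf, sizeof(buf));
 *
 *	for (char *p = buf; p < buf + len; ) {
 *		struct iommufd_vevent_header *hdr = (void *)p;
 *
 *		p += sizeof(*hdr);
 *		if (hdr->flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS)
 *			continue;	// overflow marker, no payload follows
 *		// A normal record carries a type-specific payload next,
 *		// e.g. the ARM SMMUv3 event struct for an SMMUv3 vIOMMU.
 *		p += payload_size;
 *	}
 */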
/* Common Event Queue Functions */

static __poll_t iommufd_eventq_fops_poll(struct file *filep,
					 struct poll_table_struct *wait)
{
	struct iommufd_eventq *eventq = filep->private_data;
	__poll_t pollflags = 0;

	if (eventq->obj.type == IOMMUFD_OBJ_FAULT)
		pollflags |= EPOLLOUT;

	poll_wait(filep, &eventq->wait_queue, wait);
	spin_lock(&eventq->lock);
	if (!list_empty(&eventq->deliver))
		pollflags |= EPOLLIN | EPOLLRDNORM;
	spin_unlock(&eventq->lock);

	return pollflags;
}

static int iommufd_eventq_fops_release(struct inode *inode, struct file *filep)
{
	struct iommufd_eventq *eventq = filep->private_data;

	refcount_dec(&eventq->obj.users);
	iommufd_ctx_put(eventq->ictx);
	return 0;
}

#define INIT_EVENTQ_FOPS(read_op, write_op)                      \
	((const struct file_operations){                         \
		.owner = THIS_MODULE,                            \
		.open = nonseekable_open,                        \
		.read = read_op,                                 \
		.write = write_op,                               \
		.poll = iommufd_eventq_fops_poll,                \
		.release = iommufd_eventq_fops_release,          \
	})

static int iommufd_eventq_init(struct iommufd_eventq *eventq, char *name,
			       struct iommufd_ctx *ictx,
			       const struct file_operations *fops)
{
	struct file *filep;
	int fdno;

	spin_lock_init(&eventq->lock);
	INIT_LIST_HEAD(&eventq->deliver);
	init_waitqueue_head(&eventq->wait_queue);

	filep = anon_inode_getfile(name, fops, eventq, O_RDWR);
	if (IS_ERR(filep))
		return PTR_ERR(filep);

	eventq->ictx = ictx;
	iommufd_ctx_get(eventq->ictx);
	eventq->filep = filep;
	refcount_inc(&eventq->obj.users);

	fdno = get_unused_fd_flags(O_CLOEXEC);
	if (fdno < 0)
		fput(filep);
	return fdno;
}

static const struct file_operations iommufd_fault_fops =
	INIT_EVENTQ_FOPS(iommufd_fault_fops_read, iommufd_fault_fops_write);
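
/*
 * Allocation sketch for the fault object below: userspace issues
 * IOMMU_FAULT_QUEUE_ALLOC on the iommufd and gets back both an object ID
 * (to reference from a fault-capable HWPT allocation) and the event fd
 * that the fault read()/write() handlers serve. Names follow the uAPI
 * header; error handling is omitted:
 *
 *	struct iommu_fault_alloc cmd = { .size = sizeof(cmd) };
 *
 *	ioctl(iommufd, IOMMU_FAULT_QUEUE_ALLOC, &cmd);
 *	// cmd.out_fault_id -> iommu_hwpt_alloc.fault_id (with
 *	//                     IOMMU_HWPT_FAULT_ID_VALID set in its flags)
 *	// cmd.out_fault_fd -> poll()/read()/write() for page faults
 */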
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
{
	struct iommu_fault_alloc *cmd = ucmd->cmd;
	struct iommufd_fault *fault;
	int fdno;
	int rc;

	if (cmd->flags)
		return -EOPNOTSUPP;

	fault = __iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT,
				       common.obj);
	if (IS_ERR(fault))
		return PTR_ERR(fault);

	xa_init_flags(&fault->response, XA_FLAGS_ALLOC1);
	mutex_init(&fault->mutex);

	fdno = iommufd_eventq_init(&fault->common, "[iommufd-pgfault]",
				   ucmd->ictx, &iommufd_fault_fops);
	if (fdno < 0) {
		rc = fdno;
		goto out_abort;
	}

	cmd->out_fault_id = fault->common.obj.id;
	cmd->out_fault_fd = fdno;

	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
	if (rc)
		goto out_put_fdno;
	iommufd_object_finalize(ucmd->ictx, &fault->common.obj);

	fd_install(fdno, fault->common.filep);

	return 0;
out_put_fdno:
	put_unused_fd(fdno);
	fput(fault->common.filep);
out_abort:
	iommufd_object_abort_and_destroy(ucmd->ictx, &fault->common.obj);

	return rc;
}

int iommufd_fault_iopf_handler(struct iopf_group *group)
{
	struct iommufd_hw_pagetable *hwpt;
	struct iommufd_fault *fault;

	hwpt = group->attach_handle->domain->iommufd_hwpt;
	fault = hwpt->fault;

	spin_lock(&fault->common.lock);
	list_add_tail(&group->node, &fault->common.deliver);
	spin_unlock(&fault->common.lock);

	wake_up_interruptible(&fault->common.wait_queue);

	return 0;
}

static const struct file_operations iommufd_veventq_fops =
	INIT_EVENTQ_FOPS(iommufd_veventq_fops_read, NULL);
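
/*
 * Allocation sketch for the vEVENTQ object below, mirroring the fault
 * example earlier. Names follow the uAPI header; the type value shown is
 * an example and must match a type the vIOMMU's driver reports:
 *
 *	struct iommu_veventq_alloc cmd = {
 *		.size = sizeof(cmd),
 *		.viommu_id = viommu_id,
 *		.type = IOMMU_VEVENTQ_TYPE_ARM_SMMUV3,
 *		.veventq_depth = 128,
 *	};
 *
 *	ioctl(iommufd, IOMMU_VEVENTQ_ALLOC, &cmd);
 *	// cmd.out_veventq_fd is then poll()'d and read() as shown earlier
 */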
int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd)
{
	struct iommu_veventq_alloc *cmd = ucmd->cmd;
	struct iommufd_veventq *veventq;
	struct iommufd_viommu *viommu;
	int fdno;
	int rc;

	if (cmd->flags || cmd->__reserved ||
	    cmd->type == IOMMU_VEVENTQ_TYPE_DEFAULT)
		return -EOPNOTSUPP;
	if (!cmd->veventq_depth)
		return -EINVAL;

	viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
	if (IS_ERR(viommu))
		return PTR_ERR(viommu);

	down_write(&viommu->veventqs_rwsem);

	if (iommufd_viommu_find_veventq(viommu, cmd->type)) {
		rc = -EEXIST;
		goto out_unlock_veventqs;
	}

	veventq = __iommufd_object_alloc(ucmd->ictx, veventq,
					 IOMMUFD_OBJ_VEVENTQ, common.obj);
	if (IS_ERR(veventq)) {
		rc = PTR_ERR(veventq);
		goto out_unlock_veventqs;
	}

	veventq->type = cmd->type;
	veventq->viommu = viommu;
	refcount_inc(&viommu->obj.users);
	veventq->depth = cmd->veventq_depth;
	list_add_tail(&veventq->node, &viommu->veventqs);
	veventq->lost_events_header.header.flags =
		IOMMU_VEVENTQ_FLAG_LOST_EVENTS;

	fdno = iommufd_eventq_init(&veventq->common, "[iommufd-viommu-event]",
				   ucmd->ictx, &iommufd_veventq_fops);
	if (fdno < 0) {
		rc = fdno;
		goto out_abort;
	}

	cmd->out_veventq_id = veventq->common.obj.id;
	cmd->out_veventq_fd = fdno;

	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
	if (rc)
		goto out_put_fdno;

	iommufd_object_finalize(ucmd->ictx, &veventq->common.obj);
	fd_install(fdno, veventq->common.filep);
	goto out_unlock_veventqs;

out_put_fdno:
	put_unused_fd(fdno);
	fput(veventq->common.filep);
out_abort:
	iommufd_object_abort_and_destroy(ucmd->ictx, &veventq->common.obj);
out_unlock_veventqs:
	up_write(&viommu->veventqs_rwsem);
	iommufd_put_object(ucmd->ictx, &viommu->obj);
	return rc;
}