// SPDX-License-Identifier: GPL-2.0
/*
 * Virtio driver for the paravirtualized IOMMU
 *
 * Copyright (C) 2019 Arm Limited
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/delay.h>
#include <linux/dma-map-ops.h>
#include <linux/freezer.h>
#include <linux/interval_tree.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/pci.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ids.h>
#include <linux/wait.h>

#include <uapi/linux/virtio_iommu.h>

#include "dma-iommu.h"

#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000

#define VIOMMU_REQUEST_VQ		0
#define VIOMMU_EVENT_VQ			1
#define VIOMMU_NR_VQS			2

struct viommu_dev {
	struct iommu_device		iommu;
	struct device			*dev;
	struct virtio_device		*vdev;

	struct ida			domain_ids;

	struct virtqueue		*vqs[VIOMMU_NR_VQS];
	spinlock_t			request_lock;
	struct list_head		requests;
	void				*evts;

	/* Device configuration */
	struct iommu_domain_geometry	geometry;
	u64				pgsize_bitmap;
	u32				first_domain;
	u32				last_domain;
	u32				identity_domain_id;
	/* Supported MAP flags */
	u32				map_flags;
	u32				probe_size;
};

struct viommu_mapping {
	phys_addr_t			paddr;
	struct interval_tree_node	iova;
	u32				flags;
};

struct viommu_domain {
	struct iommu_domain		domain;
	struct viommu_dev		*viommu;
	unsigned int			id;
	u32				map_flags;

	spinlock_t			mappings_lock;
	struct rb_root_cached		mappings;

	unsigned long			nr_endpoints;
};

struct viommu_endpoint {
	struct device			*dev;
	struct viommu_dev		*viommu;
	struct viommu_domain		*vdomain;
	struct list_head		resv_regions;
};

struct viommu_request {
	struct list_head		list;
	void				*writeback;
	unsigned int			write_offset;
	unsigned int			len;
	char				buf[] __counted_by(len);
};

#define VIOMMU_FAULT_RESV_MASK		0xffffff00

struct viommu_event {
	union {
		u32			head;
		struct virtio_iommu_fault fault;
	};
};

static struct viommu_domain viommu_identity_domain;

#define to_viommu_domain(domain)	\
	container_of(domain, struct viommu_domain, domain)

static int viommu_get_req_errno(void *buf, size_t len)
{
	struct virtio_iommu_req_tail *tail = buf + len - sizeof(*tail);

	switch (tail->status) {
	case VIRTIO_IOMMU_S_OK:
		return 0;
	case VIRTIO_IOMMU_S_UNSUPP:
		return -ENOSYS;
	case VIRTIO_IOMMU_S_INVAL:
		return -EINVAL;
	case VIRTIO_IOMMU_S_RANGE:
		return -ERANGE;
	case VIRTIO_IOMMU_S_NOENT:
		return -ENOENT;
	case VIRTIO_IOMMU_S_FAULT:
		return -EFAULT;
	case VIRTIO_IOMMU_S_NOMEM:
		return -ENOMEM;
	case VIRTIO_IOMMU_S_IOERR:
	case VIRTIO_IOMMU_S_DEVERR:
	default:
		return -EIO;
	}
}

static void viommu_set_req_status(void *buf, size_t len, int status)
{
	struct virtio_iommu_req_tail *tail = buf + len - sizeof(*tail);

	tail->status = status;
}

static off_t viommu_get_write_desc_offset(struct viommu_dev *viommu,
					  struct virtio_iommu_req_head *req,
					  size_t len)
{
	size_t tail_size = sizeof(struct virtio_iommu_req_tail);

	if (req->type == VIRTIO_IOMMU_T_PROBE)
		return len - viommu->probe_size - tail_size;

	return len - tail_size;
}

/*
 * __viommu_sync_req - Complete all in-flight requests
 *
 * Wait for all added requests to complete. When this function returns, all
 * requests that were in-flight at the time of the call have completed.
 */
static int __viommu_sync_req(struct viommu_dev *viommu)
{
	unsigned int len;
	size_t write_len;
	struct viommu_request *req;
	struct virtqueue *vq = viommu->vqs[VIOMMU_REQUEST_VQ];

	assert_spin_locked(&viommu->request_lock);

	virtqueue_kick(vq);

	while (!list_empty(&viommu->requests)) {
		len = 0;
		req = virtqueue_get_buf(vq, &len);
		if (!req)
			continue;

		if (!len)
			viommu_set_req_status(req->buf, req->len,
					      VIRTIO_IOMMU_S_IOERR);

		write_len = req->len - req->write_offset;
		if (req->writeback && len == write_len)
			memcpy(req->writeback, req->buf + req->write_offset,
			       write_len);

		list_del(&req->list);
		kfree(req);
	}

	return 0;
}

static int viommu_sync_req(struct viommu_dev *viommu)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&viommu->request_lock, flags);
	ret = __viommu_sync_req(viommu);
	if (ret)
		dev_dbg(viommu->dev, "could not sync requests (%d)\n", ret);
	spin_unlock_irqrestore(&viommu->request_lock, flags);

	return ret;
}

/*
 * __viommu_add_req - Add one request to the queue
 * @buf: pointer to the request buffer
 * @len: length of the request buffer
 * @writeback: copy data back to the buffer when the request completes.
 *
 * Add a request to the queue. Only synchronize the queue if it's already full.
 * Otherwise don't kick the queue nor wait for requests to complete.
 *
 * When @writeback is true, data written by the device, including the request
 * status, is copied into @buf after the request completes. This is unsafe if
 * the caller allocates @buf on stack and drops the lock between add_req() and
 * sync_req().
 *
 * Return 0 if the request was successfully added to the queue.
 */
static int __viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len,
			    bool writeback)
{
	int ret;
	off_t write_offset;
	struct viommu_request *req;
	struct scatterlist top_sg, bottom_sg;
	struct scatterlist *sg[2] = { &top_sg, &bottom_sg };
	struct virtqueue *vq = viommu->vqs[VIOMMU_REQUEST_VQ];

	assert_spin_locked(&viommu->request_lock);

	write_offset = viommu_get_write_desc_offset(viommu, buf, len);
	if (write_offset <= 0)
		return -EINVAL;

	req = kzalloc(struct_size(req, buf, len), GFP_ATOMIC);
	if (!req)
		return -ENOMEM;

	req->len = len;
	if (writeback) {
		req->writeback = buf + write_offset;
		req->write_offset = write_offset;
	}
	memcpy(&req->buf, buf, write_offset);

	sg_init_one(&top_sg, req->buf, write_offset);
	sg_init_one(&bottom_sg, req->buf + write_offset, len - write_offset);

	ret = virtqueue_add_sgs(vq, sg, 1, 1, req, GFP_ATOMIC);
	if (ret == -ENOSPC) {
		/* If the queue is full, sync and retry */
		if (!__viommu_sync_req(viommu))
			ret = virtqueue_add_sgs(vq, sg, 1, 1, req, GFP_ATOMIC);
	}
	if (ret)
		goto err_free;

	list_add_tail(&req->list, &viommu->requests);
	return 0;

err_free:
	kfree(req);
	return ret;
}

static int viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&viommu->request_lock, flags);
	ret = __viommu_add_req(viommu, buf, len, false);
	if (ret)
		dev_dbg(viommu->dev, "could not add request: %d\n", ret);
	spin_unlock_irqrestore(&viommu->request_lock, flags);

	return ret;
}

/*
 * Send a request and wait for it to complete. Return the request status (as an
 * errno)
 */
static int viommu_send_req_sync(struct viommu_dev *viommu, void *buf,
				size_t len)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&viommu->request_lock, flags);

	ret = __viommu_add_req(viommu, buf, len, true);
	if (ret) {
		dev_dbg(viommu->dev, "could not add request (%d)\n", ret);
		goto out_unlock;
	}

	ret = __viommu_sync_req(viommu);
	if (ret) {
		dev_dbg(viommu->dev, "could not sync requests (%d)\n", ret);
		/* Fall-through (get the actual request status) */
	}

	ret = viommu_get_req_errno(buf, len);
out_unlock:
	spin_unlock_irqrestore(&viommu->request_lock, flags);
	return ret;
}

static int viommu_send_attach_req(struct viommu_dev *viommu, struct device *dev,
				  struct virtio_iommu_req_attach *req)
{
	int ret;
	unsigned int i;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);

	for (i = 0; i < fwspec->num_ids; i++) {
		req->endpoint = cpu_to_le32(fwspec->ids[i]);
		ret = viommu_send_req_sync(viommu, req, sizeof(*req));
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * viommu_add_mapping - add a mapping to the internal tree
 *
 * On success, return 0. Otherwise return -ENOMEM.
 */
static int viommu_add_mapping(struct viommu_domain *vdomain, u64 iova, u64 end,
			      phys_addr_t paddr, u32 flags)
{
	unsigned long irqflags;
	struct viommu_mapping *mapping;

	mapping = kzalloc(sizeof(*mapping), GFP_ATOMIC);
	if (!mapping)
		return -ENOMEM;

	mapping->paddr		= paddr;
	mapping->iova.start	= iova;
	mapping->iova.last	= end;
	mapping->flags		= flags;

	spin_lock_irqsave(&vdomain->mappings_lock, irqflags);
	interval_tree_insert(&mapping->iova, &vdomain->mappings);
	spin_unlock_irqrestore(&vdomain->mappings_lock, irqflags);

	return 0;
}

/*
 * viommu_del_mappings - remove mappings from the internal tree
 *
 * @vdomain: the domain
 * @iova: start of the range
 * @end: end of the range
 *
 * On success, returns the number of unmapped bytes
 */
static size_t viommu_del_mappings(struct viommu_domain *vdomain,
				  u64 iova, u64 end)
{
	size_t unmapped = 0;
	unsigned long flags;
	struct viommu_mapping *mapping = NULL;
	struct interval_tree_node *node, *next;

	spin_lock_irqsave(&vdomain->mappings_lock, flags);
	next = interval_tree_iter_first(&vdomain->mappings, iova, end);
	while (next) {
		node = next;
		mapping = container_of(node, struct viommu_mapping, iova);
		next = interval_tree_iter_next(node, iova, end);

		/* Trying to split a mapping? */
		if (mapping->iova.start < iova)
			break;

		/*
		 * Virtio-iommu doesn't allow UNMAP to split a mapping created
		 * with a single MAP request, so remove the full mapping.
		 */
		unmapped += mapping->iova.last - mapping->iova.start + 1;

		interval_tree_remove(node, &vdomain->mappings);
		kfree(mapping);
	}
	spin_unlock_irqrestore(&vdomain->mappings_lock, flags);

	return unmapped;
}

/*
 * Fill the domain with identity mappings, skipping the device's reserved
 * regions.
 */
static int viommu_domain_map_identity(struct viommu_endpoint *vdev,
				      struct viommu_domain *vdomain)
{
	int ret;
	struct iommu_resv_region *resv;
	u64 iova = vdomain->domain.geometry.aperture_start;
	u64 limit = vdomain->domain.geometry.aperture_end;
	u32 flags = VIRTIO_IOMMU_MAP_F_READ | VIRTIO_IOMMU_MAP_F_WRITE;
	unsigned long granule = 1UL << __ffs(vdomain->domain.pgsize_bitmap);

	iova = ALIGN(iova, granule);
	limit = ALIGN_DOWN(limit + 1, granule) - 1;

	list_for_each_entry(resv, &vdev->resv_regions, list) {
		u64 resv_start = ALIGN_DOWN(resv->start, granule);
		u64 resv_end = ALIGN(resv->start + resv->length, granule) - 1;

		if (resv_end < iova || resv_start > limit)
			/* No overlap */
			continue;

		if (resv_start > iova) {
			ret = viommu_add_mapping(vdomain, iova, resv_start - 1,
						 (phys_addr_t)iova, flags);
			if (ret)
				goto err_unmap;
		}

		if (resv_end >= limit)
			return 0;

		iova = resv_end + 1;
	}

	ret = viommu_add_mapping(vdomain, iova, limit, (phys_addr_t)iova,
				 flags);
	if (ret)
		goto err_unmap;
	return 0;

err_unmap:
	viommu_del_mappings(vdomain, 0, iova);
	return ret;
}

/*
 * viommu_replay_mappings - re-send MAP requests
 *
 * When reattaching a domain that was previously detached from all endpoints,
 * mappings were deleted from the device. Re-create the mappings available in
 * the internal tree.
 */
static int viommu_replay_mappings(struct viommu_domain *vdomain)
{
	int ret = 0;
	unsigned long flags;
	struct viommu_mapping *mapping;
	struct interval_tree_node *node;
	struct virtio_iommu_req_map map;

	spin_lock_irqsave(&vdomain->mappings_lock, flags);
	node = interval_tree_iter_first(&vdomain->mappings, 0, -1UL);
	while (node) {
		mapping = container_of(node, struct viommu_mapping, iova);
		map = (struct virtio_iommu_req_map) {
			.head.type	= VIRTIO_IOMMU_T_MAP,
			.domain		= cpu_to_le32(vdomain->id),
			.virt_start	= cpu_to_le64(mapping->iova.start),
			.virt_end	= cpu_to_le64(mapping->iova.last),
			.phys_start	= cpu_to_le64(mapping->paddr),
			.flags		= cpu_to_le32(mapping->flags),
		};

		ret = viommu_send_req_sync(vdomain->viommu, &map, sizeof(map));
		if (ret)
			break;

		node = interval_tree_iter_next(node, 0, -1UL);
	}
	spin_unlock_irqrestore(&vdomain->mappings_lock, flags);

	return ret;
}

static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
			       struct virtio_iommu_probe_resv_mem *mem,
			       size_t len)
{
	size_t size;
	u64 start64, end64;
	phys_addr_t start, end;
	struct iommu_resv_region *region = NULL, *next;
	unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	start = start64 = le64_to_cpu(mem->start);
	end = end64 = le64_to_cpu(mem->end);
	size = end64 - start64 + 1;

	/* Catch any overflow, including the unlikely end64 - start64 + 1 = 0 */
	if (start != start64 || end != end64 || size < end64 - start64)
		return -EOVERFLOW;

	if (len < sizeof(*mem))
		return -EINVAL;

	switch (mem->subtype) {
	default:
		dev_warn(vdev->dev, "unknown resv mem subtype 0x%x\n",
			 mem->subtype);
		fallthrough;
	case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
		region = iommu_alloc_resv_region(start, size, 0,
						 IOMMU_RESV_RESERVED,
						 GFP_KERNEL);
		break;
	case VIRTIO_IOMMU_RESV_MEM_T_MSI:
		region = iommu_alloc_resv_region(start, size, prot,
						 IOMMU_RESV_MSI,
						 GFP_KERNEL);
		break;
	}
	if (!region)
		return -ENOMEM;

	/* Keep the list sorted */
	list_for_each_entry(next, &vdev->resv_regions, list) {
		if (next->start > region->start)
			break;
	}
	list_add_tail(&region->list, &next->list);
	return 0;
}

static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
{
	int ret;
	u16 type, len;
	size_t cur = 0;
	size_t probe_len;
	struct virtio_iommu_req_probe *probe;
	struct virtio_iommu_probe_property *prop;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);

	if (!fwspec->num_ids)
		return -EINVAL;

	probe_len = sizeof(*probe) + viommu->probe_size +
		    sizeof(struct virtio_iommu_req_tail);
	probe = kzalloc(probe_len, GFP_KERNEL);
	if (!probe)
		return -ENOMEM;

	probe->head.type = VIRTIO_IOMMU_T_PROBE;
	/*
	 * For now, assume that properties of an endpoint that outputs multiple
	 * IDs are consistent. Only probe the first one.
	 */
	probe->endpoint = cpu_to_le32(fwspec->ids[0]);

	ret = viommu_send_req_sync(viommu, probe, probe_len);
	if (ret)
		goto out_free;

	prop = (void *)probe->properties;
	type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;

	while (type != VIRTIO_IOMMU_PROBE_T_NONE &&
	       cur < viommu->probe_size) {
		len = le16_to_cpu(prop->length) + sizeof(*prop);

		switch (type) {
		case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
			ret = viommu_add_resv_mem(vdev, (void *)prop, len);
			break;
		default:
			dev_err(dev, "unknown viommu prop 0x%x\n", type);
		}

		if (ret)
			dev_err(dev, "failed to parse viommu prop 0x%x\n", type);

		cur += len;
		if (cur >= viommu->probe_size)
			break;

		prop = (void *)probe->properties + cur;
		type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
	}

out_free:
	kfree(probe);
	return ret;
}

static int viommu_fault_handler(struct viommu_dev *viommu,
				struct virtio_iommu_fault *fault)
{
	char *reason_str;

	u8 reason = fault->reason;
	u32 flags = le32_to_cpu(fault->flags);
	u32 endpoint = le32_to_cpu(fault->endpoint);
	u64 address = le64_to_cpu(fault->address);

	switch (reason) {
	case VIRTIO_IOMMU_FAULT_R_DOMAIN:
		reason_str = "domain";
		break;
	case VIRTIO_IOMMU_FAULT_R_MAPPING:
		reason_str = "page";
		break;
	case VIRTIO_IOMMU_FAULT_R_UNKNOWN:
	default:
		reason_str = "unknown";
		break;
	}

	/* TODO: find EP by ID and report_iommu_fault */
	if (flags & VIRTIO_IOMMU_FAULT_F_ADDRESS)
		dev_err_ratelimited(viommu->dev, "%s fault from EP %u at %#llx [%s%s%s]\n",
				    reason_str, endpoint, address,
				    flags & VIRTIO_IOMMU_FAULT_F_READ ? "R" : "",
				    flags & VIRTIO_IOMMU_FAULT_F_WRITE ? "W" : "",
				    flags & VIRTIO_IOMMU_FAULT_F_EXEC ? "X" : "");
	else
		dev_err_ratelimited(viommu->dev, "%s fault from EP %u\n",
				    reason_str, endpoint);
	return 0;
}

static void viommu_event_handler(struct virtqueue *vq)
{
	int ret;
	unsigned int len;
	struct scatterlist sg[1];
	struct viommu_event *evt;
	struct viommu_dev *viommu = vq->vdev->priv;

	while ((evt = virtqueue_get_buf(vq, &len)) != NULL) {
		if (len > sizeof(*evt)) {
			dev_err(viommu->dev,
				"invalid event buffer (len %u != %zu)\n",
				len, sizeof(*evt));
		} else if (!(evt->head & VIOMMU_FAULT_RESV_MASK)) {
			viommu_fault_handler(viommu, &evt->fault);
		}

		sg_init_one(sg, evt, sizeof(*evt));
		ret = virtqueue_add_inbuf(vq, sg, 1, evt, GFP_ATOMIC);
		if (ret)
			dev_err(viommu->dev, "could not add event buffer\n");
	}

	virtqueue_kick(vq);
}

/* IOMMU API */

static struct iommu_domain *viommu_domain_alloc_paging(struct device *dev)
{
	struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
	struct viommu_dev *viommu = vdev->viommu;
	unsigned long viommu_page_size;
	struct viommu_domain *vdomain;
	int ret;

	viommu_page_size = 1UL << __ffs(viommu->pgsize_bitmap);
	if (viommu_page_size > PAGE_SIZE) {
		dev_err(vdev->dev,
			"granule 0x%lx larger than system page size 0x%lx\n",
			viommu_page_size, PAGE_SIZE);
		return ERR_PTR(-ENODEV);
	}

	vdomain = kzalloc(sizeof(*vdomain), GFP_KERNEL);
	if (!vdomain)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&vdomain->mappings_lock);
	vdomain->mappings = RB_ROOT_CACHED;

	ret = ida_alloc_range(&viommu->domain_ids, viommu->first_domain,
			      viommu->last_domain, GFP_KERNEL);
	if (ret < 0) {
		kfree(vdomain);
		return ERR_PTR(ret);
	}

	vdomain->id = (unsigned int)ret;

	vdomain->domain.pgsize_bitmap = viommu->pgsize_bitmap;
	vdomain->domain.geometry = viommu->geometry;

	vdomain->map_flags = viommu->map_flags;
	vdomain->viommu = viommu;

	return &vdomain->domain;
}

static void viommu_domain_free(struct iommu_domain *domain)
{
	struct viommu_domain *vdomain = to_viommu_domain(domain);

	/* Free all remaining mappings */
	viommu_del_mappings(vdomain, 0, ULLONG_MAX);

	if (vdomain->viommu)
		ida_free(&vdomain->viommu->domain_ids, vdomain->id);

	kfree(vdomain);
}

static struct iommu_domain *viommu_domain_alloc_identity(struct device *dev)
{
	struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
	struct iommu_domain *domain;
	int ret;

	if (virtio_has_feature(vdev->viommu->vdev,
			       VIRTIO_IOMMU_F_BYPASS_CONFIG))
		return &viommu_identity_domain.domain;

	domain = viommu_domain_alloc_paging(dev);
	if (IS_ERR(domain))
		return domain;

	ret = viommu_domain_map_identity(vdev, to_viommu_domain(domain));
	if (ret) {
		viommu_domain_free(domain);
		return ERR_PTR(ret);
	}
	return domain;
}

static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret = 0;
	struct virtio_iommu_req_attach req;
	struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
	struct viommu_domain *vdomain = to_viommu_domain(domain);

	if (vdomain->viommu != vdev->viommu)
		return -EINVAL;

	/*
	 * In the virtio-iommu device, when attaching the endpoint to a new
	 * domain, it is detached from the old one and, if as a result the
	 * old domain isn't attached to any endpoint, all mappings are removed
	 * from the old domain and it is freed.
	 *
	 * In the driver the old domain still exists, and its mappings will be
	 * recreated if it gets reattached to an endpoint. Otherwise it will be
	 * freed explicitly.
	 *
	 * vdev->vdomain is protected by group->mutex
	 */
	if (vdev->vdomain)
		vdev->vdomain->nr_endpoints--;

	req = (struct virtio_iommu_req_attach) {
		.head.type	= VIRTIO_IOMMU_T_ATTACH,
		.domain		= cpu_to_le32(vdomain->id),
	};

	ret = viommu_send_attach_req(vdomain->viommu, dev, &req);
	if (ret)
		return ret;

	if (!vdomain->nr_endpoints) {
		/*
		 * This endpoint is the first to be attached to the domain.
		 * Replay existing mappings (e.g. SW MSI).
		 */
		ret = viommu_replay_mappings(vdomain);
		if (ret)
			return ret;
	}

	vdomain->nr_endpoints++;
	vdev->vdomain = vdomain;

	return 0;
}

static int viommu_attach_identity_domain(struct iommu_domain *domain,
					 struct device *dev)
{
	int ret = 0;
	struct virtio_iommu_req_attach req;
	struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
	struct viommu_domain *vdomain = to_viommu_domain(domain);

	req = (struct virtio_iommu_req_attach) {
		.head.type	= VIRTIO_IOMMU_T_ATTACH,
		.domain		= cpu_to_le32(vdev->viommu->identity_domain_id),
		.flags		= cpu_to_le32(VIRTIO_IOMMU_ATTACH_F_BYPASS),
	};

	ret = viommu_send_attach_req(vdev->viommu, dev, &req);
	if (ret)
		return ret;

	if (vdev->vdomain)
		vdev->vdomain->nr_endpoints--;
	vdomain->nr_endpoints++;
	vdev->vdomain = vdomain;
	return 0;
}

static struct viommu_domain viommu_identity_domain = {
	.domain = {
		.type = IOMMU_DOMAIN_IDENTITY,
		.ops = &(const struct iommu_domain_ops) {
			.attach_dev = viommu_attach_identity_domain,
		},
	},
};

static void viommu_detach_dev(struct viommu_endpoint *vdev)
{
	int i;
	struct virtio_iommu_req_detach req;
	struct viommu_domain *vdomain = vdev->vdomain;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(vdev->dev);

	if (!vdomain)
		return;

	req = (struct virtio_iommu_req_detach) {
		.head.type	= VIRTIO_IOMMU_T_DETACH,
		.domain		= cpu_to_le32(vdomain->id),
	};

	for (i = 0; i < fwspec->num_ids; i++) {
		req.endpoint = cpu_to_le32(fwspec->ids[i]);
		WARN_ON(viommu_send_req_sync(vdev->viommu, &req, sizeof(req)));
	}
	vdomain->nr_endpoints--;
	vdev->vdomain = NULL;
}

static int viommu_map_pages(struct iommu_domain *domain, unsigned long iova,
			    phys_addr_t paddr, size_t pgsize, size_t pgcount,
			    int prot, gfp_t gfp, size_t *mapped)
{
	int ret;
	u32 flags;
	size_t size = pgsize * pgcount;
	u64 end = iova + size - 1;
	struct virtio_iommu_req_map map;
	struct viommu_domain *vdomain = to_viommu_domain(domain);

	flags = (prot & IOMMU_READ ? VIRTIO_IOMMU_MAP_F_READ : 0) |
		(prot & IOMMU_WRITE ? VIRTIO_IOMMU_MAP_F_WRITE : 0) |
		(prot & IOMMU_MMIO ? VIRTIO_IOMMU_MAP_F_MMIO : 0);

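	/* Reject mapping flags that the device did not advertise support for */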
	if (flags & ~vdomain->map_flags)
		return -EINVAL;

	ret = viommu_add_mapping(vdomain, iova, end, paddr, flags);
	if (ret)
		return ret;

	if (vdomain->nr_endpoints) {
		map = (struct virtio_iommu_req_map) {
			.head.type	= VIRTIO_IOMMU_T_MAP,
			.domain		= cpu_to_le32(vdomain->id),
			.virt_start	= cpu_to_le64(iova),
			.phys_start	= cpu_to_le64(paddr),
			.virt_end	= cpu_to_le64(end),
			.flags		= cpu_to_le32(flags),
		};

		ret = viommu_add_req(vdomain->viommu, &map, sizeof(map));
		if (ret) {
			viommu_del_mappings(vdomain, iova, end);
			return ret;
		}
	}
	if (mapped)
		*mapped = size;

	return 0;
}

static size_t viommu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
				 size_t pgsize, size_t pgcount,
				 struct iommu_iotlb_gather *gather)
{
	int ret = 0;
	size_t unmapped;
	struct virtio_iommu_req_unmap unmap;
	struct viommu_domain *vdomain = to_viommu_domain(domain);
	size_t size = pgsize * pgcount;

	unmapped = viommu_del_mappings(vdomain, iova, iova + size - 1);
	if (unmapped < size)
		return 0;

	/* Device already removed all mappings after detach. */
	if (!vdomain->nr_endpoints)
		return unmapped;

	unmap = (struct virtio_iommu_req_unmap) {
		.head.type	= VIRTIO_IOMMU_T_UNMAP,
		.domain		= cpu_to_le32(vdomain->id),
		.virt_start	= cpu_to_le64(iova),
		.virt_end	= cpu_to_le64(iova + unmapped - 1),
	};

	ret = viommu_add_req(vdomain->viommu, &unmap, sizeof(unmap));
	return ret ? 0 : unmapped;
}

static phys_addr_t viommu_iova_to_phys(struct iommu_domain *domain,
				       dma_addr_t iova)
{
	u64 paddr = 0;
	unsigned long flags;
	struct viommu_mapping *mapping;
	struct interval_tree_node *node;
	struct viommu_domain *vdomain = to_viommu_domain(domain);

	spin_lock_irqsave(&vdomain->mappings_lock, flags);
	node = interval_tree_iter_first(&vdomain->mappings, iova, iova);
	if (node) {
		mapping = container_of(node, struct viommu_mapping, iova);
		paddr = mapping->paddr + (iova - mapping->iova.start);
	}
	spin_unlock_irqrestore(&vdomain->mappings_lock, flags);

	return paddr;
}

static void viommu_iotlb_sync(struct iommu_domain *domain,
			      struct iommu_iotlb_gather *gather)
{
	struct viommu_domain *vdomain = to_viommu_domain(domain);

	viommu_sync_req(vdomain->viommu);
}

static int viommu_iotlb_sync_map(struct iommu_domain *domain,
				 unsigned long iova, size_t size)
{
	struct viommu_domain *vdomain = to_viommu_domain(domain);

	/*
	 * May be called before the viommu is initialized including
	 * while creating direct mapping
	 */
	if (!vdomain->nr_endpoints)
		return 0;
	return viommu_sync_req(vdomain->viommu);
}

static void viommu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct viommu_domain *vdomain = to_viommu_domain(domain);

	/*
	 * May be called before the viommu is initialized including
	 * while creating direct mapping
	 */
	if (!vdomain->nr_endpoints)
		return;
	viommu_sync_req(vdomain->viommu);
}

static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
{
	struct iommu_resv_region *entry, *new_entry, *msi = NULL;
	struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	list_for_each_entry(entry, &vdev->resv_regions, list) {
		if (entry->type == IOMMU_RESV_MSI)
			msi = entry;

		new_entry = kmemdup(entry, sizeof(*entry), GFP_KERNEL);
		if (!new_entry)
			return;
		list_add_tail(&new_entry->list, head);
	}

	/*
	 * If the device didn't register any bypass MSI window, add a
	 * software-mapped region.
	 */
	if (!msi) {
		msi = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					      prot, IOMMU_RESV_SW_MSI,
					      GFP_KERNEL);
		if (!msi)
			return;

		list_add_tail(&msi->list, head);
	}

	iommu_dma_get_resv_regions(dev, head);
}

static const struct iommu_ops viommu_ops;
static struct virtio_driver virtio_iommu_drv;

static int viommu_match_node(struct device *dev, const void *data)
{
	return device_match_fwnode(dev->parent, data);
}

static struct viommu_dev *viommu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = driver_find_device(&virtio_iommu_drv.driver, NULL,
						fwnode, viommu_match_node);
	put_device(dev);

	return dev ? dev_to_virtio(dev)->priv : NULL;
}

static struct iommu_device *viommu_probe_device(struct device *dev)
{
	int ret;
	struct viommu_endpoint *vdev;
	struct viommu_dev *viommu = NULL;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);

	viommu = viommu_get_by_fwnode(fwspec->iommu_fwnode);
	if (!viommu)
		return ERR_PTR(-ENODEV);

	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
	if (!vdev)
		return ERR_PTR(-ENOMEM);

	vdev->dev = dev;
	vdev->viommu = viommu;
	INIT_LIST_HEAD(&vdev->resv_regions);
	dev_iommu_priv_set(dev, vdev);

	if (viommu->probe_size) {
		/* Get additional information for this endpoint */
		ret = viommu_probe_endpoint(viommu, dev);
		if (ret)
			goto err_free_dev;
	}

	return &viommu->iommu;

err_free_dev:
	iommu_put_resv_regions(dev, &vdev->resv_regions);
	kfree(vdev);

	return ERR_PTR(ret);
}

static void viommu_release_device(struct device *dev)
{
	struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);

	viommu_detach_dev(vdev);
	iommu_put_resv_regions(dev, &vdev->resv_regions);
	kfree(vdev);
}

static struct iommu_group *viommu_device_group(struct device *dev)
{
	if (dev_is_pci(dev))
		return pci_device_group(dev);
	else
		return generic_device_group(dev);
}

static int viommu_of_xlate(struct device *dev,
			   const struct of_phandle_args *args)
{
	return iommu_fwspec_add_ids(dev, args->args, 1);
}

static bool viommu_capable(struct device *dev, enum iommu_cap cap)
{
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		return true;
	case IOMMU_CAP_DEFERRED_FLUSH:
		return true;
	default:
		return false;
	}
}

static const struct iommu_ops viommu_ops = {
	.capable			= viommu_capable,
	.domain_alloc_identity		= viommu_domain_alloc_identity,
	.domain_alloc_paging		= viommu_domain_alloc_paging,
	.probe_device			= viommu_probe_device,
	.release_device			= viommu_release_device,
	.device_group			= viommu_device_group,
	.get_resv_regions		= viommu_get_resv_regions,
	.of_xlate			= viommu_of_xlate,
	.owner				= THIS_MODULE,
	.default_domain_ops = &(const struct iommu_domain_ops) {
		.attach_dev		= viommu_attach_dev,
		.map_pages		= viommu_map_pages,
		.unmap_pages		= viommu_unmap_pages,
		.iova_to_phys		= viommu_iova_to_phys,
		.flush_iotlb_all	= viommu_flush_iotlb_all,
		.iotlb_sync		= viommu_iotlb_sync,
		.iotlb_sync_map		= viommu_iotlb_sync_map,
		.free			= viommu_domain_free,
	}
};

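/* Virtio driver */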

static int viommu_init_vqs(struct viommu_dev *viommu)
{
	struct virtio_device *vdev = dev_to_virtio(viommu->dev);
	struct virtqueue_info vqs_info[] = {
		{ "request" },
		{ "event", viommu_event_handler },
	};

	return virtio_find_vqs(vdev, VIOMMU_NR_VQS, viommu->vqs,
			       vqs_info, NULL);
}

static int viommu_fill_evtq(struct viommu_dev *viommu)
{
	int i, ret;
	struct scatterlist sg[1];
	struct viommu_event *evts;
	struct virtqueue *vq = viommu->vqs[VIOMMU_EVENT_VQ];
	size_t nr_evts = vq->num_free;

	viommu->evts = evts = devm_kmalloc_array(viommu->dev, nr_evts,
						 sizeof(*evts), GFP_KERNEL);
	if (!evts)
		return -ENOMEM;

	for (i = 0; i < nr_evts; i++) {
		sg_init_one(sg, &evts[i], sizeof(*evts));
		ret = virtqueue_add_inbuf(vq, sg, 1, &evts[i], GFP_KERNEL);
		if (ret)
			return ret;
	}

	return 0;
}

static int viommu_probe(struct virtio_device *vdev)
{
	struct device *parent_dev = vdev->dev.parent;
	struct viommu_dev *viommu = NULL;
	struct device *dev = &vdev->dev;
	u64 input_start = 0;
	u64 input_end = -1UL;
	int ret;

	if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1) ||
	    !virtio_has_feature(vdev, VIRTIO_IOMMU_F_MAP_UNMAP))
		return -ENODEV;

	viommu = devm_kzalloc(dev, sizeof(*viommu), GFP_KERNEL);
	if (!viommu)
		return -ENOMEM;

	spin_lock_init(&viommu->request_lock);
	ida_init(&viommu->domain_ids);
	viommu->dev = dev;
	viommu->vdev = vdev;
	INIT_LIST_HEAD(&viommu->requests);

	ret = viommu_init_vqs(viommu);
	if (ret)
		return ret;

	virtio_cread_le(vdev, struct virtio_iommu_config, page_size_mask,
			&viommu->pgsize_bitmap);

	if (!viommu->pgsize_bitmap) {
		ret = -EINVAL;
		goto err_free_vqs;
	}

	viommu->map_flags = VIRTIO_IOMMU_MAP_F_READ | VIRTIO_IOMMU_MAP_F_WRITE;
	viommu->last_domain = ~0U;

	/* Optional features */
	virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
				struct virtio_iommu_config, input_range.start,
				&input_start);

	virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
				struct virtio_iommu_config, input_range.end,
				&input_end);

	virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
				struct virtio_iommu_config, domain_range.start,
				&viommu->first_domain);

	virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
				struct virtio_iommu_config, domain_range.end,
				&viommu->last_domain);

	virtio_cread_le_feature(vdev, VIRTIO_IOMMU_F_PROBE,
				struct virtio_iommu_config, probe_size,
				&viommu->probe_size);

	viommu->geometry = (struct iommu_domain_geometry) {
		.aperture_start	= input_start,
		.aperture_end	= input_end,
		.force_aperture	= true,
	};

	if (virtio_has_feature(vdev, VIRTIO_IOMMU_F_MMIO))
		viommu->map_flags |= VIRTIO_IOMMU_MAP_F_MMIO;

	/* Reserve an ID to use as the bypass domain */
	if (virtio_has_feature(viommu->vdev, VIRTIO_IOMMU_F_BYPASS_CONFIG)) {
		viommu->identity_domain_id = viommu->first_domain;
		viommu->first_domain++;
	}

	virtio_device_ready(vdev);

	/* Populate the event queue with buffers */
	ret = viommu_fill_evtq(viommu);
	if (ret)
		goto err_free_vqs;

	ret = iommu_device_sysfs_add(&viommu->iommu, dev, NULL, "%s",
				     virtio_bus_name(vdev));
	if (ret)
		goto err_free_vqs;

	iommu_device_register(&viommu->iommu, &viommu_ops, parent_dev);

	vdev->priv = viommu;

	dev_info(dev, "input address: %u bits\n",
		 order_base_2(viommu->geometry.aperture_end));
	dev_info(dev, "page mask: %#llx\n", viommu->pgsize_bitmap);

	return 0;

err_free_vqs:
	vdev->config->del_vqs(vdev);

	return ret;
}

static void viommu_remove(struct virtio_device *vdev)
{
	struct viommu_dev *viommu = vdev->priv;

	iommu_device_sysfs_remove(&viommu->iommu);
	iommu_device_unregister(&viommu->iommu);

	/* Stop all virtqueues */
	virtio_reset_device(vdev);
	vdev->config->del_vqs(vdev);

	dev_info(&vdev->dev, "device removed\n");
}

static void viommu_config_changed(struct virtio_device *vdev)
{
	dev_warn(&vdev->dev, "config changed\n");
}

static unsigned int features[] = {
	VIRTIO_IOMMU_F_MAP_UNMAP,
	VIRTIO_IOMMU_F_INPUT_RANGE,
	VIRTIO_IOMMU_F_DOMAIN_RANGE,
	VIRTIO_IOMMU_F_PROBE,
	VIRTIO_IOMMU_F_MMIO,
	VIRTIO_IOMMU_F_BYPASS_CONFIG,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_IOMMU, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
MODULE_DEVICE_TABLE(virtio, id_table);

static struct virtio_driver virtio_iommu_drv = {
	.driver.name		= KBUILD_MODNAME,
	.id_table		= id_table,
	.feature_table		= features,
	.feature_table_size	= ARRAY_SIZE(features),
	.probe			= viommu_probe,
	.remove			= viommu_remove,
	.config_changed		= viommu_config_changed,
};

module_virtio_driver(virtio_iommu_drv);

MODULE_DESCRIPTION("Virtio IOMMU driver");
MODULE_AUTHOR("Jean-Philippe Brucker <jean-philippe.brucker@arm.com>");
MODULE_LICENSE("GPL v2");