// SPDX-License-Identifier: GPL-2.0-only
/*
 * Helpers for the host side of a virtio ring.
 *
 * Since these may be in userspace, we use (inline) accessors.
 */
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/vringh.h>
#include <linux/virtio_ring.h>
#include <linux/kernel.h>
#include <linux/ratelimit.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/export.h>
#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/vhost_iotlb.h>
#endif
#include <uapi/linux/virtio_config.h>

static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
{
	static DEFINE_RATELIMIT_STATE(vringh_rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	if (__ratelimit(&vringh_rs)) {
		va_list ap;
		va_start(ap, fmt);
		printk(KERN_NOTICE "vringh:");
		vprintk(fmt, ap);
		va_end(ap);
	}
}

/* Returns vring->num if empty, -ve on error. */
static inline int __vringh_get_head(const struct vringh *vrh,
				    int (*getu16)(const struct vringh *vrh,
						  u16 *val, const __virtio16 *p),
				    u16 *last_avail_idx)
{
	u16 avail_idx, i, head;
	int err;

	err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
	if (err) {
		vringh_bad("Failed to access avail idx at %p",
			   &vrh->vring.avail->idx);
		return err;
	}

	if (*last_avail_idx == avail_idx)
		return vrh->vring.num;

	/* Only get avail ring entries after they have been exposed by guest. */
	virtio_rmb(vrh->weak_barriers);

	i = *last_avail_idx & (vrh->vring.num - 1);

	err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
	if (err) {
		vringh_bad("Failed to read head: idx %d address %p",
			   *last_avail_idx, &vrh->vring.avail->ring[i]);
		return err;
	}

	if (head >= vrh->vring.num) {
		vringh_bad("Guest says index %u > %u is available",
			   head, vrh->vring.num);
		return -EINVAL;
	}

	(*last_avail_idx)++;
	return head;
}

/**
 * vringh_kiov_advance - skip bytes from vring_kiov
 * @iov: an iov passed to vringh_getdesc_*() (updated as we consume)
 * @len: the maximum length to advance
 */
void vringh_kiov_advance(struct vringh_kiov *iov, size_t len)
{
	while (len && iov->i < iov->used) {
		size_t partlen = min(iov->iov[iov->i].iov_len, len);

		iov->consumed += partlen;
		iov->iov[iov->i].iov_len -= partlen;
		iov->iov[iov->i].iov_base += partlen;

		if (!iov->iov[iov->i].iov_len) {
			/* Fix up old iov element then increment. */
			iov->iov[iov->i].iov_len = iov->consumed;
			iov->iov[iov->i].iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}

		len -= partlen;
	}
}
EXPORT_SYMBOL(vringh_kiov_advance);

/* Copy some bytes to/from the iovec. Returns num copied. */
static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
				      struct vringh_kiov *iov,
				      void *ptr, size_t len,
				      int (*xfer)(const struct vringh *vrh,
						  void *addr, void *ptr,
						  size_t len))
{
	int err, done = 0;

	while (len && iov->i < iov->used) {
		size_t partlen;

		partlen = min(iov->iov[iov->i].iov_len, len);
		err = xfer(vrh, iov->iov[iov->i].iov_base, ptr, partlen);
		if (err)
			return err;
		done += partlen;
		len -= partlen;
		ptr += partlen;

		vringh_kiov_advance(iov, partlen);
	}
	return done;
}
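/*
 * Editor's sketch of how a caller can combine the advance and transfer
 * helpers above (HDR_LEN, PAYLOAD_MAX and pad_len are illustrative names,
 * not part of this file).  A consumer that wants to skip a fixed header
 * and some padding before reading the payload might do:
 *
 *	struct vringh_kiov riov;
 *	u8 hdr[HDR_LEN], payload[PAYLOAD_MAX];
 *
 *	// after vringh_getdesc_kern() has filled riov:
 *	vringh_iov_pull_kern(&riov, hdr, sizeof(hdr));	// consumes HDR_LEN
 *	vringh_kiov_advance(&riov, pad_len);		// skip padding bytes
 *	vringh_iov_pull_kern(&riov, payload, sizeof(payload));
 *
 * Each call advances riov, so the next pull continues where the previous
 * one stopped.
 */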
/* May reduce *len if range is shorter. */
static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
			       struct vringh_range *range,
			       bool (*getrange)(struct vringh *,
						u64, struct vringh_range *))
{
	if (addr < range->start || addr > range->end_incl) {
		if (!getrange(vrh, addr, range))
			return false;
	}
	BUG_ON(addr < range->start || addr > range->end_incl);

	/* To end of memory? */
	if (unlikely(addr + *len == 0)) {
		if (range->end_incl == -1ULL)
			return true;
		goto truncate;
	}

	/* Otherwise, don't wrap. */
	if (addr + *len < addr) {
		vringh_bad("Wrapping descriptor %zu@0x%llx",
			   *len, (unsigned long long)addr);
		return false;
	}

	if (unlikely(addr + *len - 1 > range->end_incl))
		goto truncate;
	return true;

truncate:
	*len = range->end_incl + 1 - addr;
	return true;
}

static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
				  struct vringh_range *range,
				  bool (*getrange)(struct vringh *,
						   u64, struct vringh_range *))
{
	return true;
}
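/*
 * Worked example (editor's note, the numbers are illustrative): if
 * getrange() reports a range of [0x1000, 0x1fff] and a descriptor covers
 * 0x200 bytes at 0x1f00, range_check() truncates *len to 0x100.
 * __vringh_iov() below then records one iov element for those 0x100
 * bytes and loops ("goto again") to look up the range holding the
 * remaining 0x100 bytes, so one descriptor may become several iov
 * elements.
 */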
/* No reason for this code to be inline. */
static int move_to_indirect(const struct vringh *vrh,
			    int *up_next, u16 *i, void *addr,
			    const struct vring_desc *desc,
			    struct vring_desc **descs, int *desc_max)
{
	u32 len;

	/* Indirect tables can't have indirect. */
	if (*up_next != -1) {
		vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
		return -EINVAL;
	}

	len = vringh32_to_cpu(vrh, desc->len);
	if (unlikely(len % sizeof(struct vring_desc))) {
		vringh_bad("Strange indirect len %u", desc->len);
		return -EINVAL;
	}

	/* We will check this when we follow it! */
	if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
		*up_next = vringh16_to_cpu(vrh, desc->next);
	else
		*up_next = -2;
	*descs = addr;
	*desc_max = len / sizeof(struct vring_desc);

	/* Now, start at the first indirect. */
	*i = 0;
	return 0;
}

static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
{
	struct kvec *new;
	unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;

	if (new_num < 8)
		new_num = 8;

	flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
	if (flag)
		new = krealloc_array(iov->iov, new_num,
				     sizeof(struct iovec), gfp);
	else {
		new = kmalloc_array(new_num, sizeof(struct iovec), gfp);
		if (new) {
			memcpy(new, iov->iov,
			       iov->max_num * sizeof(struct iovec));
			flag = VRINGH_IOV_ALLOCATED;
		}
	}
	if (!new)
		return -ENOMEM;
	iov->iov = new;
	iov->max_num = (new_num | flag);
	return 0;
}

static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
				       struct vring_desc **descs, int *desc_max)
{
	u16 i = *up_next;

	*up_next = -1;
	*descs = vrh->vring.desc;
	*desc_max = vrh->vring.num;
	return i;
}

static int slow_copy(struct vringh *vrh, void *dst, const void *src,
		     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
				    struct vringh_range *range,
				    bool (*getrange)(struct vringh *vrh,
						     u64,
						     struct vringh_range *)),
		     bool (*getrange)(struct vringh *vrh,
				      u64 addr,
				      struct vringh_range *r),
		     struct vringh_range *range,
		     int (*copy)(const struct vringh *vrh,
				 void *dst, const void *src, size_t len))
{
	size_t part, len = sizeof(struct vring_desc);

	do {
		u64 addr;
		int err;

		part = len;
		addr = (u64)(unsigned long)src - range->offset;

		if (!rcheck(vrh, addr, &part, range, getrange))
			return -EINVAL;

		err = copy(vrh, dst, src, part);
		if (err)
			return err;

		dst += part;
		src += part;
		len -= part;
	} while (len);
	return 0;
}

static inline int
__vringh_iov(struct vringh *vrh, u16 i,
	     struct vringh_kiov *riov,
	     struct vringh_kiov *wiov,
	     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
			    struct vringh_range *range,
			    bool (*getrange)(struct vringh *, u64,
					     struct vringh_range *)),
	     bool (*getrange)(struct vringh *, u64, struct vringh_range *),
	     gfp_t gfp,
	     int (*copy)(const struct vringh *vrh,
			 void *dst, const void *src, size_t len))
{
	int err, count = 0, indirect_count = 0, up_next, desc_max;
	struct vring_desc desc, *descs;
	struct vringh_range range = { -1ULL, 0 }, slowrange;
	bool slow = false;

	/* We start traversing vring's descriptor table. */
	descs = vrh->vring.desc;
	desc_max = vrh->vring.num;
	up_next = -1;

	/* You must want something! */
	if (WARN_ON(!riov && !wiov))
		return -EINVAL;

	if (riov)
		riov->i = riov->used = riov->consumed = 0;
	if (wiov)
		wiov->i = wiov->used = wiov->consumed = 0;

	for (;;) {
		void *addr;
		struct vringh_kiov *iov;
		size_t len;

		if (unlikely(slow))
			err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
					&slowrange, copy);
		else
			err = copy(vrh, &desc, &descs[i], sizeof(desc));
		if (unlikely(err))
			goto fail;

		if (unlikely(desc.flags &
			     cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
			u64 a = vringh64_to_cpu(vrh, desc.addr);

			/* Make sure it's OK, and get offset. */
			len = vringh32_to_cpu(vrh, desc.len);
			if (!rcheck(vrh, a, &len, &range, getrange)) {
				err = -EINVAL;
				goto fail;
			}

			if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
				slow = true;
				/* We need to save this range to use offset */
				slowrange = range;
			}

			addr = (void *)(long)(a + range.offset);
			err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
					       &descs, &desc_max);
			if (err)
				goto fail;
			continue;
		}

		if (up_next == -1)
			count++;
		else
			indirect_count++;

		if (count > vrh->vring.num || indirect_count > desc_max) {
			vringh_bad("Descriptor loop in %p", descs);
			err = -ELOOP;
			goto fail;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
			iov = wiov;
		else {
			iov = riov;
			if (unlikely(wiov && wiov->used)) {
				vringh_bad("Readable desc %p after writable",
					   &descs[i]);
				err = -EINVAL;
				goto fail;
			}
		}

		if (!iov) {
			vringh_bad("Unexpected %s desc",
				   !wiov ? "writable" : "readable");
			err = -EPROTO;
			goto fail;
		}

	again:
		/* Make sure it's OK, and get offset. */
		len = vringh32_to_cpu(vrh, desc.len);
		if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
			    getrange)) {
			err = -EINVAL;
			goto fail;
		}
		addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
					       range.offset);

		if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
			err = resize_iovec(iov, gfp);
			if (err)
				goto fail;
		}

		iov->iov[iov->used].iov_base = addr;
		iov->iov[iov->used].iov_len = len;
		iov->used++;

		if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
			desc.len = cpu_to_vringh32(vrh,
				   vringh32_to_cpu(vrh, desc.len) - len);
			desc.addr = cpu_to_vringh64(vrh,
				    vringh64_to_cpu(vrh, desc.addr) + len);
			goto again;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
			i = vringh16_to_cpu(vrh, desc.next);
		} else {
			/* Just in case we need to finish traversing above. */
			if (unlikely(up_next > 0)) {
				i = return_from_indirect(vrh, &up_next,
							 &descs, &desc_max);
				slow = false;
				indirect_count = 0;
			} else
				break;
		}

		if (i >= desc_max) {
			vringh_bad("Chained index %u > %u", i, desc_max);
			err = -EINVAL;
			goto fail;
		}
	}

	return 0;

fail:
	return err;
}

static inline int __vringh_complete(struct vringh *vrh,
				    const struct vring_used_elem *used,
				    unsigned int num_used,
				    int (*putu16)(const struct vringh *vrh,
						  __virtio16 *p, u16 val),
				    int (*putused)(const struct vringh *vrh,
						   struct vring_used_elem *dst,
						   const struct vring_used_elem
						   *src, unsigned num))
{
	struct vring_used *used_ring;
	int err;
	u16 used_idx, off;

	used_ring = vrh->vring.used;
	used_idx = vrh->last_used_idx + vrh->completed;

	off = used_idx % vrh->vring.num;

	/* Compiler knows num_used == 1 sometimes, hence extra check */
	if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
		u16 part = vrh->vring.num - off;
		err = putused(vrh, &used_ring->ring[off], used, part);
		if (!err)
			err = putused(vrh, &used_ring->ring[0], used + part,
				      num_used - part);
	} else
		err = putused(vrh, &used_ring->ring[off], used, num_used);

	if (err) {
		vringh_bad("Failed to write %u used entries %u at %p",
			   num_used, off, &used_ring->ring[off]);
		return err;
	}

	/* Make sure buffer is written before we update index. */
	virtio_wmb(vrh->weak_barriers);

	err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
	if (err) {
		vringh_bad("Failed to update used index at %p",
			   &vrh->vring.used->idx);
		return err;
	}

	vrh->completed += num_used;
	return 0;
}

static inline int __vringh_need_notify(struct vringh *vrh,
				       int (*getu16)(const struct vringh *vrh,
						     u16 *val,
						     const __virtio16 *p))
{
	bool notify;
	u16 used_event;
	int err;

	/* Flush out used index update. This is paired with the
	 * barrier that the Guest executes when enabling
	 * interrupts. */
	virtio_mb(vrh->weak_barriers);

	/* Old-style, without event indices. */
	if (!vrh->event_indices) {
		u16 flags;
		err = getu16(vrh, &flags, &vrh->vring.avail->flags);
		if (err) {
			vringh_bad("Failed to get flags at %p",
				   &vrh->vring.avail->flags);
			return err;
		}
		return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
	}

	/* Modern: we know when other side wants to know. */
	err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
	if (err) {
		vringh_bad("Failed to get used event idx at %p",
			   &vring_used_event(&vrh->vring));
		return err;
	}

	/* Just in case we added so many that we wrap. */
	if (unlikely(vrh->completed > 0xffff))
		notify = true;
	else
		notify = vring_need_event(used_event,
					  vrh->last_used_idx + vrh->completed,
					  vrh->last_used_idx);

	vrh->last_used_idx += vrh->completed;
	vrh->completed = 0;
	return notify;
}
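/*
 * Worked example of the event-index decision above (editor's note, the
 * numbers are illustrative).  vring_need_event(event, new, old) is true
 * when (u16)(new - event - 1) < (u16)(new - old).  With last_used_idx
 * (old) = 10 and completed = 3, new = 13:
 *
 *	used_event = 11  ->  (13 - 11 - 1) = 1  < 3  -> notify
 *	used_event = 13  ->  (13 - 13 - 1) wraps to 0xffff, not < 3 -> skip
 *
 * i.e. we only notify if the other side asked to be woken somewhere in
 * the window of entries we just published.
 */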
static inline bool __vringh_notify_enable(struct vringh *vrh,
					  int (*getu16)(const struct vringh *vrh,
							u16 *val, const __virtio16 *p),
					  int (*putu16)(const struct vringh *vrh,
							__virtio16 *p, u16 val))
{
	u16 avail;

	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
			vringh_bad("Clearing used flags %p",
				   &vrh->vring.used->flags);
			return true;
		}
	} else {
		if (putu16(vrh, &vring_avail_event(&vrh->vring),
			   vrh->last_avail_idx) != 0) {
			vringh_bad("Updating avail event index %p",
				   &vring_avail_event(&vrh->vring));
			return true;
		}
	}

	/* They could have slipped one in as we were doing that: make
	 * sure it's written, then check again. */
	virtio_mb(vrh->weak_barriers);

	if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
		vringh_bad("Failed to check avail idx at %p",
			   &vrh->vring.avail->idx);
		return true;
	}

	/* This is unlikely, so we just leave notifications enabled
	 * (if we're using event_indices, we'll only get one
	 * notification anyway). */
	return avail == vrh->last_avail_idx;
}

static inline void __vringh_notify_disable(struct vringh *vrh,
					   int (*putu16)(const struct vringh *vrh,
							 __virtio16 *p, u16 val))
{
	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags,
			   VRING_USED_F_NO_NOTIFY)) {
			vringh_bad("Setting used flags %p",
				   &vrh->vring.used->flags);
		}
	}
}

/* Userspace access helpers: in this case, addresses are really userspace. */
static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
{
	__virtio16 v = 0;
	int rc = get_user(v, (__force __virtio16 __user *)p);
	*val = vringh16_to_cpu(vrh, v);
	return rc;
}

static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	__virtio16 v = cpu_to_vringh16(vrh, val);
	return put_user(v, (__force __virtio16 __user *)p);
}

static inline int copydesc_user(const struct vringh *vrh,
				void *dst, const void *src, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int putused_user(const struct vringh *vrh,
			       struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	return copy_to_user((__force void __user *)dst, src,
			    sizeof(*dst) * num) ? -EFAULT : 0;
}

static inline int xfer_from_user(const struct vringh *vrh, void *src,
				 void *dst, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int xfer_to_user(const struct vringh *vrh,
			       void *dst, void *src, size_t len)
{
	return copy_to_user((__force void __user *)dst, src, len) ?
		-EFAULT : 0;
}
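/*
 * Editor's sketch of how the userspace-ring API defined below is normally
 * used (my_getrange(), MAX_LEN and notify_guest() are illustrative names,
 * not part of this file): initialize once with vringh_init_user(), then
 * loop over available descriptors.
 *
 *	struct vringh_iov riov = {}, wiov = {};
 *	u16 head;
 *
 *	while (vringh_getdesc_user(vrh, &riov, &wiov,
 *				   my_getrange, &head) == 1) {
 *		char buf[MAX_LEN];
 *		ssize_t in = vringh_iov_pull_user(&riov, buf, sizeof(buf));
 *
 *		if (in < 0)
 *			break;
 *		// ... handle the request, optionally vringh_iov_push_user() ...
 *		vringh_complete_user(vrh, head, in);
 *		if (vringh_need_notify_user(vrh) > 0)
 *			notify_guest();
 *	}
 *	vringh_iov_cleanup(&riov);
 *	vringh_iov_cleanup(&wiov);
 */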
/**
 * vringh_init_user - initialize a vringh for a userspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid: you should check pointers
 * yourself!
 */
int vringh_init_user(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     vring_desc_t __user *desc,
		     vring_avail_t __user *avail,
		     vring_used_t __user *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	/* vring expects kernel addresses, but only used via accessors. */
	vrh->vring.desc = (__force struct vring_desc *)desc;
	vrh->vring.avail = (__force struct vring_avail *)avail;
	vrh->vring.used = (__force struct vring_used *)used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_user);

/**
 * vringh_getdesc_user - get next available descriptor from userspace ring.
 * @vrh: the userspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @getrange: function to call to check ranges.
 * @head: head index we received, for passing to vringh_complete_user().
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you can reuse riov and wiov with subsequent calls. Content is
 * overwritten and memory reallocated if more space is needed.
 * When you don't need riov and wiov anymore, you should clean them up by
 * calling vringh_iov_cleanup() to release the memory, even on error!
 */
int vringh_getdesc_user(struct vringh *vrh,
			struct vringh_iov *riov,
			struct vringh_iov *wiov,
			bool (*getrange)(struct vringh *vrh,
					 u64 addr, struct vringh_range *r),
			u16 *head)
{
	int err;

	*head = vrh->vring.num;
	err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	/* We need the layouts to be identical for this to work */
	BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
		     offsetof(struct vringh_iov, iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
		     offsetof(struct vringh_iov, i));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
		     offsetof(struct vringh_iov, used));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
		     offsetof(struct vringh_iov, max_num));
	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
	BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
		     offsetof(struct kvec, iov_base));
	BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
		     offsetof(struct kvec, iov_len));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
		     != sizeof(((struct kvec *)NULL)->iov_base));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
		     != sizeof(((struct kvec *)NULL)->iov_len));

	*head = err;
	err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
			   (struct vringh_kiov *)wiov,
			   range_check, getrange, GFP_KERNEL, copydesc_user);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_user);

/**
 * vringh_iov_pull_user - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer(NULL, (struct vringh_kiov *)riov,
			       dst, len, xfer_from_user);
}
EXPORT_SYMBOL(vringh_iov_pull_user);

/**
 * vringh_iov_push_user - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer(NULL, (struct vringh_kiov *)wiov,
			       (void *)src, len, xfer_to_user);
}
EXPORT_SYMBOL(vringh_iov_push_user);

/**
 * vringh_abandon_user - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_user() to undo).
 *
 * The next vringh_get_user() will return the old descriptor(s) again.
 */
void vringh_abandon_user(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_user);

/**
 * vringh_complete_user - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_user.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);
	return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_user);

/**
 * vringh_complete_multi_user - we've finished with many descriptors.
 * @vrh: the vring.
 * @used: the head, length pairs.
 * @num_used: the number of used elements.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_multi_user(struct vringh *vrh,
			       const struct vring_used_elem used[],
			       unsigned num_used)
{
	return __vringh_complete(vrh, used, num_used,
				 putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_multi_user);

/**
 * vringh_notify_enable_user - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_user(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_user, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_enable_user);

/**
 * vringh_notify_disable_user - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_user(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_disable_user);

/**
 * vringh_need_notify_user - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_user() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_user(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_user);
}
EXPORT_SYMBOL(vringh_need_notify_user);

/* Kernelspace access helpers. */
static inline int getu16_kern(const struct vringh *vrh,
			      u16 *val, const __virtio16 *p)
{
	*val = vringh16_to_cpu(vrh, READ_ONCE(*p));
	return 0;
}

static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
	return 0;
}

static inline int copydesc_kern(const struct vringh *vrh,
				void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static inline int putused_kern(const struct vringh *vrh,
			       struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	memcpy(dst, src, num * sizeof(*dst));
	return 0;
}

static inline int xfer_kern(const struct vringh *vrh, void *src,
			    void *dst, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static inline int kern_xfer(const struct vringh *vrh, void *dst,
			    void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

/**
 * vringh_init_kern - initialize a vringh for a kernelspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the kernelspace descriptor pointer.
 * @avail: the kernelspace avail pointer.
 * @used: the kernelspace used pointer.
 *
 * Returns an error if num is invalid.
 */
int vringh_init_kern(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     struct vring_desc *desc,
		     struct vring_avail *avail,
		     struct vring_used *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	vrh->vring.desc = desc;
	vrh->vring.avail = avail;
	vrh->vring.used = used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_kern);

/**
 * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
 * @vrh: the kernelspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @head: head index we received, for passing to vringh_complete_kern().
 * @gfp: flags for allocating larger riov/wiov.
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you can reuse riov and wiov with subsequent calls. Content is
 * overwritten and memory reallocated if more space is needed.
 * When you don't need riov and wiov anymore, you should clean them up by
 * calling vringh_kiov_cleanup() to release the memory, even on error!
 */
int vringh_getdesc_kern(struct vringh *vrh,
			struct vringh_kiov *riov,
			struct vringh_kiov *wiov,
			u16 *head,
			gfp_t gfp)
{
	int err;

	err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	*head = err;
	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
			   gfp, copydesc_kern);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_kern);

/**
 * vringh_iov_pull_kern - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer(NULL, riov, dst, len, xfer_kern);
}
EXPORT_SYMBOL(vringh_iov_pull_kern);

/**
 * vringh_iov_push_kern - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer(NULL, wiov, (void *)src, len, kern_xfer);
}
EXPORT_SYMBOL(vringh_iov_push_kern);

/**
 * vringh_abandon_kern - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_kern() to undo).
 *
 * The next vringh_get_kern() will return the old descriptor(s) again.
 */
void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_kern);

/**
 * vringh_complete_kern - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_kern.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_kern() after one or more calls
 * to this function.
 */
int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);

	return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
}
EXPORT_SYMBOL(vringh_complete_kern);
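/*
 * Editor's sketch of a kernel-side service loop (handle_request() and the
 * notification kick are illustrative, not part of this file): each pass
 * collects one descriptor chain, handles it, then publishes the head.
 *
 *	struct vringh_kiov riov, wiov;
 *	u16 head;
 *
 *	vringh_kiov_init(&riov, NULL, 0);
 *	vringh_kiov_init(&wiov, NULL, 0);
 *
 *	while (vringh_getdesc_kern(&vrh, &riov, &wiov, &head,
 *				   GFP_KERNEL) == 1) {
 *		u32 written = handle_request(&riov, &wiov);
 *
 *		vringh_complete_kern(&vrh, head, written);
 *	}
 *	if (vringh_need_notify_kern(&vrh) > 0)
 *		;	// kick the other side (transport specific)
 *
 *	vringh_kiov_cleanup(&riov);
 *	vringh_kiov_cleanup(&wiov);
 */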
/**
 * vringh_notify_enable_kern - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_kern(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_enable_kern);

/**
 * vringh_notify_disable_kern - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_kern(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_kern);
}
EXPORT_SYMBOL(vringh_notify_disable_kern);

/**
 * vringh_need_notify_kern - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_kern() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_kern(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_kern);
}
EXPORT_SYMBOL(vringh_need_notify_kern);

#if IS_REACHABLE(CONFIG_VHOST_IOTLB)

struct iotlb_vec {
	union {
		struct iovec *iovec;
		struct bio_vec *bvec;
	} iov;
	size_t count;
};

static int iotlb_translate(const struct vringh *vrh,
			   u64 addr, u64 len, u64 *translated,
			   struct iotlb_vec *ivec, u32 perm)
{
	struct vhost_iotlb_map *map;
	struct vhost_iotlb *iotlb = vrh->iotlb;
	int ret = 0;
	u64 s = 0, last = addr + len - 1;

	spin_lock(vrh->iotlb_lock);

	while (len > s) {
		uintptr_t io_addr;
		size_t io_len;
		u64 size;

		if (unlikely(ret >= ivec->count)) {
			ret = -ENOBUFS;
			break;
		}

		map = vhost_iotlb_itree_first(iotlb, addr, last);
		if (!map || map->start > addr) {
			ret = -EINVAL;
			break;
		} else if (!(map->perm & perm)) {
			ret = -EPERM;
			break;
		}

		size = map->size - addr + map->start;
		io_len = min(len - s, size);
		io_addr = map->addr - map->start + addr;

		if (vrh->use_va) {
			struct iovec *iovec = ivec->iov.iovec;

			iovec[ret].iov_len = io_len;
			iovec[ret].iov_base = (void __user *)io_addr;
		} else {
			u64 pfn = io_addr >> PAGE_SHIFT;
			struct bio_vec *bvec = ivec->iov.bvec;

			bvec_set_page(&bvec[ret], pfn_to_page(pfn), io_len,
				      io_addr & (PAGE_SIZE - 1));
		}

		s += size;
		addr += size;
		++ret;
	}

	spin_unlock(vrh->iotlb_lock);

	if (translated)
		*translated = min(len, s);

	return ret;
}

#define IOTLB_IOV_STRIDE 16
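/*
 * Worked example (editor's note, sizes are illustrative): a guest buffer
 * that starts 1 KiB before the end of one IOTLB map and continues into
 * the next produces two entries above, one per map: 1 KiB from the first
 * and the remainder from the second.  Each entry becomes an iovec
 * (use_va) or a bio_vec (!use_va) for the iov_iter built by the copy
 * helpers below, with at most IOTLB_IOV_STRIDE entries per pass.
 */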
static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
				  void *src, size_t len)
{
	struct iotlb_vec ivec;
	union {
		struct iovec iovec[IOTLB_IOV_STRIDE];
		struct bio_vec bvec[IOTLB_IOV_STRIDE];
	} iov;
	u64 total_translated = 0;

	ivec.iov.iovec = iov.iovec;
	ivec.count = IOTLB_IOV_STRIDE;

	while (total_translated < len) {
		struct iov_iter iter;
		u64 translated;
		int ret;

		ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
				      len - total_translated, &translated,
				      &ivec, VHOST_MAP_RO);
		if (ret == -ENOBUFS)
			ret = IOTLB_IOV_STRIDE;
		else if (ret < 0)
			return ret;

		if (vrh->use_va) {
			iov_iter_init(&iter, ITER_SOURCE, ivec.iov.iovec, ret,
				      translated);
		} else {
			iov_iter_bvec(&iter, ITER_SOURCE, ivec.iov.bvec, ret,
				      translated);
		}

		ret = copy_from_iter(dst, translated, &iter);
		if (ret < 0)
			return ret;

		src += translated;
		dst += translated;
		total_translated += translated;
	}

	return total_translated;
}

static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
				void *src, size_t len)
{
	struct iotlb_vec ivec;
	union {
		struct iovec iovec[IOTLB_IOV_STRIDE];
		struct bio_vec bvec[IOTLB_IOV_STRIDE];
	} iov;
	u64 total_translated = 0;

	ivec.iov.iovec = iov.iovec;
	ivec.count = IOTLB_IOV_STRIDE;

	while (total_translated < len) {
		struct iov_iter iter;
		u64 translated;
		int ret;

		ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
				      len - total_translated, &translated,
				      &ivec, VHOST_MAP_WO);
		if (ret == -ENOBUFS)
			ret = IOTLB_IOV_STRIDE;
		else if (ret < 0)
			return ret;

		if (vrh->use_va) {
			iov_iter_init(&iter, ITER_DEST, ivec.iov.iovec, ret,
				      translated);
		} else {
			iov_iter_bvec(&iter, ITER_DEST, ivec.iov.bvec, ret,
				      translated);
		}

		ret = copy_to_iter(src, translated, &iter);
		if (ret < 0)
			return ret;

		src += translated;
		dst += translated;
		total_translated += translated;
	}

	return total_translated;
}

static inline int getu16_iotlb(const struct vringh *vrh,
			       u16 *val, const __virtio16 *p)
{
	struct iotlb_vec ivec;
	union {
		struct iovec iovec[1];
		struct bio_vec bvec[1];
	} iov;
	__virtio16 tmp;
	int ret;

	ivec.iov.iovec = iov.iovec;
	ivec.count = 1;

	/* Atomic read is needed for getu16 */
	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
			      NULL, &ivec, VHOST_MAP_RO);
	if (ret < 0)
		return ret;

	if (vrh->use_va) {
		ret = __get_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
		if (ret)
			return ret;
	} else {
		void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page);
		void *from = kaddr + ivec.iov.bvec[0].bv_offset;

		tmp = READ_ONCE(*(__virtio16 *)from);
		kunmap_local(kaddr);
	}

	*val = vringh16_to_cpu(vrh, tmp);

	return 0;
}

static inline int putu16_iotlb(const struct vringh *vrh,
			       __virtio16 *p, u16 val)
{
	struct iotlb_vec ivec;
	union {
		struct iovec iovec;
		struct bio_vec bvec;
	} iov;
	__virtio16 tmp;
	int ret;

	ivec.iov.iovec = &iov.iovec;
	ivec.count = 1;

	/* Atomic write is needed for putu16, so require write permission. */
	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
			      NULL, &ivec, VHOST_MAP_WO);
	if (ret < 0)
		return ret;

	tmp = cpu_to_vringh16(vrh, val);

	if (vrh->use_va) {
		ret = __put_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
		if (ret)
			return ret;
	} else {
		void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page);
		void *to = kaddr + ivec.iov.bvec[0].bv_offset;

		WRITE_ONCE(*(__virtio16 *)to, tmp);
		kunmap_local(kaddr);
	}

	return 0;
}

static inline int copydesc_iotlb(const struct vringh *vrh,
				 void *dst, const void *src, size_t len)
{
	int ret;

	ret = copy_from_iotlb(vrh, dst, (void *)src, len);
	if (ret != len)
		return -EFAULT;

	return 0;
}
static inline int xfer_from_iotlb(const struct vringh *vrh, void *src,
				  void *dst, size_t len)
{
	int ret;

	ret = copy_from_iotlb(vrh, dst, src, len);
	if (ret != len)
		return -EFAULT;

	return 0;
}

static inline int xfer_to_iotlb(const struct vringh *vrh,
				void *dst, void *src, size_t len)
{
	int ret;

	ret = copy_to_iotlb(vrh, dst, src, len);
	if (ret != len)
		return -EFAULT;

	return 0;
}

static inline int putused_iotlb(const struct vringh *vrh,
				struct vring_used_elem *dst,
				const struct vring_used_elem *src,
				unsigned int num)
{
	int size = num * sizeof(*dst);
	int ret;

	ret = copy_to_iotlb(vrh, dst, (void *)src, num * sizeof(*dst));
	if (ret != size)
		return -EFAULT;

	return 0;
}

/**
 * vringh_init_iotlb - initialize a vringh for a ring with IOTLB.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid.
 */
int vringh_init_iotlb(struct vringh *vrh, u64 features,
		      unsigned int num, bool weak_barriers,
		      struct vring_desc *desc,
		      struct vring_avail *avail,
		      struct vring_used *used)
{
	vrh->use_va = false;

	return vringh_init_kern(vrh, features, num, weak_barriers,
				desc, avail, used);
}
EXPORT_SYMBOL(vringh_init_iotlb);

/**
 * vringh_init_iotlb_va - initialize a vringh for a ring with IOTLB containing
 *                        user VA.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid.
 */
int vringh_init_iotlb_va(struct vringh *vrh, u64 features,
			 unsigned int num, bool weak_barriers,
			 struct vring_desc *desc,
			 struct vring_avail *avail,
			 struct vring_used *used)
{
	vrh->use_va = true;

	return vringh_init_kern(vrh, features, num, weak_barriers,
				desc, avail, used);
}
EXPORT_SYMBOL(vringh_init_iotlb_va);

/**
 * vringh_set_iotlb - initialize a vringh for a ring with IOTLB.
 * @vrh: the vring
 * @iotlb: iotlb associated with this vring
 * @iotlb_lock: spinlock to synchronize the iotlb accesses
 */
void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
		      spinlock_t *iotlb_lock)
{
	vrh->iotlb = iotlb;
	vrh->iotlb_lock = iotlb_lock;
}
EXPORT_SYMBOL(vringh_set_iotlb);
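/*
 * Editor's sketch of an IOTLB-backed setup (roughly what a vDPA simulator
 * would do; my_iotlb, my_iotlb_lock and the *_iova values are illustrative):
 * the iotlb and its lock come from the caller, and the ring addresses are
 * guest IOVAs that are translated on every access.
 *
 *	vringh_set_iotlb(&vrh, my_iotlb, &my_iotlb_lock);
 *	err = vringh_init_iotlb(&vrh, features, num, false,
 *				(struct vring_desc *)(uintptr_t)desc_iova,
 *				(struct vring_avail *)(uintptr_t)avail_iova,
 *				(struct vring_used *)(uintptr_t)used_iova);
 *
 * After this, the *_iotlb getdesc/complete/notify helpers below are used
 * exactly like their _kern counterparts.
 */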
/**
 * vringh_getdesc_iotlb - get next available descriptor from ring with
 * IOTLB.
 * @vrh: the kernelspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @head: head index we received, for passing to vringh_complete_iotlb().
 * @gfp: flags for allocating larger riov/wiov.
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you can reuse riov and wiov with subsequent calls. Content is
 * overwritten and memory reallocated if more space is needed.
 * When you don't need riov and wiov anymore, you should clean them up by
 * calling vringh_kiov_cleanup() to release the memory, even on error!
 */
int vringh_getdesc_iotlb(struct vringh *vrh,
			 struct vringh_kiov *riov,
			 struct vringh_kiov *wiov,
			 u16 *head,
			 gfp_t gfp)
{
	int err;

	err = __vringh_get_head(vrh, getu16_iotlb, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	*head = err;
	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
			   gfp, copydesc_iotlb);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_iotlb);

/**
 * vringh_iov_pull_iotlb - copy bytes from vring_iov.
 * @vrh: the vring.
 * @riov: the riov as passed to vringh_getdesc_iotlb() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_iotlb(struct vringh *vrh,
			      struct vringh_kiov *riov,
			      void *dst, size_t len)
{
	return vringh_iov_xfer(vrh, riov, dst, len, xfer_from_iotlb);
}
EXPORT_SYMBOL(vringh_iov_pull_iotlb);

/**
 * vringh_iov_push_iotlb - copy bytes into vring_iov.
 * @vrh: the vring.
 * @wiov: the wiov as passed to vringh_getdesc_iotlb() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_iotlb(struct vringh *vrh,
			      struct vringh_kiov *wiov,
			      const void *src, size_t len)
{
	return vringh_iov_xfer(vrh, wiov, (void *)src, len, xfer_to_iotlb);
}
EXPORT_SYMBOL(vringh_iov_push_iotlb);

/**
 * vringh_abandon_iotlb - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (ie. num
 *	 vringh_get_iotlb() to undo).
 *
 * The next vringh_get_iotlb() will return the old descriptor(s) again.
 */
void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet.
	 */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_iotlb);

/**
 * vringh_complete_iotlb - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_iotlb.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_iotlb() after one or more calls
 * to this function.
 */
int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);

	return __vringh_complete(vrh, &used, 1, putu16_iotlb, putused_iotlb);
}
EXPORT_SYMBOL(vringh_complete_iotlb);
/**
 * vringh_notify_enable_iotlb - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_iotlb(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_iotlb, putu16_iotlb);
}
EXPORT_SYMBOL(vringh_notify_enable_iotlb);

/**
 * vringh_notify_disable_iotlb - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_iotlb(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_iotlb);
}
EXPORT_SYMBOL(vringh_notify_disable_iotlb);

/**
 * vringh_need_notify_iotlb - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_iotlb() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_iotlb(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_iotlb);
}
EXPORT_SYMBOL(vringh_need_notify_iotlb);

#endif

MODULE_LICENSE("GPL");