1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Helpers for the host side of a virtio ring. 4 * 5 * Since these may be in userspace, we use (inline) accessors. 6 */ 7 #include <linux/compiler.h> 8 #include <linux/module.h> 9 #include <linux/vringh.h> 10 #include <linux/virtio_ring.h> 11 #include <linux/kernel.h> 12 #include <linux/ratelimit.h> 13 #include <linux/uaccess.h> 14 #include <linux/slab.h> 15 #include <linux/export.h> 16 #if IS_REACHABLE(CONFIG_VHOST_IOTLB) 17 #include <linux/bvec.h> 18 #include <linux/highmem.h> 19 #include <linux/vhost_iotlb.h> 20 #endif 21 #include <uapi/linux/virtio_config.h> 22 23 static __printf(1,2) __cold void vringh_bad(const char *fmt, ...) 24 { 25 static DEFINE_RATELIMIT_STATE(vringh_rs, 26 DEFAULT_RATELIMIT_INTERVAL, 27 DEFAULT_RATELIMIT_BURST); 28 if (__ratelimit(&vringh_rs)) { 29 va_list ap; 30 va_start(ap, fmt); 31 printk(KERN_NOTICE "vringh:"); 32 vprintk(fmt, ap); 33 va_end(ap); 34 } 35 } 36 37 /* Returns vring->num if empty, -ve on error. */ 38 static inline int __vringh_get_head(const struct vringh *vrh, 39 int (*getu16)(const struct vringh *vrh, 40 u16 *val, const __virtio16 *p), 41 u16 *last_avail_idx) 42 { 43 u16 avail_idx, i, head; 44 int err; 45 46 err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx); 47 if (err) { 48 vringh_bad("Failed to access avail idx at %p", 49 &vrh->vring.avail->idx); 50 return err; 51 } 52 53 if (*last_avail_idx == avail_idx) 54 return vrh->vring.num; 55 56 /* Only get avail ring entries after they have been exposed by guest. */ 57 virtio_rmb(vrh->weak_barriers); 58 59 i = *last_avail_idx & (vrh->vring.num - 1); 60 61 err = getu16(vrh, &head, &vrh->vring.avail->ring[i]); 62 if (err) { 63 vringh_bad("Failed to read head: idx %d address %p", 64 *last_avail_idx, &vrh->vring.avail->ring[i]); 65 return err; 66 } 67 68 if (head >= vrh->vring.num) { 69 vringh_bad("Guest says index %u > %u is available", 70 head, vrh->vring.num); 71 return -EINVAL; 72 } 73 74 (*last_avail_idx)++; 75 return head; 76 } 77 78 /** 79 * vringh_kiov_advance - skip bytes from vring_kiov 80 * @iov: an iov passed to vringh_getdesc_*() (updated as we consume) 81 * @len: the maximum length to advance 82 */ 83 void vringh_kiov_advance(struct vringh_kiov *iov, size_t len) 84 { 85 while (len && iov->i < iov->used) { 86 size_t partlen = min(iov->iov[iov->i].iov_len, len); 87 88 iov->consumed += partlen; 89 iov->iov[iov->i].iov_len -= partlen; 90 iov->iov[iov->i].iov_base += partlen; 91 92 if (!iov->iov[iov->i].iov_len) { 93 /* Fix up old iov element then increment. */ 94 iov->iov[iov->i].iov_len = iov->consumed; 95 iov->iov[iov->i].iov_base -= iov->consumed; 96 97 iov->consumed = 0; 98 iov->i++; 99 } 100 101 len -= partlen; 102 } 103 } 104 EXPORT_SYMBOL(vringh_kiov_advance); 105 106 /* Copy some bytes to/from the iovec. Returns num copied. */ 107 static inline ssize_t vringh_iov_xfer(struct vringh *vrh, 108 struct vringh_kiov *iov, 109 void *ptr, size_t len, 110 int (*xfer)(const struct vringh *vrh, 111 void *addr, void *ptr, 112 size_t len)) 113 { 114 int err, done = 0; 115 116 while (len && iov->i < iov->used) { 117 size_t partlen; 118 119 partlen = min(iov->iov[iov->i].iov_len, len); 120 err = xfer(vrh, iov->iov[iov->i].iov_base, ptr, partlen); 121 if (err) 122 return err; 123 done += partlen; 124 len -= partlen; 125 ptr += partlen; 126 iov->consumed += partlen; 127 iov->iov[iov->i].iov_len -= partlen; 128 iov->iov[iov->i].iov_base += partlen; 129 130 if (!iov->iov[iov->i].iov_len) { 131 /* Fix up old iov element then increment. */ 132 iov->iov[iov->i].iov_len = iov->consumed; 133 iov->iov[iov->i].iov_base -= iov->consumed; 134 135 iov->consumed = 0; 136 iov->i++; 137 } 138 } 139 return done; 140 } 141 142 /* May reduce *len if range is shorter. */ 143 static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len, 144 struct vringh_range *range, 145 bool (*getrange)(struct vringh *, 146 u64, struct vringh_range *)) 147 { 148 if (addr < range->start || addr > range->end_incl) { 149 if (!getrange(vrh, addr, range)) 150 return false; 151 } 152 BUG_ON(addr < range->start || addr > range->end_incl); 153 154 /* To end of memory? */ 155 if (unlikely(addr + *len == 0)) { 156 if (range->end_incl == -1ULL) 157 return true; 158 goto truncate; 159 } 160 161 /* Otherwise, don't wrap. */ 162 if (addr + *len < addr) { 163 vringh_bad("Wrapping descriptor %zu@0x%llx", 164 *len, (unsigned long long)addr); 165 return false; 166 } 167 168 if (unlikely(addr + *len - 1 > range->end_incl)) 169 goto truncate; 170 return true; 171 172 truncate: 173 *len = range->end_incl + 1 - addr; 174 return true; 175 } 176 177 static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len, 178 struct vringh_range *range, 179 bool (*getrange)(struct vringh *, 180 u64, struct vringh_range *)) 181 { 182 return true; 183 } 184 185 /* No reason for this code to be inline. */ 186 static int move_to_indirect(const struct vringh *vrh, 187 int *up_next, u16 *i, void *addr, 188 const struct vring_desc *desc, 189 struct vring_desc **descs, int *desc_max) 190 { 191 u32 len; 192 193 /* Indirect tables can't have indirect. */ 194 if (*up_next != -1) { 195 vringh_bad("Multilevel indirect %u->%u", *up_next, *i); 196 return -EINVAL; 197 } 198 199 len = vringh32_to_cpu(vrh, desc->len); 200 if (unlikely(len % sizeof(struct vring_desc))) { 201 vringh_bad("Strange indirect len %u", desc->len); 202 return -EINVAL; 203 } 204 205 /* We will check this when we follow it! */ 206 if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) 207 *up_next = vringh16_to_cpu(vrh, desc->next); 208 else 209 *up_next = -2; 210 *descs = addr; 211 *desc_max = len / sizeof(struct vring_desc); 212 213 /* Now, start at the first indirect. */ 214 *i = 0; 215 return 0; 216 } 217 218 static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp) 219 { 220 struct kvec *new; 221 unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2; 222 223 if (new_num < 8) 224 new_num = 8; 225 226 flag = (iov->max_num & VRINGH_IOV_ALLOCATED); 227 if (flag) 228 new = krealloc_array(iov->iov, new_num, 229 sizeof(struct iovec), gfp); 230 else { 231 new = kmalloc_array(new_num, sizeof(struct iovec), gfp); 232 if (new) { 233 memcpy(new, iov->iov, 234 iov->max_num * sizeof(struct iovec)); 235 flag = VRINGH_IOV_ALLOCATED; 236 } 237 } 238 if (!new) 239 return -ENOMEM; 240 iov->iov = new; 241 iov->max_num = (new_num | flag); 242 return 0; 243 } 244 245 static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next, 246 struct vring_desc **descs, int *desc_max) 247 { 248 u16 i = *up_next; 249 250 *up_next = -1; 251 *descs = vrh->vring.desc; 252 *desc_max = vrh->vring.num; 253 return i; 254 } 255 256 static int slow_copy(struct vringh *vrh, void *dst, const void *src, 257 bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len, 258 struct vringh_range *range, 259 bool (*getrange)(struct vringh *vrh, 260 u64, 261 struct vringh_range *)), 262 bool (*getrange)(struct vringh *vrh, 263 u64 addr, 264 struct vringh_range *r), 265 struct vringh_range *range, 266 int (*copy)(const struct vringh *vrh, 267 void *dst, const void *src, size_t len)) 268 { 269 size_t part, len = sizeof(struct vring_desc); 270 271 do { 272 u64 addr; 273 int err; 274 275 part = len; 276 addr = (u64)(unsigned long)src - range->offset; 277 278 if (!rcheck(vrh, addr, &part, range, getrange)) 279 return -EINVAL; 280 281 err = copy(vrh, dst, src, part); 282 if (err) 283 return err; 284 285 dst += part; 286 src += part; 287 len -= part; 288 } while (len); 289 return 0; 290 } 291 292 static inline int 293 __vringh_iov(struct vringh *vrh, u16 i, 294 struct vringh_kiov *riov, 295 struct vringh_kiov *wiov, 296 bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len, 297 struct vringh_range *range, 298 bool (*getrange)(struct vringh *, u64, 299 struct vringh_range *)), 300 bool (*getrange)(struct vringh *, u64, struct vringh_range *), 301 gfp_t gfp, 302 int (*copy)(const struct vringh *vrh, 303 void *dst, const void *src, size_t len)) 304 { 305 int err, count = 0, indirect_count = 0, up_next, desc_max; 306 struct vring_desc desc, *descs; 307 struct vringh_range range = { -1ULL, 0 }, slowrange; 308 bool slow = false; 309 310 /* We start traversing vring's descriptor table. */ 311 descs = vrh->vring.desc; 312 desc_max = vrh->vring.num; 313 up_next = -1; 314 315 /* You must want something! */ 316 if (WARN_ON(!riov && !wiov)) 317 return -EINVAL; 318 319 if (riov) 320 riov->i = riov->used = riov->consumed = 0; 321 if (wiov) 322 wiov->i = wiov->used = wiov->consumed = 0; 323 324 for (;;) { 325 void *addr; 326 struct vringh_kiov *iov; 327 size_t len; 328 329 if (unlikely(slow)) 330 err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange, 331 &slowrange, copy); 332 else 333 err = copy(vrh, &desc, &descs[i], sizeof(desc)); 334 if (unlikely(err)) 335 goto fail; 336 337 if (unlikely(desc.flags & 338 cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) { 339 u64 a = vringh64_to_cpu(vrh, desc.addr); 340 341 /* Make sure it's OK, and get offset. */ 342 len = vringh32_to_cpu(vrh, desc.len); 343 if (!rcheck(vrh, a, &len, &range, getrange)) { 344 err = -EINVAL; 345 goto fail; 346 } 347 348 if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) { 349 slow = true; 350 /* We need to save this range to use offset */ 351 slowrange = range; 352 } 353 354 addr = (void *)(long)(a + range.offset); 355 err = move_to_indirect(vrh, &up_next, &i, addr, &desc, 356 &descs, &desc_max); 357 if (err) 358 goto fail; 359 continue; 360 } 361 362 if (up_next == -1) 363 count++; 364 else 365 indirect_count++; 366 367 if (count > vrh->vring.num || indirect_count > desc_max) { 368 vringh_bad("Descriptor loop in %p", descs); 369 err = -ELOOP; 370 goto fail; 371 } 372 373 if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE)) 374 iov = wiov; 375 else { 376 iov = riov; 377 if (unlikely(wiov && wiov->used)) { 378 vringh_bad("Readable desc %p after writable", 379 &descs[i]); 380 err = -EINVAL; 381 goto fail; 382 } 383 } 384 385 if (!iov) { 386 vringh_bad("Unexpected %s desc", 387 !wiov ? "writable" : "readable"); 388 err = -EPROTO; 389 goto fail; 390 } 391 392 again: 393 /* Make sure it's OK, and get offset. */ 394 len = vringh32_to_cpu(vrh, desc.len); 395 if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range, 396 getrange)) { 397 err = -EINVAL; 398 goto fail; 399 } 400 addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) + 401 range.offset); 402 403 if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) { 404 err = resize_iovec(iov, gfp); 405 if (err) 406 goto fail; 407 } 408 409 iov->iov[iov->used].iov_base = addr; 410 iov->iov[iov->used].iov_len = len; 411 iov->used++; 412 413 if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) { 414 desc.len = cpu_to_vringh32(vrh, 415 vringh32_to_cpu(vrh, desc.len) - len); 416 desc.addr = cpu_to_vringh64(vrh, 417 vringh64_to_cpu(vrh, desc.addr) + len); 418 goto again; 419 } 420 421 if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) { 422 i = vringh16_to_cpu(vrh, desc.next); 423 } else { 424 /* Just in case we need to finish traversing above. */ 425 if (unlikely(up_next > 0)) { 426 i = return_from_indirect(vrh, &up_next, 427 &descs, &desc_max); 428 slow = false; 429 indirect_count = 0; 430 } else 431 break; 432 } 433 434 if (i >= desc_max) { 435 vringh_bad("Chained index %u > %u", i, desc_max); 436 err = -EINVAL; 437 goto fail; 438 } 439 } 440 441 return 0; 442 443 fail: 444 return err; 445 } 446 447 static inline int __vringh_complete(struct vringh *vrh, 448 const struct vring_used_elem *used, 449 unsigned int num_used, 450 int (*putu16)(const struct vringh *vrh, 451 __virtio16 *p, u16 val), 452 int (*putused)(const struct vringh *vrh, 453 struct vring_used_elem *dst, 454 const struct vring_used_elem 455 *src, unsigned num)) 456 { 457 struct vring_used *used_ring; 458 int err; 459 u16 used_idx, off; 460 461 used_ring = vrh->vring.used; 462 used_idx = vrh->last_used_idx + vrh->completed; 463 464 off = used_idx % vrh->vring.num; 465 466 /* Compiler knows num_used == 1 sometimes, hence extra check */ 467 if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) { 468 u16 part = vrh->vring.num - off; 469 err = putused(vrh, &used_ring->ring[off], used, part); 470 if (!err) 471 err = putused(vrh, &used_ring->ring[0], used + part, 472 num_used - part); 473 } else 474 err = putused(vrh, &used_ring->ring[off], used, num_used); 475 476 if (err) { 477 vringh_bad("Failed to write %u used entries %u at %p", 478 num_used, off, &used_ring->ring[off]); 479 return err; 480 } 481 482 /* Make sure buffer is written before we update index. */ 483 virtio_wmb(vrh->weak_barriers); 484 485 err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used); 486 if (err) { 487 vringh_bad("Failed to update used index at %p", 488 &vrh->vring.used->idx); 489 return err; 490 } 491 492 vrh->completed += num_used; 493 return 0; 494 } 495 496 497 static inline int __vringh_need_notify(struct vringh *vrh, 498 int (*getu16)(const struct vringh *vrh, 499 u16 *val, 500 const __virtio16 *p)) 501 { 502 bool notify; 503 u16 used_event; 504 int err; 505 506 /* Flush out used index update. This is paired with the 507 * barrier that the Guest executes when enabling 508 * interrupts. */ 509 virtio_mb(vrh->weak_barriers); 510 511 /* Old-style, without event indices. */ 512 if (!vrh->event_indices) { 513 u16 flags; 514 err = getu16(vrh, &flags, &vrh->vring.avail->flags); 515 if (err) { 516 vringh_bad("Failed to get flags at %p", 517 &vrh->vring.avail->flags); 518 return err; 519 } 520 return (!(flags & VRING_AVAIL_F_NO_INTERRUPT)); 521 } 522 523 /* Modern: we know when other side wants to know. */ 524 err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring)); 525 if (err) { 526 vringh_bad("Failed to get used event idx at %p", 527 &vring_used_event(&vrh->vring)); 528 return err; 529 } 530 531 /* Just in case we added so many that we wrap. */ 532 if (unlikely(vrh->completed > 0xffff)) 533 notify = true; 534 else 535 notify = vring_need_event(used_event, 536 vrh->last_used_idx + vrh->completed, 537 vrh->last_used_idx); 538 539 vrh->last_used_idx += vrh->completed; 540 vrh->completed = 0; 541 return notify; 542 } 543 544 static inline bool __vringh_notify_enable(struct vringh *vrh, 545 int (*getu16)(const struct vringh *vrh, 546 u16 *val, const __virtio16 *p), 547 int (*putu16)(const struct vringh *vrh, 548 __virtio16 *p, u16 val)) 549 { 550 u16 avail; 551 552 if (!vrh->event_indices) { 553 /* Old-school; update flags. */ 554 if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) { 555 vringh_bad("Clearing used flags %p", 556 &vrh->vring.used->flags); 557 return true; 558 } 559 } else { 560 if (putu16(vrh, &vring_avail_event(&vrh->vring), 561 vrh->last_avail_idx) != 0) { 562 vringh_bad("Updating avail event index %p", 563 &vring_avail_event(&vrh->vring)); 564 return true; 565 } 566 } 567 568 /* They could have slipped one in as we were doing that: make 569 * sure it's written, then check again. */ 570 virtio_mb(vrh->weak_barriers); 571 572 if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) { 573 vringh_bad("Failed to check avail idx at %p", 574 &vrh->vring.avail->idx); 575 return true; 576 } 577 578 /* This is unlikely, so we just leave notifications enabled 579 * (if we're using event_indices, we'll only get one 580 * notification anyway). */ 581 return avail == vrh->last_avail_idx; 582 } 583 584 static inline void __vringh_notify_disable(struct vringh *vrh, 585 int (*putu16)(const struct vringh *vrh, 586 __virtio16 *p, u16 val)) 587 { 588 if (!vrh->event_indices) { 589 /* Old-school; update flags. */ 590 if (putu16(vrh, &vrh->vring.used->flags, 591 VRING_USED_F_NO_NOTIFY)) { 592 vringh_bad("Setting used flags %p", 593 &vrh->vring.used->flags); 594 } 595 } 596 } 597 598 /* Userspace access helpers: in this case, addresses are really userspace. */ 599 static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p) 600 { 601 __virtio16 v = 0; 602 int rc = get_user(v, (__force __virtio16 __user *)p); 603 *val = vringh16_to_cpu(vrh, v); 604 return rc; 605 } 606 607 static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val) 608 { 609 __virtio16 v = cpu_to_vringh16(vrh, val); 610 return put_user(v, (__force __virtio16 __user *)p); 611 } 612 613 static inline int copydesc_user(const struct vringh *vrh, 614 void *dst, const void *src, size_t len) 615 { 616 return copy_from_user(dst, (__force void __user *)src, len) ? 617 -EFAULT : 0; 618 } 619 620 static inline int putused_user(const struct vringh *vrh, 621 struct vring_used_elem *dst, 622 const struct vring_used_elem *src, 623 unsigned int num) 624 { 625 return copy_to_user((__force void __user *)dst, src, 626 sizeof(*dst) * num) ? -EFAULT : 0; 627 } 628 629 static inline int xfer_from_user(const struct vringh *vrh, void *src, 630 void *dst, size_t len) 631 { 632 return copy_from_user(dst, (__force void __user *)src, len) ? 633 -EFAULT : 0; 634 } 635 636 static inline int xfer_to_user(const struct vringh *vrh, 637 void *dst, void *src, size_t len) 638 { 639 return copy_to_user((__force void __user *)dst, src, len) ? 640 -EFAULT : 0; 641 } 642 643 /** 644 * vringh_init_user - initialize a vringh for a userspace vring. 645 * @vrh: the vringh to initialize. 646 * @features: the feature bits for this ring. 647 * @num: the number of elements. 648 * @weak_barriers: true if we only need memory barriers, not I/O. 649 * @desc: the userspace descriptor pointer. 650 * @avail: the userspace avail pointer. 651 * @used: the userspace used pointer. 652 * 653 * Returns an error if num is invalid: you should check pointers 654 * yourself! 655 */ 656 int vringh_init_user(struct vringh *vrh, u64 features, 657 unsigned int num, bool weak_barriers, 658 vring_desc_t __user *desc, 659 vring_avail_t __user *avail, 660 vring_used_t __user *used) 661 { 662 /* Sane power of 2 please! */ 663 if (!num || num > 0xffff || (num & (num - 1))) { 664 vringh_bad("Bad ring size %u", num); 665 return -EINVAL; 666 } 667 668 vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1)); 669 vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX)); 670 vrh->weak_barriers = weak_barriers; 671 vrh->completed = 0; 672 vrh->last_avail_idx = 0; 673 vrh->last_used_idx = 0; 674 vrh->vring.num = num; 675 /* vring expects kernel addresses, but only used via accessors. */ 676 vrh->vring.desc = (__force struct vring_desc *)desc; 677 vrh->vring.avail = (__force struct vring_avail *)avail; 678 vrh->vring.used = (__force struct vring_used *)used; 679 return 0; 680 } 681 EXPORT_SYMBOL(vringh_init_user); 682 683 /** 684 * vringh_getdesc_user - get next available descriptor from userspace ring. 685 * @vrh: the userspace vring. 686 * @riov: where to put the readable descriptors (or NULL) 687 * @wiov: where to put the writable descriptors (or NULL) 688 * @getrange: function to call to check ranges. 689 * @head: head index we received, for passing to vringh_complete_user(). 690 * 691 * Returns 0 if there was no descriptor, 1 if there was, or -errno. 692 * 693 * Note that on error return, you can tell the difference between an 694 * invalid ring and a single invalid descriptor: in the former case, 695 * *head will be vrh->vring.num. You may be able to ignore an invalid 696 * descriptor, but there's not much you can do with an invalid ring. 697 * 698 * Note that you can reuse riov and wiov with subsequent calls. Content is 699 * overwritten and memory reallocated if more space is needed. 700 * When you don't have to use riov and wiov anymore, you should clean up them 701 * calling vringh_iov_cleanup() to release the memory, even on error! 702 */ 703 int vringh_getdesc_user(struct vringh *vrh, 704 struct vringh_iov *riov, 705 struct vringh_iov *wiov, 706 bool (*getrange)(struct vringh *vrh, 707 u64 addr, struct vringh_range *r), 708 u16 *head) 709 { 710 int err; 711 712 *head = vrh->vring.num; 713 err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx); 714 if (err < 0) 715 return err; 716 717 /* Empty... */ 718 if (err == vrh->vring.num) 719 return 0; 720 721 /* We need the layouts to be the identical for this to work */ 722 BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov)); 723 BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) != 724 offsetof(struct vringh_iov, iov)); 725 BUILD_BUG_ON(offsetof(struct vringh_kiov, i) != 726 offsetof(struct vringh_iov, i)); 727 BUILD_BUG_ON(offsetof(struct vringh_kiov, used) != 728 offsetof(struct vringh_iov, used)); 729 BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) != 730 offsetof(struct vringh_iov, max_num)); 731 BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec)); 732 BUILD_BUG_ON(offsetof(struct iovec, iov_base) != 733 offsetof(struct kvec, iov_base)); 734 BUILD_BUG_ON(offsetof(struct iovec, iov_len) != 735 offsetof(struct kvec, iov_len)); 736 BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base) 737 != sizeof(((struct kvec *)NULL)->iov_base)); 738 BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len) 739 != sizeof(((struct kvec *)NULL)->iov_len)); 740 741 *head = err; 742 err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov, 743 (struct vringh_kiov *)wiov, 744 range_check, getrange, GFP_KERNEL, copydesc_user); 745 if (err) 746 return err; 747 748 return 1; 749 } 750 EXPORT_SYMBOL(vringh_getdesc_user); 751 752 /** 753 * vringh_iov_pull_user - copy bytes from vring_iov. 754 * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume) 755 * @dst: the place to copy. 756 * @len: the maximum length to copy. 757 * 758 * Returns the bytes copied <= len or a negative errno. 759 */ 760 ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len) 761 { 762 return vringh_iov_xfer(NULL, (struct vringh_kiov *)riov, 763 dst, len, xfer_from_user); 764 } 765 EXPORT_SYMBOL(vringh_iov_pull_user); 766 767 /** 768 * vringh_iov_push_user - copy bytes into vring_iov. 769 * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume) 770 * @src: the place to copy from. 771 * @len: the maximum length to copy. 772 * 773 * Returns the bytes copied <= len or a negative errno. 774 */ 775 ssize_t vringh_iov_push_user(struct vringh_iov *wiov, 776 const void *src, size_t len) 777 { 778 return vringh_iov_xfer(NULL, (struct vringh_kiov *)wiov, 779 (void *)src, len, xfer_to_user); 780 } 781 EXPORT_SYMBOL(vringh_iov_push_user); 782 783 /** 784 * vringh_abandon_user - we've decided not to handle the descriptor(s). 785 * @vrh: the vring. 786 * @num: the number of descriptors to put back (ie. num 787 * vringh_get_user() to undo). 788 * 789 * The next vringh_get_user() will return the old descriptor(s) again. 790 */ 791 void vringh_abandon_user(struct vringh *vrh, unsigned int num) 792 { 793 /* We only update vring_avail_event(vr) when we want to be notified, 794 * so we haven't changed that yet. */ 795 vrh->last_avail_idx -= num; 796 } 797 EXPORT_SYMBOL(vringh_abandon_user); 798 799 /** 800 * vringh_complete_user - we've finished with descriptor, publish it. 801 * @vrh: the vring. 802 * @head: the head as filled in by vringh_getdesc_user. 803 * @len: the length of data we have written. 804 * 805 * You should check vringh_need_notify_user() after one or more calls 806 * to this function. 807 */ 808 int vringh_complete_user(struct vringh *vrh, u16 head, u32 len) 809 { 810 struct vring_used_elem used; 811 812 used.id = cpu_to_vringh32(vrh, head); 813 used.len = cpu_to_vringh32(vrh, len); 814 return __vringh_complete(vrh, &used, 1, putu16_user, putused_user); 815 } 816 EXPORT_SYMBOL(vringh_complete_user); 817 818 /** 819 * vringh_complete_multi_user - we've finished with many descriptors. 820 * @vrh: the vring. 821 * @used: the head, length pairs. 822 * @num_used: the number of used elements. 823 * 824 * You should check vringh_need_notify_user() after one or more calls 825 * to this function. 826 */ 827 int vringh_complete_multi_user(struct vringh *vrh, 828 const struct vring_used_elem used[], 829 unsigned num_used) 830 { 831 return __vringh_complete(vrh, used, num_used, 832 putu16_user, putused_user); 833 } 834 EXPORT_SYMBOL(vringh_complete_multi_user); 835 836 /** 837 * vringh_notify_enable_user - we want to know if something changes. 838 * @vrh: the vring. 839 * 840 * This always enables notifications, but returns false if there are 841 * now more buffers available in the vring. 842 */ 843 bool vringh_notify_enable_user(struct vringh *vrh) 844 { 845 return __vringh_notify_enable(vrh, getu16_user, putu16_user); 846 } 847 EXPORT_SYMBOL(vringh_notify_enable_user); 848 849 /** 850 * vringh_notify_disable_user - don't tell us if something changes. 851 * @vrh: the vring. 852 * 853 * This is our normal running state: we disable and then only enable when 854 * we're going to sleep. 855 */ 856 void vringh_notify_disable_user(struct vringh *vrh) 857 { 858 __vringh_notify_disable(vrh, putu16_user); 859 } 860 EXPORT_SYMBOL(vringh_notify_disable_user); 861 862 /** 863 * vringh_need_notify_user - must we tell the other side about used buffers? 864 * @vrh: the vring we've called vringh_complete_user() on. 865 * 866 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do. 867 */ 868 int vringh_need_notify_user(struct vringh *vrh) 869 { 870 return __vringh_need_notify(vrh, getu16_user); 871 } 872 EXPORT_SYMBOL(vringh_need_notify_user); 873 874 /* Kernelspace access helpers. */ 875 static inline int getu16_kern(const struct vringh *vrh, 876 u16 *val, const __virtio16 *p) 877 { 878 *val = vringh16_to_cpu(vrh, READ_ONCE(*p)); 879 return 0; 880 } 881 882 static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val) 883 { 884 WRITE_ONCE(*p, cpu_to_vringh16(vrh, val)); 885 return 0; 886 } 887 888 static inline int copydesc_kern(const struct vringh *vrh, 889 void *dst, const void *src, size_t len) 890 { 891 memcpy(dst, src, len); 892 return 0; 893 } 894 895 static inline int putused_kern(const struct vringh *vrh, 896 struct vring_used_elem *dst, 897 const struct vring_used_elem *src, 898 unsigned int num) 899 { 900 memcpy(dst, src, num * sizeof(*dst)); 901 return 0; 902 } 903 904 static inline int xfer_kern(const struct vringh *vrh, void *src, 905 void *dst, size_t len) 906 { 907 memcpy(dst, src, len); 908 return 0; 909 } 910 911 static inline int kern_xfer(const struct vringh *vrh, void *dst, 912 void *src, size_t len) 913 { 914 memcpy(dst, src, len); 915 return 0; 916 } 917 918 /** 919 * vringh_init_kern - initialize a vringh for a kernelspace vring. 920 * @vrh: the vringh to initialize. 921 * @features: the feature bits for this ring. 922 * @num: the number of elements. 923 * @weak_barriers: true if we only need memory barriers, not I/O. 924 * @desc: the userspace descriptor pointer. 925 * @avail: the userspace avail pointer. 926 * @used: the userspace used pointer. 927 * 928 * Returns an error if num is invalid. 929 */ 930 int vringh_init_kern(struct vringh *vrh, u64 features, 931 unsigned int num, bool weak_barriers, 932 struct vring_desc *desc, 933 struct vring_avail *avail, 934 struct vring_used *used) 935 { 936 /* Sane power of 2 please! */ 937 if (!num || num > 0xffff || (num & (num - 1))) { 938 vringh_bad("Bad ring size %u", num); 939 return -EINVAL; 940 } 941 942 vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1)); 943 vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX)); 944 vrh->weak_barriers = weak_barriers; 945 vrh->completed = 0; 946 vrh->last_avail_idx = 0; 947 vrh->last_used_idx = 0; 948 vrh->vring.num = num; 949 vrh->vring.desc = desc; 950 vrh->vring.avail = avail; 951 vrh->vring.used = used; 952 return 0; 953 } 954 EXPORT_SYMBOL(vringh_init_kern); 955 956 /** 957 * vringh_getdesc_kern - get next available descriptor from kernelspace ring. 958 * @vrh: the kernelspace vring. 959 * @riov: where to put the readable descriptors (or NULL) 960 * @wiov: where to put the writable descriptors (or NULL) 961 * @head: head index we received, for passing to vringh_complete_kern(). 962 * @gfp: flags for allocating larger riov/wiov. 963 * 964 * Returns 0 if there was no descriptor, 1 if there was, or -errno. 965 * 966 * Note that on error return, you can tell the difference between an 967 * invalid ring and a single invalid descriptor: in the former case, 968 * *head will be vrh->vring.num. You may be able to ignore an invalid 969 * descriptor, but there's not much you can do with an invalid ring. 970 * 971 * Note that you can reuse riov and wiov with subsequent calls. Content is 972 * overwritten and memory reallocated if more space is needed. 973 * When you don't have to use riov and wiov anymore, you should clean up them 974 * calling vringh_kiov_cleanup() to release the memory, even on error! 975 */ 976 int vringh_getdesc_kern(struct vringh *vrh, 977 struct vringh_kiov *riov, 978 struct vringh_kiov *wiov, 979 u16 *head, 980 gfp_t gfp) 981 { 982 int err; 983 984 err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx); 985 if (err < 0) 986 return err; 987 988 /* Empty... */ 989 if (err == vrh->vring.num) 990 return 0; 991 992 *head = err; 993 err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL, 994 gfp, copydesc_kern); 995 if (err) 996 return err; 997 998 return 1; 999 } 1000 EXPORT_SYMBOL(vringh_getdesc_kern); 1001 1002 /** 1003 * vringh_iov_pull_kern - copy bytes from vring_iov. 1004 * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume) 1005 * @dst: the place to copy. 1006 * @len: the maximum length to copy. 1007 * 1008 * Returns the bytes copied <= len or a negative errno. 1009 */ 1010 ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len) 1011 { 1012 return vringh_iov_xfer(NULL, riov, dst, len, xfer_kern); 1013 } 1014 EXPORT_SYMBOL(vringh_iov_pull_kern); 1015 1016 /** 1017 * vringh_iov_push_kern - copy bytes into vring_iov. 1018 * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume) 1019 * @src: the place to copy from. 1020 * @len: the maximum length to copy. 1021 * 1022 * Returns the bytes copied <= len or a negative errno. 1023 */ 1024 ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov, 1025 const void *src, size_t len) 1026 { 1027 return vringh_iov_xfer(NULL, wiov, (void *)src, len, kern_xfer); 1028 } 1029 EXPORT_SYMBOL(vringh_iov_push_kern); 1030 1031 /** 1032 * vringh_abandon_kern - we've decided not to handle the descriptor(s). 1033 * @vrh: the vring. 1034 * @num: the number of descriptors to put back (ie. num 1035 * vringh_get_kern() to undo). 1036 * 1037 * The next vringh_get_kern() will return the old descriptor(s) again. 1038 */ 1039 void vringh_abandon_kern(struct vringh *vrh, unsigned int num) 1040 { 1041 /* We only update vring_avail_event(vr) when we want to be notified, 1042 * so we haven't changed that yet. */ 1043 vrh->last_avail_idx -= num; 1044 } 1045 EXPORT_SYMBOL(vringh_abandon_kern); 1046 1047 /** 1048 * vringh_complete_kern - we've finished with descriptor, publish it. 1049 * @vrh: the vring. 1050 * @head: the head as filled in by vringh_getdesc_kern. 1051 * @len: the length of data we have written. 1052 * 1053 * You should check vringh_need_notify_kern() after one or more calls 1054 * to this function. 1055 */ 1056 int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len) 1057 { 1058 struct vring_used_elem used; 1059 1060 used.id = cpu_to_vringh32(vrh, head); 1061 used.len = cpu_to_vringh32(vrh, len); 1062 1063 return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern); 1064 } 1065 EXPORT_SYMBOL(vringh_complete_kern); 1066 1067 /** 1068 * vringh_notify_enable_kern - we want to know if something changes. 1069 * @vrh: the vring. 1070 * 1071 * This always enables notifications, but returns false if there are 1072 * now more buffers available in the vring. 1073 */ 1074 bool vringh_notify_enable_kern(struct vringh *vrh) 1075 { 1076 return __vringh_notify_enable(vrh, getu16_kern, putu16_kern); 1077 } 1078 EXPORT_SYMBOL(vringh_notify_enable_kern); 1079 1080 /** 1081 * vringh_notify_disable_kern - don't tell us if something changes. 1082 * @vrh: the vring. 1083 * 1084 * This is our normal running state: we disable and then only enable when 1085 * we're going to sleep. 1086 */ 1087 void vringh_notify_disable_kern(struct vringh *vrh) 1088 { 1089 __vringh_notify_disable(vrh, putu16_kern); 1090 } 1091 EXPORT_SYMBOL(vringh_notify_disable_kern); 1092 1093 /** 1094 * vringh_need_notify_kern - must we tell the other side about used buffers? 1095 * @vrh: the vring we've called vringh_complete_kern() on. 1096 * 1097 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do. 1098 */ 1099 int vringh_need_notify_kern(struct vringh *vrh) 1100 { 1101 return __vringh_need_notify(vrh, getu16_kern); 1102 } 1103 EXPORT_SYMBOL(vringh_need_notify_kern); 1104 1105 #if IS_REACHABLE(CONFIG_VHOST_IOTLB) 1106 1107 struct iotlb_vec { 1108 union { 1109 struct iovec *iovec; 1110 struct bio_vec *bvec; 1111 } iov; 1112 size_t count; 1113 }; 1114 1115 static int iotlb_translate(const struct vringh *vrh, 1116 u64 addr, u64 len, u64 *translated, 1117 struct iotlb_vec *ivec, u32 perm) 1118 { 1119 struct vhost_iotlb_map *map; 1120 struct vhost_iotlb *iotlb = vrh->iotlb; 1121 int ret = 0; 1122 u64 s = 0, last = addr + len - 1; 1123 1124 spin_lock(vrh->iotlb_lock); 1125 1126 while (len > s) { 1127 uintptr_t io_addr; 1128 size_t io_len; 1129 u64 size; 1130 1131 if (unlikely(ret >= ivec->count)) { 1132 ret = -ENOBUFS; 1133 break; 1134 } 1135 1136 map = vhost_iotlb_itree_first(iotlb, addr, last); 1137 if (!map || map->start > addr) { 1138 ret = -EINVAL; 1139 break; 1140 } else if (!(map->perm & perm)) { 1141 ret = -EPERM; 1142 break; 1143 } 1144 1145 size = map->size - addr + map->start; 1146 io_len = min(len - s, size); 1147 io_addr = map->addr - map->start + addr; 1148 1149 if (vrh->use_va) { 1150 struct iovec *iovec = ivec->iov.iovec; 1151 1152 iovec[ret].iov_len = io_len; 1153 iovec[ret].iov_base = (void __user *)io_addr; 1154 } else { 1155 u64 pfn = io_addr >> PAGE_SHIFT; 1156 struct bio_vec *bvec = ivec->iov.bvec; 1157 1158 bvec_set_page(&bvec[ret], pfn_to_page(pfn), io_len, 1159 io_addr & (PAGE_SIZE - 1)); 1160 } 1161 1162 s += size; 1163 addr += size; 1164 ++ret; 1165 } 1166 1167 spin_unlock(vrh->iotlb_lock); 1168 1169 if (translated) 1170 *translated = min(len, s); 1171 1172 return ret; 1173 } 1174 1175 #define IOTLB_IOV_STRIDE 16 1176 1177 static inline int copy_from_iotlb(const struct vringh *vrh, void *dst, 1178 void *src, size_t len) 1179 { 1180 struct iotlb_vec ivec; 1181 union { 1182 struct iovec iovec[IOTLB_IOV_STRIDE]; 1183 struct bio_vec bvec[IOTLB_IOV_STRIDE]; 1184 } iov; 1185 u64 total_translated = 0; 1186 1187 ivec.iov.iovec = iov.iovec; 1188 ivec.count = IOTLB_IOV_STRIDE; 1189 1190 while (total_translated < len) { 1191 struct iov_iter iter; 1192 u64 translated; 1193 int ret; 1194 1195 ret = iotlb_translate(vrh, (u64)(uintptr_t)src, 1196 len - total_translated, &translated, 1197 &ivec, VHOST_MAP_RO); 1198 if (ret == -ENOBUFS) 1199 ret = IOTLB_IOV_STRIDE; 1200 else if (ret < 0) 1201 return ret; 1202 1203 if (vrh->use_va) { 1204 iov_iter_init(&iter, ITER_SOURCE, ivec.iov.iovec, ret, 1205 translated); 1206 } else { 1207 iov_iter_bvec(&iter, ITER_SOURCE, ivec.iov.bvec, ret, 1208 translated); 1209 } 1210 1211 ret = copy_from_iter(dst, translated, &iter); 1212 if (ret < 0) 1213 return ret; 1214 1215 src += translated; 1216 dst += translated; 1217 total_translated += translated; 1218 } 1219 1220 return total_translated; 1221 } 1222 1223 static inline int copy_to_iotlb(const struct vringh *vrh, void *dst, 1224 void *src, size_t len) 1225 { 1226 struct iotlb_vec ivec; 1227 union { 1228 struct iovec iovec[IOTLB_IOV_STRIDE]; 1229 struct bio_vec bvec[IOTLB_IOV_STRIDE]; 1230 } iov; 1231 u64 total_translated = 0; 1232 1233 ivec.iov.iovec = iov.iovec; 1234 ivec.count = IOTLB_IOV_STRIDE; 1235 1236 while (total_translated < len) { 1237 struct iov_iter iter; 1238 u64 translated; 1239 int ret; 1240 1241 ret = iotlb_translate(vrh, (u64)(uintptr_t)dst, 1242 len - total_translated, &translated, 1243 &ivec, VHOST_MAP_WO); 1244 if (ret == -ENOBUFS) 1245 ret = IOTLB_IOV_STRIDE; 1246 else if (ret < 0) 1247 return ret; 1248 1249 if (vrh->use_va) { 1250 iov_iter_init(&iter, ITER_DEST, ivec.iov.iovec, ret, 1251 translated); 1252 } else { 1253 iov_iter_bvec(&iter, ITER_DEST, ivec.iov.bvec, ret, 1254 translated); 1255 } 1256 1257 ret = copy_to_iter(src, translated, &iter); 1258 if (ret < 0) 1259 return ret; 1260 1261 src += translated; 1262 dst += translated; 1263 total_translated += translated; 1264 } 1265 1266 return total_translated; 1267 } 1268 1269 static inline int getu16_iotlb(const struct vringh *vrh, 1270 u16 *val, const __virtio16 *p) 1271 { 1272 struct iotlb_vec ivec; 1273 union { 1274 struct iovec iovec[1]; 1275 struct bio_vec bvec[1]; 1276 } iov; 1277 __virtio16 tmp; 1278 int ret; 1279 1280 ivec.iov.iovec = iov.iovec; 1281 ivec.count = 1; 1282 1283 /* Atomic read is needed for getu16 */ 1284 ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), 1285 NULL, &ivec, VHOST_MAP_RO); 1286 if (ret < 0) 1287 return ret; 1288 1289 if (vrh->use_va) { 1290 ret = __get_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base); 1291 if (ret) 1292 return ret; 1293 } else { 1294 void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page); 1295 void *from = kaddr + ivec.iov.bvec[0].bv_offset; 1296 1297 tmp = READ_ONCE(*(__virtio16 *)from); 1298 kunmap_local(kaddr); 1299 } 1300 1301 *val = vringh16_to_cpu(vrh, tmp); 1302 1303 return 0; 1304 } 1305 1306 static inline int putu16_iotlb(const struct vringh *vrh, 1307 __virtio16 *p, u16 val) 1308 { 1309 struct iotlb_vec ivec; 1310 union { 1311 struct iovec iovec; 1312 struct bio_vec bvec; 1313 } iov; 1314 __virtio16 tmp; 1315 int ret; 1316 1317 ivec.iov.iovec = &iov.iovec; 1318 ivec.count = 1; 1319 1320 /* Atomic write is needed for putu16 */ 1321 ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p), 1322 NULL, &ivec, VHOST_MAP_RO); 1323 if (ret < 0) 1324 return ret; 1325 1326 tmp = cpu_to_vringh16(vrh, val); 1327 1328 if (vrh->use_va) { 1329 ret = __put_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base); 1330 if (ret) 1331 return ret; 1332 } else { 1333 void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page); 1334 void *to = kaddr + ivec.iov.bvec[0].bv_offset; 1335 1336 WRITE_ONCE(*(__virtio16 *)to, tmp); 1337 kunmap_local(kaddr); 1338 } 1339 1340 return 0; 1341 } 1342 1343 static inline int copydesc_iotlb(const struct vringh *vrh, 1344 void *dst, const void *src, size_t len) 1345 { 1346 int ret; 1347 1348 ret = copy_from_iotlb(vrh, dst, (void *)src, len); 1349 if (ret != len) 1350 return -EFAULT; 1351 1352 return 0; 1353 } 1354 1355 static inline int xfer_from_iotlb(const struct vringh *vrh, void *src, 1356 void *dst, size_t len) 1357 { 1358 int ret; 1359 1360 ret = copy_from_iotlb(vrh, dst, src, len); 1361 if (ret != len) 1362 return -EFAULT; 1363 1364 return 0; 1365 } 1366 1367 static inline int xfer_to_iotlb(const struct vringh *vrh, 1368 void *dst, void *src, size_t len) 1369 { 1370 int ret; 1371 1372 ret = copy_to_iotlb(vrh, dst, src, len); 1373 if (ret != len) 1374 return -EFAULT; 1375 1376 return 0; 1377 } 1378 1379 static inline int putused_iotlb(const struct vringh *vrh, 1380 struct vring_used_elem *dst, 1381 const struct vring_used_elem *src, 1382 unsigned int num) 1383 { 1384 int size = num * sizeof(*dst); 1385 int ret; 1386 1387 ret = copy_to_iotlb(vrh, dst, (void *)src, num * sizeof(*dst)); 1388 if (ret != size) 1389 return -EFAULT; 1390 1391 return 0; 1392 } 1393 1394 /** 1395 * vringh_init_iotlb - initialize a vringh for a ring with IOTLB. 1396 * @vrh: the vringh to initialize. 1397 * @features: the feature bits for this ring. 1398 * @num: the number of elements. 1399 * @weak_barriers: true if we only need memory barriers, not I/O. 1400 * @desc: the userspace descriptor pointer. 1401 * @avail: the userspace avail pointer. 1402 * @used: the userspace used pointer. 1403 * 1404 * Returns an error if num is invalid. 1405 */ 1406 int vringh_init_iotlb(struct vringh *vrh, u64 features, 1407 unsigned int num, bool weak_barriers, 1408 struct vring_desc *desc, 1409 struct vring_avail *avail, 1410 struct vring_used *used) 1411 { 1412 vrh->use_va = false; 1413 1414 return vringh_init_kern(vrh, features, num, weak_barriers, 1415 desc, avail, used); 1416 } 1417 EXPORT_SYMBOL(vringh_init_iotlb); 1418 1419 /** 1420 * vringh_init_iotlb_va - initialize a vringh for a ring with IOTLB containing 1421 * user VA. 1422 * @vrh: the vringh to initialize. 1423 * @features: the feature bits for this ring. 1424 * @num: the number of elements. 1425 * @weak_barriers: true if we only need memory barriers, not I/O. 1426 * @desc: the userspace descriptor pointer. 1427 * @avail: the userspace avail pointer. 1428 * @used: the userspace used pointer. 1429 * 1430 * Returns an error if num is invalid. 1431 */ 1432 int vringh_init_iotlb_va(struct vringh *vrh, u64 features, 1433 unsigned int num, bool weak_barriers, 1434 struct vring_desc *desc, 1435 struct vring_avail *avail, 1436 struct vring_used *used) 1437 { 1438 vrh->use_va = true; 1439 1440 return vringh_init_kern(vrh, features, num, weak_barriers, 1441 desc, avail, used); 1442 } 1443 EXPORT_SYMBOL(vringh_init_iotlb_va); 1444 1445 /** 1446 * vringh_set_iotlb - initialize a vringh for a ring with IOTLB. 1447 * @vrh: the vring 1448 * @iotlb: iotlb associated with this vring 1449 * @iotlb_lock: spinlock to synchronize the iotlb accesses 1450 */ 1451 void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb, 1452 spinlock_t *iotlb_lock) 1453 { 1454 vrh->iotlb = iotlb; 1455 vrh->iotlb_lock = iotlb_lock; 1456 } 1457 EXPORT_SYMBOL(vringh_set_iotlb); 1458 1459 /** 1460 * vringh_getdesc_iotlb - get next available descriptor from ring with 1461 * IOTLB. 1462 * @vrh: the kernelspace vring. 1463 * @riov: where to put the readable descriptors (or NULL) 1464 * @wiov: where to put the writable descriptors (or NULL) 1465 * @head: head index we received, for passing to vringh_complete_iotlb(). 1466 * @gfp: flags for allocating larger riov/wiov. 1467 * 1468 * Returns 0 if there was no descriptor, 1 if there was, or -errno. 1469 * 1470 * Note that on error return, you can tell the difference between an 1471 * invalid ring and a single invalid descriptor: in the former case, 1472 * *head will be vrh->vring.num. You may be able to ignore an invalid 1473 * descriptor, but there's not much you can do with an invalid ring. 1474 * 1475 * Note that you can reuse riov and wiov with subsequent calls. Content is 1476 * overwritten and memory reallocated if more space is needed. 1477 * When you don't have to use riov and wiov anymore, you should clean up them 1478 * calling vringh_kiov_cleanup() to release the memory, even on error! 1479 */ 1480 int vringh_getdesc_iotlb(struct vringh *vrh, 1481 struct vringh_kiov *riov, 1482 struct vringh_kiov *wiov, 1483 u16 *head, 1484 gfp_t gfp) 1485 { 1486 int err; 1487 1488 err = __vringh_get_head(vrh, getu16_iotlb, &vrh->last_avail_idx); 1489 if (err < 0) 1490 return err; 1491 1492 /* Empty... */ 1493 if (err == vrh->vring.num) 1494 return 0; 1495 1496 *head = err; 1497 err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL, 1498 gfp, copydesc_iotlb); 1499 if (err) 1500 return err; 1501 1502 return 1; 1503 } 1504 EXPORT_SYMBOL(vringh_getdesc_iotlb); 1505 1506 /** 1507 * vringh_iov_pull_iotlb - copy bytes from vring_iov. 1508 * @vrh: the vring. 1509 * @riov: the riov as passed to vringh_getdesc_iotlb() (updated as we consume) 1510 * @dst: the place to copy. 1511 * @len: the maximum length to copy. 1512 * 1513 * Returns the bytes copied <= len or a negative errno. 1514 */ 1515 ssize_t vringh_iov_pull_iotlb(struct vringh *vrh, 1516 struct vringh_kiov *riov, 1517 void *dst, size_t len) 1518 { 1519 return vringh_iov_xfer(vrh, riov, dst, len, xfer_from_iotlb); 1520 } 1521 EXPORT_SYMBOL(vringh_iov_pull_iotlb); 1522 1523 /** 1524 * vringh_iov_push_iotlb - copy bytes into vring_iov. 1525 * @vrh: the vring. 1526 * @wiov: the wiov as passed to vringh_getdesc_iotlb() (updated as we consume) 1527 * @src: the place to copy from. 1528 * @len: the maximum length to copy. 1529 * 1530 * Returns the bytes copied <= len or a negative errno. 1531 */ 1532 ssize_t vringh_iov_push_iotlb(struct vringh *vrh, 1533 struct vringh_kiov *wiov, 1534 const void *src, size_t len) 1535 { 1536 return vringh_iov_xfer(vrh, wiov, (void *)src, len, xfer_to_iotlb); 1537 } 1538 EXPORT_SYMBOL(vringh_iov_push_iotlb); 1539 1540 /** 1541 * vringh_abandon_iotlb - we've decided not to handle the descriptor(s). 1542 * @vrh: the vring. 1543 * @num: the number of descriptors to put back (ie. num 1544 * vringh_get_iotlb() to undo). 1545 * 1546 * The next vringh_get_iotlb() will return the old descriptor(s) again. 1547 */ 1548 void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num) 1549 { 1550 /* We only update vring_avail_event(vr) when we want to be notified, 1551 * so we haven't changed that yet. 1552 */ 1553 vrh->last_avail_idx -= num; 1554 } 1555 EXPORT_SYMBOL(vringh_abandon_iotlb); 1556 1557 /** 1558 * vringh_complete_iotlb - we've finished with descriptor, publish it. 1559 * @vrh: the vring. 1560 * @head: the head as filled in by vringh_getdesc_iotlb. 1561 * @len: the length of data we have written. 1562 * 1563 * You should check vringh_need_notify_iotlb() after one or more calls 1564 * to this function. 1565 */ 1566 int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len) 1567 { 1568 struct vring_used_elem used; 1569 1570 used.id = cpu_to_vringh32(vrh, head); 1571 used.len = cpu_to_vringh32(vrh, len); 1572 1573 return __vringh_complete(vrh, &used, 1, putu16_iotlb, putused_iotlb); 1574 } 1575 EXPORT_SYMBOL(vringh_complete_iotlb); 1576 1577 /** 1578 * vringh_notify_enable_iotlb - we want to know if something changes. 1579 * @vrh: the vring. 1580 * 1581 * This always enables notifications, but returns false if there are 1582 * now more buffers available in the vring. 1583 */ 1584 bool vringh_notify_enable_iotlb(struct vringh *vrh) 1585 { 1586 return __vringh_notify_enable(vrh, getu16_iotlb, putu16_iotlb); 1587 } 1588 EXPORT_SYMBOL(vringh_notify_enable_iotlb); 1589 1590 /** 1591 * vringh_notify_disable_iotlb - don't tell us if something changes. 1592 * @vrh: the vring. 1593 * 1594 * This is our normal running state: we disable and then only enable when 1595 * we're going to sleep. 1596 */ 1597 void vringh_notify_disable_iotlb(struct vringh *vrh) 1598 { 1599 __vringh_notify_disable(vrh, putu16_iotlb); 1600 } 1601 EXPORT_SYMBOL(vringh_notify_disable_iotlb); 1602 1603 /** 1604 * vringh_need_notify_iotlb - must we tell the other side about used buffers? 1605 * @vrh: the vring we've called vringh_complete_iotlb() on. 1606 * 1607 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do. 1608 */ 1609 int vringh_need_notify_iotlb(struct vringh *vrh) 1610 { 1611 return __vringh_need_notify(vrh, getu16_iotlb); 1612 } 1613 EXPORT_SYMBOL(vringh_need_notify_iotlb); 1614 1615 #endif 1616 1617 MODULE_LICENSE("GPL"); 1618