// SPDX-License-Identifier: GPL-2.0
/*
 * channel program interfaces
 *
 * Copyright IBM Corp. 2017
 *
 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
 *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
 */

#include <linux/ratelimit.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/vfio.h>
#include <asm/idals.h>

#include "vfio_ccw_cp.h"
#include "vfio_ccw_private.h"

/*
 * Bookkeeping for the pages referenced by one CCW's data area.
 * One page_array exists per CCW of a chain (see ccwchain::ch_pa).
 */
struct page_array {
	/* Array of the addresses (IOVAs) of the pages that need to be pinned. */
	dma_addr_t		*pa_iova;
	/* Array that receives the pinned pages. */
	struct page		**pa_page;
	/* Number of entries in @pa_iova / @pa_page; reset to 0 on unpin. */
	int			pa_nr;
};

/*
 * One contiguous segment of a channel program, kept on
 * channel_program::ccwchain_list.
 */
struct ccwchain {
	/* Linkage into the channel program's list of chains. */
	struct list_head	next;
	/* Kernel copy of the CCWs of this chain (ch_len entries). */
	struct ccw1		*ch_ccw;
	/* Guest physical address of the current chain. */
	u64			ch_iova;
	/* Count of the valid ccws in chain. */
	int			ch_len;
	/* Pinned PAGEs for the original data, one page_array per CCW. */
	struct page_array	*ch_pa;
};

/*
 * page_array_alloc() - alloc memory for page array
 * @pa: page_array on which to perform the operation
 * @len: number of pages that should be pinned from @iova
 *
 * Attempt to allocate memory for page array.
 *
 * Usage of page_array:
 * We expect (pa_nr == 0) and (pa_iova == NULL), any field in
 * this structure will be filled in by this function.
52 * 53 * Returns: 54 * 0 if page array is allocated 55 * -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova is not NULL 56 * -ENOMEM if alloc failed 57 */ 58 static int page_array_alloc(struct page_array *pa, unsigned int len) 59 { 60 if (pa->pa_nr || pa->pa_iova) 61 return -EINVAL; 62 63 if (len == 0) 64 return -EINVAL; 65 66 pa->pa_nr = len; 67 68 pa->pa_iova = kcalloc(len, sizeof(*pa->pa_iova), GFP_KERNEL); 69 if (!pa->pa_iova) 70 return -ENOMEM; 71 72 pa->pa_page = kcalloc(len, sizeof(*pa->pa_page), GFP_KERNEL); 73 if (!pa->pa_page) { 74 kfree(pa->pa_iova); 75 return -ENOMEM; 76 } 77 78 return 0; 79 } 80 81 /* 82 * page_array_unpin() - Unpin user pages in memory 83 * @pa: page_array on which to perform the operation 84 * @vdev: the vfio device to perform the operation 85 * @pa_nr: number of user pages to unpin 86 * @unaligned: were pages unaligned on the pin request 87 * 88 * Only unpin if any pages were pinned to begin with, i.e. pa_nr > 0, 89 * otherwise only clear pa->pa_nr 90 */ 91 static void page_array_unpin(struct page_array *pa, 92 struct vfio_device *vdev, int pa_nr, bool unaligned) 93 { 94 int unpinned = 0, npage = 1; 95 96 while (unpinned < pa_nr) { 97 dma_addr_t *first = &pa->pa_iova[unpinned]; 98 dma_addr_t *last = &first[npage]; 99 100 if (unpinned + npage < pa_nr && 101 *first + npage * PAGE_SIZE == *last && 102 !unaligned) { 103 npage++; 104 continue; 105 } 106 107 vfio_unpin_pages(vdev, *first, npage); 108 unpinned += npage; 109 npage = 1; 110 } 111 112 pa->pa_nr = 0; 113 } 114 115 /* 116 * page_array_pin() - Pin user pages in memory 117 * @pa: page_array on which to perform the operation 118 * @vdev: the vfio device to perform pin operations 119 * @unaligned: are pages aligned to 4K boundary? 120 * 121 * Returns number of pages pinned upon success. 122 * If the pin request partially succeeds, or fails completely, 123 * all pages are left unpinned and a negative error value is returned. 
124 * 125 * Requests to pin "aligned" pages can be coalesced into a single 126 * vfio_pin_pages request for the sake of efficiency, based on the 127 * expectation of 4K page requests. Unaligned requests are probably 128 * dealing with 2K "pages", and cannot be coalesced without 129 * reworking this logic to incorporate that math. 130 */ 131 static int page_array_pin(struct page_array *pa, struct vfio_device *vdev, bool unaligned) 132 { 133 int pinned = 0, npage = 1; 134 int ret = 0; 135 136 while (pinned < pa->pa_nr) { 137 dma_addr_t *first = &pa->pa_iova[pinned]; 138 dma_addr_t *last = &first[npage]; 139 140 if (pinned + npage < pa->pa_nr && 141 *first + npage * PAGE_SIZE == *last && 142 !unaligned) { 143 npage++; 144 continue; 145 } 146 147 ret = vfio_pin_pages(vdev, *first, npage, 148 IOMMU_READ | IOMMU_WRITE, 149 &pa->pa_page[pinned]); 150 if (ret < 0) { 151 goto err_out; 152 } else if (ret > 0 && ret != npage) { 153 pinned += ret; 154 ret = -EINVAL; 155 goto err_out; 156 } 157 pinned += npage; 158 npage = 1; 159 } 160 161 return ret; 162 163 err_out: 164 page_array_unpin(pa, vdev, pinned, unaligned); 165 return ret; 166 } 167 168 /* Unpin the pages before releasing the memory. */ 169 static void page_array_unpin_free(struct page_array *pa, struct vfio_device *vdev, bool unaligned) 170 { 171 page_array_unpin(pa, vdev, pa->pa_nr, unaligned); 172 kfree(pa->pa_page); 173 kfree(pa->pa_iova); 174 } 175 176 static bool page_array_iova_pinned(struct page_array *pa, u64 iova, u64 length) 177 { 178 u64 iova_pfn_start = iova >> PAGE_SHIFT; 179 u64 iova_pfn_end = (iova + length - 1) >> PAGE_SHIFT; 180 u64 pfn; 181 int i; 182 183 for (i = 0; i < pa->pa_nr; i++) { 184 pfn = pa->pa_iova[i] >> PAGE_SHIFT; 185 if (pfn >= iova_pfn_start && pfn <= iova_pfn_end) 186 return true; 187 } 188 189 return false; 190 } 191 /* Create the list of IDAL words for a page_array. 
*/ 192 static inline void page_array_idal_create_words(struct page_array *pa, 193 dma64_t *idaws) 194 { 195 int i; 196 197 /* 198 * Idal words (execept the first one) rely on the memory being 4k 199 * aligned. If a user virtual address is 4K aligned, then it's 200 * corresponding kernel physical address will also be 4K aligned. Thus 201 * there will be no problem here to simply use the phys to create an 202 * idaw. 203 */ 204 205 for (i = 0; i < pa->pa_nr; i++) { 206 idaws[i] = virt_to_dma64(page_to_virt(pa->pa_page[i])); 207 208 /* Incorporate any offset from each starting address */ 209 idaws[i] = dma64_add(idaws[i], pa->pa_iova[i] & ~PAGE_MASK); 210 } 211 } 212 213 static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len) 214 { 215 struct ccw0 ccw0; 216 struct ccw1 *pccw1 = source; 217 int i; 218 219 for (i = 0; i < len; i++) { 220 ccw0 = *(struct ccw0 *)pccw1; 221 if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) { 222 pccw1->cmd_code = CCW_CMD_TIC; 223 pccw1->flags = 0; 224 pccw1->count = 0; 225 } else { 226 pccw1->cmd_code = ccw0.cmd_code; 227 pccw1->flags = ccw0.flags; 228 pccw1->count = ccw0.count; 229 } 230 pccw1->cda = u32_to_dma32(ccw0.cda); 231 pccw1++; 232 } 233 } 234 235 #define idal_is_2k(_cp) (!(_cp)->orb.cmd.c64 || (_cp)->orb.cmd.i2k) 236 237 /* 238 * Helpers to operate ccwchain. 
239 */ 240 #define ccw_is_read(_ccw) (((_ccw)->cmd_code & 0x03) == 0x02) 241 #define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C) 242 #define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE) 243 244 #define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP) 245 246 #define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC) 247 248 #define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA) 249 #define ccw_is_skip(_ccw) ((_ccw)->flags & CCW_FLAG_SKIP) 250 251 #define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC)) 252 253 /* 254 * ccw_does_data_transfer() 255 * 256 * Determine whether a CCW will move any data, such that the guest pages 257 * would need to be pinned before performing the I/O. 258 * 259 * Returns 1 if yes, 0 if no. 260 */ 261 static inline int ccw_does_data_transfer(struct ccw1 *ccw) 262 { 263 /* If the count field is zero, then no data will be transferred */ 264 if (ccw->count == 0) 265 return 0; 266 267 /* If the command is a NOP, then no data will be transferred */ 268 if (ccw_is_noop(ccw)) 269 return 0; 270 271 /* If the skip flag is off, then data will be transferred */ 272 if (!ccw_is_skip(ccw)) 273 return 1; 274 275 /* 276 * If the skip flag is on, it is only meaningful if the command 277 * code is a read, read backward, sense, or sense ID. In those 278 * cases, no data will be transferred. 279 */ 280 if (ccw_is_read(ccw) || ccw_is_read_backward(ccw)) 281 return 0; 282 283 if (ccw_is_sense(ccw)) 284 return 0; 285 286 /* The skip flag is on, but it is ignored for this command code. */ 287 return 1; 288 } 289 290 /* 291 * is_cpa_within_range() 292 * 293 * @cpa: channel program address being questioned 294 * @head: address of the beginning of a CCW chain 295 * @len: number of CCWs within the chain 296 * 297 * Determine whether the address of a CCW (whether a new chain, 298 * or the target of a TIC) falls within a range (including the end points). 299 * 300 * Returns 1 if yes, 0 if no. 
301 */ 302 static inline int is_cpa_within_range(dma32_t cpa, u32 head, int len) 303 { 304 u32 tail = head + (len - 1) * sizeof(struct ccw1); 305 u32 gcpa = dma32_to_u32(cpa); 306 307 return head <= gcpa && gcpa <= tail; 308 } 309 310 static inline int is_tic_within_range(struct ccw1 *ccw, u32 head, int len) 311 { 312 if (!ccw_is_tic(ccw)) 313 return 0; 314 315 return is_cpa_within_range(ccw->cda, head, len); 316 } 317 318 static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len) 319 { 320 struct ccwchain *chain; 321 322 chain = kzalloc(sizeof(*chain), GFP_KERNEL); 323 if (!chain) 324 return NULL; 325 326 chain->ch_ccw = kcalloc(len, sizeof(*chain->ch_ccw), GFP_DMA | GFP_KERNEL); 327 if (!chain->ch_ccw) 328 goto out_err; 329 330 chain->ch_pa = kcalloc(len, sizeof(*chain->ch_pa), GFP_KERNEL); 331 if (!chain->ch_pa) 332 goto out_err; 333 334 list_add_tail(&chain->next, &cp->ccwchain_list); 335 336 return chain; 337 338 out_err: 339 kfree(chain->ch_ccw); 340 kfree(chain); 341 return NULL; 342 } 343 344 static void ccwchain_free(struct ccwchain *chain) 345 { 346 list_del(&chain->next); 347 kfree(chain->ch_pa); 348 kfree(chain->ch_ccw); 349 kfree(chain); 350 } 351 352 /* Free resource for a ccw that allocated memory for its cda. */ 353 static void ccwchain_cda_free(struct ccwchain *chain, int idx) 354 { 355 struct ccw1 *ccw = &chain->ch_ccw[idx]; 356 357 if (ccw_is_tic(ccw)) 358 return; 359 360 kfree(dma32_to_virt(ccw->cda)); 361 } 362 363 /** 364 * ccwchain_calc_length - calculate the length of the ccw chain. 365 * @iova: guest physical address of the target ccw chain 366 * @cp: channel_program on which to perform the operation 367 * 368 * This is the chain length not considering any TICs. 369 * You need to do a new round for each TIC target. 370 * 371 * The program is also validated for absence of not yet supported 372 * indirect data addressing scenarios. 373 * 374 * Returns: the length of the ccw chain or -errno. 
375 */ 376 static int ccwchain_calc_length(u64 iova, struct channel_program *cp) 377 { 378 struct ccw1 *ccw = cp->guest_cp; 379 int cnt = 0; 380 381 do { 382 cnt++; 383 384 /* 385 * We want to keep counting if the current CCW has the 386 * command-chaining flag enabled, or if it is a TIC CCW 387 * that loops back into the current chain. The latter 388 * is used for device orientation, where the CCW PRIOR to 389 * the TIC can either jump to the TIC or a CCW immediately 390 * after the TIC, depending on the results of its operation. 391 */ 392 if (!ccw_is_chain(ccw) && !is_tic_within_range(ccw, iova, cnt)) 393 break; 394 395 ccw++; 396 } while (cnt < CCWCHAIN_LEN_MAX + 1); 397 398 if (cnt == CCWCHAIN_LEN_MAX + 1) 399 cnt = -EINVAL; 400 401 return cnt; 402 } 403 404 static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp) 405 { 406 struct ccwchain *chain; 407 u32 ccw_head; 408 409 list_for_each_entry(chain, &cp->ccwchain_list, next) { 410 ccw_head = chain->ch_iova; 411 if (is_cpa_within_range(tic->cda, ccw_head, chain->ch_len)) 412 return 1; 413 } 414 415 return 0; 416 } 417 418 static int ccwchain_loop_tic(struct ccwchain *chain, 419 struct channel_program *cp); 420 421 static int ccwchain_handle_ccw(dma32_t cda, struct channel_program *cp) 422 { 423 struct vfio_device *vdev = 424 &container_of(cp, struct vfio_ccw_private, cp)->vdev; 425 struct ccwchain *chain; 426 int len, ret; 427 u32 gcda; 428 429 gcda = dma32_to_u32(cda); 430 /* Copy 2K (the most we support today) of possible CCWs */ 431 ret = vfio_dma_rw(vdev, gcda, cp->guest_cp, CCWCHAIN_LEN_MAX * sizeof(struct ccw1), false); 432 if (ret) 433 return ret; 434 435 /* Convert any Format-0 CCWs to Format-1 */ 436 if (!cp->orb.cmd.fmt) 437 convert_ccw0_to_ccw1(cp->guest_cp, CCWCHAIN_LEN_MAX); 438 439 /* Count the CCWs in the current chain */ 440 len = ccwchain_calc_length(gcda, cp); 441 if (len < 0) 442 return len; 443 444 /* Need alloc a new chain for this one. 
*/ 445 chain = ccwchain_alloc(cp, len); 446 if (!chain) 447 return -ENOMEM; 448 449 chain->ch_len = len; 450 chain->ch_iova = gcda; 451 452 /* Copy the actual CCWs into the new chain */ 453 memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1)); 454 455 /* Loop for tics on this new chain. */ 456 ret = ccwchain_loop_tic(chain, cp); 457 458 if (ret) 459 ccwchain_free(chain); 460 461 return ret; 462 } 463 464 /* Loop for TICs. */ 465 static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp) 466 { 467 struct ccw1 *tic; 468 int i, ret; 469 470 for (i = 0; i < chain->ch_len; i++) { 471 tic = &chain->ch_ccw[i]; 472 473 if (!ccw_is_tic(tic)) 474 continue; 475 476 /* May transfer to an existing chain. */ 477 if (tic_target_chain_exists(tic, cp)) 478 continue; 479 480 /* Build a ccwchain for the next segment */ 481 ret = ccwchain_handle_ccw(tic->cda, cp); 482 if (ret) 483 return ret; 484 } 485 486 return 0; 487 } 488 489 static int ccwchain_fetch_tic(struct ccw1 *ccw, 490 struct channel_program *cp) 491 { 492 struct ccwchain *iter; 493 u32 offset, ccw_head; 494 495 list_for_each_entry(iter, &cp->ccwchain_list, next) { 496 ccw_head = iter->ch_iova; 497 if (is_cpa_within_range(ccw->cda, ccw_head, iter->ch_len)) { 498 /* Calculate offset of TIC target */ 499 offset = dma32_to_u32(ccw->cda) - ccw_head; 500 ccw->cda = virt_to_dma32((void *)iter->ch_ccw + offset); 501 return 0; 502 } 503 } 504 505 return -EFAULT; 506 } 507 508 static dma64_t *get_guest_idal(struct ccw1 *ccw, struct channel_program *cp, int idaw_nr) 509 { 510 struct vfio_device *vdev = 511 &container_of(cp, struct vfio_ccw_private, cp)->vdev; 512 dma64_t *idaws; 513 dma32_t *idaws_f1; 514 int idal_len = idaw_nr * sizeof(*idaws); 515 int idaw_size = idal_is_2k(cp) ? 
PAGE_SIZE / 2 : PAGE_SIZE; 516 int idaw_mask = ~(idaw_size - 1); 517 int i, ret; 518 519 idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL); 520 if (!idaws) 521 return ERR_PTR(-ENOMEM); 522 523 if (ccw_is_idal(ccw)) { 524 /* Copy IDAL from guest */ 525 ret = vfio_dma_rw(vdev, dma32_to_u32(ccw->cda), idaws, idal_len, false); 526 if (ret) { 527 kfree(idaws); 528 return ERR_PTR(ret); 529 } 530 } else { 531 /* Fabricate an IDAL based off CCW data address */ 532 if (cp->orb.cmd.c64) { 533 idaws[0] = u64_to_dma64(dma32_to_u32(ccw->cda)); 534 for (i = 1; i < idaw_nr; i++) { 535 idaws[i] = dma64_add(idaws[i - 1], idaw_size); 536 idaws[i] = dma64_and(idaws[i], idaw_mask); 537 } 538 } else { 539 idaws_f1 = (dma32_t *)idaws; 540 idaws_f1[0] = ccw->cda; 541 for (i = 1; i < idaw_nr; i++) { 542 idaws_f1[i] = dma32_add(idaws_f1[i - 1], idaw_size); 543 idaws_f1[i] = dma32_and(idaws_f1[i], idaw_mask); 544 } 545 } 546 } 547 548 return idaws; 549 } 550 551 /* 552 * ccw_count_idaws() - Calculate the number of IDAWs needed to transfer 553 * a specified amount of data 554 * 555 * @ccw: The Channel Command Word being translated 556 * @cp: Channel Program being processed 557 * 558 * The ORB is examined, since it specifies what IDAWs could actually be 559 * used by any CCW in the channel program, regardless of whether or not 560 * the CCW actually does. An ORB that does not specify Format-2-IDAW 561 * Control could still contain a CCW with an IDAL, which would be 562 * Format-1 and thus only move 2K with each IDAW. Thus all CCWs within 563 * the channel program must follow the same size requirements. 564 */ 565 static int ccw_count_idaws(struct ccw1 *ccw, 566 struct channel_program *cp) 567 { 568 struct vfio_device *vdev = 569 &container_of(cp, struct vfio_ccw_private, cp)->vdev; 570 u64 iova; 571 int size = cp->orb.cmd.c64 ? 
sizeof(u64) : sizeof(u32); 572 int ret; 573 int bytes = 1; 574 575 if (ccw->count) 576 bytes = ccw->count; 577 578 if (ccw_is_idal(ccw)) { 579 /* Read first IDAW to check its starting address. */ 580 /* All subsequent IDAWs will be 2K- or 4K-aligned. */ 581 ret = vfio_dma_rw(vdev, dma32_to_u32(ccw->cda), &iova, size, false); 582 if (ret) 583 return ret; 584 585 /* 586 * Format-1 IDAWs only occupy the first 32 bits, 587 * and bit 0 is always off. 588 */ 589 if (!cp->orb.cmd.c64) 590 iova = iova >> 32; 591 } else { 592 iova = dma32_to_u32(ccw->cda); 593 } 594 595 /* Format-1 IDAWs operate on 2K each */ 596 if (!cp->orb.cmd.c64) 597 return idal_2k_nr_words((void *)iova, bytes); 598 599 /* Using the 2K variant of Format-2 IDAWs? */ 600 if (cp->orb.cmd.i2k) 601 return idal_2k_nr_words((void *)iova, bytes); 602 603 /* The 'usual' case is 4K Format-2 IDAWs */ 604 return idal_nr_words((void *)iova, bytes); 605 } 606 607 static int ccwchain_fetch_ccw(struct ccw1 *ccw, 608 struct page_array *pa, 609 struct channel_program *cp) 610 { 611 struct vfio_device *vdev = 612 &container_of(cp, struct vfio_ccw_private, cp)->vdev; 613 dma64_t *idaws; 614 dma32_t *idaws_f1; 615 int ret; 616 int idaw_nr; 617 int i; 618 619 /* Calculate size of IDAL */ 620 idaw_nr = ccw_count_idaws(ccw, cp); 621 if (idaw_nr < 0) 622 return idaw_nr; 623 624 /* Allocate an IDAL from host storage */ 625 idaws = get_guest_idal(ccw, cp, idaw_nr); 626 if (IS_ERR(idaws)) { 627 ret = PTR_ERR(idaws); 628 goto out_init; 629 } 630 631 /* 632 * Allocate an array of pages to pin/translate. 633 * The number of pages is actually the count of the idaws 634 * required for the data transfer, since we only only support 635 * 4K IDAWs today. 636 */ 637 ret = page_array_alloc(pa, idaw_nr); 638 if (ret < 0) 639 goto out_free_idaws; 640 641 /* 642 * Copy guest IDAWs into page_array, in case the memory they 643 * occupy is not contiguous. 
644 */ 645 idaws_f1 = (dma32_t *)idaws; 646 for (i = 0; i < idaw_nr; i++) { 647 if (cp->orb.cmd.c64) 648 pa->pa_iova[i] = dma64_to_u64(idaws[i]); 649 else 650 pa->pa_iova[i] = dma32_to_u32(idaws_f1[i]); 651 } 652 653 if (ccw_does_data_transfer(ccw)) { 654 ret = page_array_pin(pa, vdev, idal_is_2k(cp)); 655 if (ret < 0) 656 goto out_unpin; 657 } else { 658 pa->pa_nr = 0; 659 } 660 661 ccw->cda = virt_to_dma32(idaws); 662 ccw->flags |= CCW_FLAG_IDA; 663 664 /* Populate the IDAL with pinned/translated addresses from page */ 665 page_array_idal_create_words(pa, idaws); 666 667 return 0; 668 669 out_unpin: 670 page_array_unpin_free(pa, vdev, idal_is_2k(cp)); 671 out_free_idaws: 672 kfree(idaws); 673 out_init: 674 ccw->cda = 0; 675 return ret; 676 } 677 678 /* 679 * Fetch one ccw. 680 * To reduce memory copy, we'll pin the cda page in memory, 681 * and to get rid of the cda 2G limitation of ccw1, we'll translate 682 * direct ccws to idal ccws. 683 */ 684 static int ccwchain_fetch_one(struct ccw1 *ccw, 685 struct page_array *pa, 686 struct channel_program *cp) 687 688 { 689 if (ccw_is_tic(ccw)) 690 return ccwchain_fetch_tic(ccw, cp); 691 692 return ccwchain_fetch_ccw(ccw, pa, cp); 693 } 694 695 /** 696 * cp_init() - allocate ccwchains for a channel program. 697 * @cp: channel_program on which to perform the operation 698 * @orb: control block for the channel program from the guest 699 * 700 * This creates one or more ccwchain(s), and copies the raw data of 701 * the target channel program from @orb->cmd.iova to the new ccwchain(s). 702 * 703 * Limitations: 704 * 1. Supports idal(c64) ccw chaining. 705 * 2. Supports 4k idaw. 706 * 707 * Returns: 708 * %0 on success and a negative error value on failure. 
709 */ 710 int cp_init(struct channel_program *cp, union orb *orb) 711 { 712 struct vfio_device *vdev = 713 &container_of(cp, struct vfio_ccw_private, cp)->vdev; 714 /* custom ratelimit used to avoid flood during guest IPL */ 715 static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 1); 716 int ret; 717 718 /* this is an error in the caller */ 719 if (cp->initialized) 720 return -EBUSY; 721 722 /* 723 * We only support prefetching the channel program. We assume all channel 724 * programs executed by supported guests likewise support prefetching. 725 * Executing a channel program that does not specify prefetching will 726 * typically not cause an error, but a warning is issued to help identify 727 * the problem if something does break. 728 */ 729 if (!orb->cmd.pfch && __ratelimit(&ratelimit_state)) 730 dev_warn( 731 vdev->dev, 732 "Prefetching channel program even though prefetch not specified in ORB"); 733 734 INIT_LIST_HEAD(&cp->ccwchain_list); 735 memcpy(&cp->orb, orb, sizeof(*orb)); 736 737 /* Build a ccwchain for the first CCW segment */ 738 ret = ccwchain_handle_ccw(orb->cmd.cpa, cp); 739 740 if (!ret) 741 cp->initialized = true; 742 743 return ret; 744 } 745 746 747 /** 748 * cp_free() - free resources for channel program. 749 * @cp: channel_program on which to perform the operation 750 * 751 * This unpins the memory pages and frees the memory space occupied by 752 * @cp, which must have been returned by a previous call to cp_init(). 753 * Otherwise, undefined behavior occurs. 
754 */ 755 void cp_free(struct channel_program *cp) 756 { 757 struct vfio_device *vdev = 758 &container_of(cp, struct vfio_ccw_private, cp)->vdev; 759 struct ccwchain *chain, *temp; 760 int i; 761 762 if (!cp->initialized) 763 return; 764 765 cp->initialized = false; 766 list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) { 767 for (i = 0; i < chain->ch_len; i++) { 768 page_array_unpin_free(&chain->ch_pa[i], vdev, idal_is_2k(cp)); 769 ccwchain_cda_free(chain, i); 770 } 771 ccwchain_free(chain); 772 } 773 } 774 775 /** 776 * cp_prefetch() - translate a guest physical address channel program to 777 * a real-device runnable channel program. 778 * @cp: channel_program on which to perform the operation 779 * 780 * This function translates the guest-physical-address channel program 781 * and stores the result to ccwchain list. @cp must have been 782 * initialized by a previous call with cp_init(). Otherwise, undefined 783 * behavior occurs. 784 * For each chain composing the channel program: 785 * - On entry ch_len holds the count of CCWs to be translated. 786 * - On exit ch_len is adjusted to the count of successfully translated CCWs. 787 * This allows cp_free to find in ch_len the count of CCWs to free in a chain. 788 * 789 * The S/390 CCW Translation APIS (prefixed by 'cp_') are introduced 790 * as helpers to do ccw chain translation inside the kernel. Basically 791 * they accept a channel program issued by a virtual machine, and 792 * translate the channel program to a real-device runnable channel 793 * program. 794 * 795 * These APIs will copy the ccws into kernel-space buffers, and update 796 * the guest physical addresses with their corresponding host physical 797 * addresses. Then channel I/O device drivers could issue the 798 * translated channel program to real devices to perform an I/O 799 * operation. 
800 * 801 * These interfaces are designed to support translation only for 802 * channel programs, which are generated and formatted by a 803 * guest. Thus this will make it possible for things like VFIO to 804 * leverage the interfaces to passthrough a channel I/O mediated 805 * device in QEMU. 806 * 807 * We support direct ccw chaining by translating them to idal ccws. 808 * 809 * Returns: 810 * %0 on success and a negative error value on failure. 811 */ 812 int cp_prefetch(struct channel_program *cp) 813 { 814 struct ccwchain *chain; 815 struct ccw1 *ccw; 816 struct page_array *pa; 817 int len, idx, ret; 818 819 /* this is an error in the caller */ 820 if (!cp->initialized) 821 return -EINVAL; 822 823 list_for_each_entry(chain, &cp->ccwchain_list, next) { 824 len = chain->ch_len; 825 for (idx = 0; idx < len; idx++) { 826 ccw = &chain->ch_ccw[idx]; 827 pa = &chain->ch_pa[idx]; 828 829 ret = ccwchain_fetch_one(ccw, pa, cp); 830 if (ret) 831 goto out_err; 832 } 833 } 834 835 return 0; 836 out_err: 837 /* Only cleanup the chain elements that were actually translated. */ 838 chain->ch_len = idx; 839 list_for_each_entry_continue(chain, &cp->ccwchain_list, next) { 840 chain->ch_len = 0; 841 } 842 return ret; 843 } 844 845 /** 846 * cp_get_orb() - get the orb of the channel program 847 * @cp: channel_program on which to perform the operation 848 * @sch: subchannel the operation will be performed against 849 * 850 * This function returns the address of the updated orb of the channel 851 * program. Channel I/O device drivers could use this orb to issue a 852 * ssch. 853 */ 854 union orb *cp_get_orb(struct channel_program *cp, struct subchannel *sch) 855 { 856 union orb *orb; 857 struct ccwchain *chain; 858 struct ccw1 *cpa; 859 860 /* this is an error in the caller */ 861 if (!cp->initialized) 862 return NULL; 863 864 orb = &cp->orb; 865 866 orb->cmd.intparm = (u32)virt_to_phys(sch); 867 orb->cmd.fmt = 1; 868 869 /* 870 * Everything built by vfio-ccw is a Format-2 IDAL. 
871 * If the input was a Format-1 IDAL, indicate that 872 * 2K Format-2 IDAWs were created here. 873 */ 874 if (!orb->cmd.c64) 875 orb->cmd.i2k = 1; 876 orb->cmd.c64 = 1; 877 878 if (orb->cmd.lpm == 0) 879 orb->cmd.lpm = sch->lpm; 880 881 chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next); 882 cpa = chain->ch_ccw; 883 orb->cmd.cpa = virt_to_dma32(cpa); 884 885 return orb; 886 } 887 888 /** 889 * cp_update_scsw() - update scsw for a channel program. 890 * @cp: channel_program on which to perform the operation 891 * @scsw: I/O results of the channel program and also the target to be 892 * updated 893 * 894 * @scsw contains the I/O results of the channel program that pointed 895 * to by @cp. However what @scsw->cpa stores is a host physical 896 * address, which is meaningless for the guest, which is waiting for 897 * the I/O results. 898 * 899 * This function updates @scsw->cpa to its coressponding guest physical 900 * address. 901 */ 902 void cp_update_scsw(struct channel_program *cp, union scsw *scsw) 903 { 904 struct ccwchain *chain; 905 dma32_t cpa = scsw->cmd.cpa; 906 u32 ccw_head; 907 908 if (!cp->initialized) 909 return; 910 911 /* 912 * LATER: 913 * For now, only update the cmd.cpa part. We may need to deal with 914 * other portions of the schib as well, even if we don't return them 915 * in the ioctl directly. Path status changes etc. 916 */ 917 list_for_each_entry(chain, &cp->ccwchain_list, next) { 918 ccw_head = dma32_to_u32(virt_to_dma32(chain->ch_ccw)); 919 /* 920 * On successful execution, cpa points just beyond the end 921 * of the chain. 922 */ 923 if (is_cpa_within_range(cpa, ccw_head, chain->ch_len + 1)) { 924 /* 925 * (cpa - ccw_head) is the offset value of the host 926 * physical ccw to its chain head. 
927 * Adding this value to the guest physical ccw chain 928 * head gets us the guest cpa: 929 * cpa = chain->ch_iova + (cpa - ccw_head) 930 */ 931 cpa = dma32_add(cpa, chain->ch_iova - ccw_head); 932 break; 933 } 934 } 935 936 scsw->cmd.cpa = cpa; 937 } 938 939 /** 940 * cp_iova_pinned() - check if an iova is pinned for a ccw chain. 941 * @cp: channel_program on which to perform the operation 942 * @iova: the iova to check 943 * @length: the length to check from @iova 944 * 945 * If the @iova is currently pinned for the ccw chain, return true; 946 * else return false. 947 */ 948 bool cp_iova_pinned(struct channel_program *cp, u64 iova, u64 length) 949 { 950 struct ccwchain *chain; 951 int i; 952 953 if (!cp->initialized) 954 return false; 955 956 list_for_each_entry(chain, &cp->ccwchain_list, next) { 957 for (i = 0; i < chain->ch_len; i++) 958 if (page_array_iova_pinned(&chain->ch_pa[i], iova, length)) 959 return true; 960 } 961 962 return false; 963 } 964