1 /* 2 * Copyright(c) 2015, 2016 Intel Corporation. 3 * 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 * redistributing this file, you may do so under either license. 6 * 7 * GPL LICENSE SUMMARY 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of version 2 of the GNU General Public License as 11 * published by the Free Software Foundation. 12 * 13 * This program is distributed in the hope that it will be useful, but 14 * WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * General Public License for more details. 17 * 18 * BSD LICENSE 19 * 20 * Redistribution and use in source and binary forms, with or without 21 * modification, are permitted provided that the following conditions 22 * are met: 23 * 24 * - Redistributions of source code must retain the above copyright 25 * notice, this list of conditions and the following disclaimer. 26 * - Redistributions in binary form must reproduce the above copyright 27 * notice, this list of conditions and the following disclaimer in 28 * the documentation and/or other materials provided with the 29 * distribution. 30 * - Neither the name of Intel Corporation nor the names of its 31 * contributors may be used to endorse or promote products derived 32 * from this software without specific prior written permission. 33 * 34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 * 46 */ 47 48 #include "hfi.h" 49 50 /* additive distance between non-SOP and SOP space */ 51 #define SOP_DISTANCE (TXE_PIO_SIZE / 2) 52 #define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1) 53 /* number of QUADWORDs in a block */ 54 #define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64)) 55 56 /** 57 * pio_copy - copy data block to MMIO space 58 * @pbuf: a number of blocks allocated within a PIO send context 59 * @pbc: PBC to send 60 * @from: source, must be 8 byte aligned 61 * @count: number of DWORD (32-bit) quantities to copy from source 62 * 63 * Copy data from source to PIO Send Buffer memory, 8 bytes at a time. 64 * Must always write full BLOCK_SIZE bytes blocks. The first block must 65 * be written to the corresponding SOP=1 address. 66 * 67 * Known: 68 * o pbuf->start always starts on a block boundary 69 * o pbuf can wrap only at a block boundary 70 */ 71 void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, 72 const void *from, size_t count) 73 { 74 void __iomem *dest = pbuf->start + SOP_DISTANCE; 75 void __iomem *send = dest + PIO_BLOCK_SIZE; 76 void __iomem *dend; /* 8-byte data end */ 77 78 /* write the PBC */ 79 writeq(pbc, dest); 80 dest += sizeof(u64); 81 82 /* calculate where the QWORD data ends - in SOP=1 space */ 83 dend = dest + ((count >> 1) * sizeof(u64)); 84 85 if (dend < send) { 86 /* 87 * all QWORD data is within the SOP block, does *not* 88 * reach the end of the SOP block 89 */ 90 91 while (dest < dend) { 92 writeq(*(u64 *)from, dest); 93 from += sizeof(u64); 94 dest += sizeof(u64); 95 } 96 /* 97 * No boundary checks are needed here: 98 * 0. We're not on the SOP block boundary 99 * 1. The possible DWORD dangle will still be within 100 * the SOP block 101 * 2. We cannot wrap except on a block boundary. 102 */ 103 } else { 104 /* QWORD data extends _to_ or beyond the SOP block */ 105 106 /* write 8-byte SOP chunk data */ 107 while (dest < send) { 108 writeq(*(u64 *)from, dest); 109 from += sizeof(u64); 110 dest += sizeof(u64); 111 } 112 /* drop out of the SOP range */ 113 dest -= SOP_DISTANCE; 114 dend -= SOP_DISTANCE; 115 116 /* 117 * If the wrap comes before or matches the data end, 118 * copy until until the wrap, then wrap. 119 * 120 * If the data ends at the end of the SOP above and 121 * the buffer wraps, then pbuf->end == dend == dest 122 * and nothing will get written, but we will wrap in 123 * case there is a dangling DWORD. 124 */ 125 if (pbuf->end <= dend) { 126 while (dest < pbuf->end) { 127 writeq(*(u64 *)from, dest); 128 from += sizeof(u64); 129 dest += sizeof(u64); 130 } 131 132 dest -= pbuf->size; 133 dend -= pbuf->size; 134 } 135 136 /* write 8-byte non-SOP, non-wrap chunk data */ 137 while (dest < dend) { 138 writeq(*(u64 *)from, dest); 139 from += sizeof(u64); 140 dest += sizeof(u64); 141 } 142 } 143 /* at this point we have wrapped if we are going to wrap */ 144 145 /* write dangling u32, if any */ 146 if (count & 1) { 147 union mix val; 148 149 val.val64 = 0; 150 val.val32[0] = *(u32 *)from; 151 writeq(val.val64, dest); 152 dest += sizeof(u64); 153 } 154 /* 155 * fill in rest of block, no need to check pbuf->end 156 * as we only wrap on a block boundary 157 */ 158 while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) { 159 writeq(0, dest); 160 dest += sizeof(u64); 161 } 162 163 /* finished with this buffer */ 164 this_cpu_dec(*pbuf->sc->buffers_allocated); 165 preempt_enable(); 166 } 167 168 /* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */ 169 #define USE_SHIFTS 1 170 #ifdef USE_SHIFTS 171 /* 172 * Handle carry bytes using shifts and masks. 173 * 174 * NOTE: the value the unused portion of carry is expected to always be zero. 175 */ 176 177 /* 178 * "zero" shift - bit shift used to zero out upper bytes. Input is 179 * the count of LSB bytes to preserve. 180 */ 181 #define zshift(x) (8 * (8 - (x))) 182 183 /* 184 * "merge" shift - bit shift used to merge with carry bytes. Input is 185 * the LSB byte count to move beyond. 186 */ 187 #define mshift(x) (8 * (x)) 188 189 /* 190 * Read nbytes bytes from "from" and return them in the LSB bytes 191 * of pbuf->carry. Other bytes are zeroed. Any previous value 192 * pbuf->carry is lost. 193 * 194 * NOTES: 195 * o do not read from from if nbytes is zero 196 * o from may _not_ be u64 aligned 197 * o nbytes must not span a QW boundary 198 */ 199 static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, 200 unsigned int nbytes) 201 { 202 unsigned long off; 203 204 if (nbytes == 0) { 205 pbuf->carry.val64 = 0; 206 } else { 207 /* align our pointer */ 208 off = (unsigned long)from & 0x7; 209 from = (void *)((unsigned long)from & ~0x7l); 210 pbuf->carry.val64 = ((*(u64 *)from) 211 << zshift(nbytes + off))/* zero upper bytes */ 212 >> zshift(nbytes); /* place at bottom */ 213 } 214 pbuf->carry_bytes = nbytes; 215 } 216 217 /* 218 * Read nbytes bytes from "from" and put them at the next significant bytes 219 * of pbuf->carry. Unused bytes are zeroed. It is expected that the extra 220 * read does not overfill carry. 221 * 222 * NOTES: 223 * o from may _not_ be u64 aligned 224 * o nbytes may span a QW boundary 225 */ 226 static inline void read_extra_bytes(struct pio_buf *pbuf, 227 const void *from, unsigned int nbytes) 228 { 229 unsigned long off = (unsigned long)from & 0x7; 230 unsigned int room, xbytes; 231 232 /* align our pointer */ 233 from = (void *)((unsigned long)from & ~0x7l); 234 235 /* check count first - don't read anything if count is zero */ 236 while (nbytes) { 237 /* find the number of bytes in this u64 */ 238 room = 8 - off; /* this u64 has room for this many bytes */ 239 xbytes = min(room, nbytes); 240 241 /* 242 * shift down to zero lower bytes, shift up to zero upper 243 * bytes, shift back down to move into place 244 */ 245 pbuf->carry.val64 |= (((*(u64 *)from) 246 >> mshift(off)) 247 << zshift(xbytes)) 248 >> zshift(xbytes + pbuf->carry_bytes); 249 off = 0; 250 pbuf->carry_bytes += xbytes; 251 nbytes -= xbytes; 252 from += sizeof(u64); 253 } 254 } 255 256 /* 257 * Zero extra bytes from the end of pbuf->carry. 258 * 259 * NOTES: 260 * o zbytes <= old_bytes 261 */ 262 static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes) 263 { 264 unsigned int remaining; 265 266 if (zbytes == 0) /* nothing to do */ 267 return; 268 269 remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */ 270 271 /* NOTE: zshift only guaranteed to work if remaining != 0 */ 272 if (remaining) 273 pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining)) 274 >> zshift(remaining); 275 else 276 pbuf->carry.val64 = 0; 277 pbuf->carry_bytes = remaining; 278 } 279 280 /* 281 * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. 282 * Put the unused part of the next 8 bytes of src into the LSB bytes of 283 * pbuf->carry with the upper bytes zeroed.. 284 * 285 * NOTES: 286 * o result must keep unused bytes zeroed 287 * o src must be u64 aligned 288 */ 289 static inline void merge_write8( 290 struct pio_buf *pbuf, 291 void __iomem *dest, 292 const void *src) 293 { 294 u64 new, temp; 295 296 new = *(u64 *)src; 297 temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes)); 298 writeq(temp, dest); 299 pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes); 300 } 301 302 /* 303 * Write a quad word using all bytes of carry. 304 */ 305 static inline void carry8_write8(union mix carry, void __iomem *dest) 306 { 307 writeq(carry.val64, dest); 308 } 309 310 /* 311 * Write a quad word using all the valid bytes of carry. If carry 312 * has zero valid bytes, nothing is written. 313 * Returns 0 on nothing written, non-zero on quad word written. 314 */ 315 static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest) 316 { 317 if (pbuf->carry_bytes) { 318 /* unused bytes are always kept zeroed, so just write */ 319 writeq(pbuf->carry.val64, dest); 320 return 1; 321 } 322 323 return 0; 324 } 325 326 #else /* USE_SHIFTS */ 327 /* 328 * Handle carry bytes using byte copies. 329 * 330 * NOTE: the value the unused portion of carry is left uninitialized. 331 */ 332 333 /* 334 * Jump copy - no-loop copy for < 8 bytes. 335 */ 336 static inline void jcopy(u8 *dest, const u8 *src, u32 n) 337 { 338 switch (n) { 339 case 7: 340 *dest++ = *src++; 341 case 6: 342 *dest++ = *src++; 343 case 5: 344 *dest++ = *src++; 345 case 4: 346 *dest++ = *src++; 347 case 3: 348 *dest++ = *src++; 349 case 2: 350 *dest++ = *src++; 351 case 1: 352 *dest++ = *src++; 353 } 354 } 355 356 /* 357 * Read nbytes from "from" and and place them in the low bytes 358 * of pbuf->carry. Other bytes are left as-is. Any previous 359 * value in pbuf->carry is lost. 360 * 361 * NOTES: 362 * o do not read from from if nbytes is zero 363 * o from may _not_ be u64 aligned. 364 */ 365 static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, 366 unsigned int nbytes) 367 { 368 jcopy(&pbuf->carry.val8[0], from, nbytes); 369 pbuf->carry_bytes = nbytes; 370 } 371 372 /* 373 * Read nbytes bytes from "from" and put them at the end of pbuf->carry. 374 * It is expected that the extra read does not overfill carry. 375 * 376 * NOTES: 377 * o from may _not_ be u64 aligned 378 * o nbytes may span a QW boundary 379 */ 380 static inline void read_extra_bytes(struct pio_buf *pbuf, 381 const void *from, unsigned int nbytes) 382 { 383 jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes); 384 pbuf->carry_bytes += nbytes; 385 } 386 387 /* 388 * Zero extra bytes from the end of pbuf->carry. 389 * 390 * We do not care about the value of unused bytes in carry, so just 391 * reduce the byte count. 392 * 393 * NOTES: 394 * o zbytes <= old_bytes 395 */ 396 static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes) 397 { 398 pbuf->carry_bytes -= zbytes; 399 } 400 401 /* 402 * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. 403 * Put the unused part of the next 8 bytes of src into the low bytes of 404 * pbuf->carry. 405 */ 406 static inline void merge_write8( 407 struct pio_buf *pbuf, 408 void *dest, 409 const void *src) 410 { 411 u32 remainder = 8 - pbuf->carry_bytes; 412 413 jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder); 414 writeq(pbuf->carry.val64, dest); 415 jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes); 416 } 417 418 /* 419 * Write a quad word using all bytes of carry. 420 */ 421 static inline void carry8_write8(union mix carry, void *dest) 422 { 423 writeq(carry.val64, dest); 424 } 425 426 /* 427 * Write a quad word using all the valid bytes of carry. If carry 428 * has zero valid bytes, nothing is written. 429 * Returns 0 on nothing written, non-zero on quad word written. 430 */ 431 static inline int carry_write8(struct pio_buf *pbuf, void *dest) 432 { 433 if (pbuf->carry_bytes) { 434 u64 zero = 0; 435 436 jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero, 437 8 - pbuf->carry_bytes); 438 writeq(pbuf->carry.val64, dest); 439 return 1; 440 } 441 442 return 0; 443 } 444 #endif /* USE_SHIFTS */ 445 446 /* 447 * Segmented PIO Copy - start 448 * 449 * Start a PIO copy. 450 * 451 * @pbuf: destination buffer 452 * @pbc: the PBC for the PIO buffer 453 * @from: data source, QWORD aligned 454 * @nbytes: bytes to copy 455 */ 456 void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, 457 const void *from, size_t nbytes) 458 { 459 void __iomem *dest = pbuf->start + SOP_DISTANCE; 460 void __iomem *send = dest + PIO_BLOCK_SIZE; 461 void __iomem *dend; /* 8-byte data end */ 462 463 writeq(pbc, dest); 464 dest += sizeof(u64); 465 466 /* calculate where the QWORD data ends - in SOP=1 space */ 467 dend = dest + ((nbytes >> 3) * sizeof(u64)); 468 469 if (dend < send) { 470 /* 471 * all QWORD data is within the SOP block, does *not* 472 * reach the end of the SOP block 473 */ 474 475 while (dest < dend) { 476 writeq(*(u64 *)from, dest); 477 from += sizeof(u64); 478 dest += sizeof(u64); 479 } 480 /* 481 * No boundary checks are needed here: 482 * 0. We're not on the SOP block boundary 483 * 1. The possible DWORD dangle will still be within 484 * the SOP block 485 * 2. We cannot wrap except on a block boundary. 486 */ 487 } else { 488 /* QWORD data extends _to_ or beyond the SOP block */ 489 490 /* write 8-byte SOP chunk data */ 491 while (dest < send) { 492 writeq(*(u64 *)from, dest); 493 from += sizeof(u64); 494 dest += sizeof(u64); 495 } 496 /* drop out of the SOP range */ 497 dest -= SOP_DISTANCE; 498 dend -= SOP_DISTANCE; 499 500 /* 501 * If the wrap comes before or matches the data end, 502 * copy until until the wrap, then wrap. 503 * 504 * If the data ends at the end of the SOP above and 505 * the buffer wraps, then pbuf->end == dend == dest 506 * and nothing will get written, but we will wrap in 507 * case there is a dangling DWORD. 508 */ 509 if (pbuf->end <= dend) { 510 while (dest < pbuf->end) { 511 writeq(*(u64 *)from, dest); 512 from += sizeof(u64); 513 dest += sizeof(u64); 514 } 515 516 dest -= pbuf->size; 517 dend -= pbuf->size; 518 } 519 520 /* write 8-byte non-SOP, non-wrap chunk data */ 521 while (dest < dend) { 522 writeq(*(u64 *)from, dest); 523 from += sizeof(u64); 524 dest += sizeof(u64); 525 } 526 } 527 /* at this point we have wrapped if we are going to wrap */ 528 529 /* ...but it doesn't matter as we're done writing */ 530 531 /* save dangling bytes, if any */ 532 read_low_bytes(pbuf, from, nbytes & 0x7); 533 534 pbuf->qw_written = 1 /*PBC*/ + (nbytes >> 3); 535 } 536 537 /* 538 * Mid copy helper, "mixed case" - source is 64-bit aligned but carry 539 * bytes are non-zero. 540 * 541 * Whole u64s must be written to the chip, so bytes must be manually merged. 542 * 543 * @pbuf: destination buffer 544 * @from: data source, is QWORD aligned. 545 * @nbytes: bytes to copy 546 * 547 * Must handle nbytes < 8. 548 */ 549 static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) 550 { 551 void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); 552 void __iomem *dend; /* 8-byte data end */ 553 unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3; 554 unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7; 555 556 /* calculate 8-byte data end */ 557 dend = dest + (qw_to_write * sizeof(u64)); 558 559 if (pbuf->qw_written < PIO_BLOCK_QWS) { 560 /* 561 * Still within SOP block. We don't need to check for 562 * wrap because we are still in the first block and 563 * can only wrap on block boundaries. 564 */ 565 void __iomem *send; /* SOP end */ 566 void __iomem *xend; 567 568 /* 569 * calculate the end of data or end of block, whichever 570 * comes first 571 */ 572 send = pbuf->start + PIO_BLOCK_SIZE; 573 xend = min(send, dend); 574 575 /* shift up to SOP=1 space */ 576 dest += SOP_DISTANCE; 577 xend += SOP_DISTANCE; 578 579 /* write 8-byte chunk data */ 580 while (dest < xend) { 581 merge_write8(pbuf, dest, from); 582 from += sizeof(u64); 583 dest += sizeof(u64); 584 } 585 586 /* shift down to SOP=0 space */ 587 dest -= SOP_DISTANCE; 588 } 589 /* 590 * At this point dest could be (either, both, or neither): 591 * - at dend 592 * - at the wrap 593 */ 594 595 /* 596 * If the wrap comes before or matches the data end, 597 * copy until until the wrap, then wrap. 598 * 599 * If dest is at the wrap, we will fall into the if, 600 * not do the loop, when wrap. 601 * 602 * If the data ends at the end of the SOP above and 603 * the buffer wraps, then pbuf->end == dend == dest 604 * and nothing will get written. 605 */ 606 if (pbuf->end <= dend) { 607 while (dest < pbuf->end) { 608 merge_write8(pbuf, dest, from); 609 from += sizeof(u64); 610 dest += sizeof(u64); 611 } 612 613 dest -= pbuf->size; 614 dend -= pbuf->size; 615 } 616 617 /* write 8-byte non-SOP, non-wrap chunk data */ 618 while (dest < dend) { 619 merge_write8(pbuf, dest, from); 620 from += sizeof(u64); 621 dest += sizeof(u64); 622 } 623 624 /* adjust carry */ 625 if (pbuf->carry_bytes < bytes_left) { 626 /* need to read more */ 627 read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes); 628 } else { 629 /* remove invalid bytes */ 630 zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left); 631 } 632 633 pbuf->qw_written += qw_to_write; 634 } 635 636 /* 637 * Mid copy helper, "straight case" - source pointer is 64-bit aligned 638 * with no carry bytes. 639 * 640 * @pbuf: destination buffer 641 * @from: data source, is QWORD aligned 642 * @nbytes: bytes to copy 643 * 644 * Must handle nbytes < 8. 645 */ 646 static void mid_copy_straight(struct pio_buf *pbuf, 647 const void *from, size_t nbytes) 648 { 649 void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); 650 void __iomem *dend; /* 8-byte data end */ 651 652 /* calculate 8-byte data end */ 653 dend = dest + ((nbytes >> 3) * sizeof(u64)); 654 655 if (pbuf->qw_written < PIO_BLOCK_QWS) { 656 /* 657 * Still within SOP block. We don't need to check for 658 * wrap because we are still in the first block and 659 * can only wrap on block boundaries. 660 */ 661 void __iomem *send; /* SOP end */ 662 void __iomem *xend; 663 664 /* 665 * calculate the end of data or end of block, whichever 666 * comes first 667 */ 668 send = pbuf->start + PIO_BLOCK_SIZE; 669 xend = min(send, dend); 670 671 /* shift up to SOP=1 space */ 672 dest += SOP_DISTANCE; 673 xend += SOP_DISTANCE; 674 675 /* write 8-byte chunk data */ 676 while (dest < xend) { 677 writeq(*(u64 *)from, dest); 678 from += sizeof(u64); 679 dest += sizeof(u64); 680 } 681 682 /* shift down to SOP=0 space */ 683 dest -= SOP_DISTANCE; 684 } 685 /* 686 * At this point dest could be (either, both, or neither): 687 * - at dend 688 * - at the wrap 689 */ 690 691 /* 692 * If the wrap comes before or matches the data end, 693 * copy until until the wrap, then wrap. 694 * 695 * If dest is at the wrap, we will fall into the if, 696 * not do the loop, when wrap. 697 * 698 * If the data ends at the end of the SOP above and 699 * the buffer wraps, then pbuf->end == dend == dest 700 * and nothing will get written. 701 */ 702 if (pbuf->end <= dend) { 703 while (dest < pbuf->end) { 704 writeq(*(u64 *)from, dest); 705 from += sizeof(u64); 706 dest += sizeof(u64); 707 } 708 709 dest -= pbuf->size; 710 dend -= pbuf->size; 711 } 712 713 /* write 8-byte non-SOP, non-wrap chunk data */ 714 while (dest < dend) { 715 writeq(*(u64 *)from, dest); 716 from += sizeof(u64); 717 dest += sizeof(u64); 718 } 719 720 /* we know carry_bytes was zero on entry to this routine */ 721 read_low_bytes(pbuf, from, nbytes & 0x7); 722 723 pbuf->qw_written += nbytes >> 3; 724 } 725 726 /* 727 * Segmented PIO Copy - middle 728 * 729 * Must handle any aligned tail and any aligned source with any byte count. 730 * 731 * @pbuf: a number of blocks allocated within a PIO send context 732 * @from: data source 733 * @nbytes: number of bytes to copy 734 */ 735 void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes) 736 { 737 unsigned long from_align = (unsigned long)from & 0x7; 738 739 if (pbuf->carry_bytes + nbytes < 8) { 740 /* not enough bytes to fill a QW */ 741 read_extra_bytes(pbuf, from, nbytes); 742 return; 743 } 744 745 if (from_align) { 746 /* misaligned source pointer - align it */ 747 unsigned long to_align; 748 749 /* bytes to read to align "from" */ 750 to_align = 8 - from_align; 751 752 /* 753 * In the advance-to-alignment logic below, we do not need 754 * to check if we are using more than nbytes. This is because 755 * if we are here, we already know that carry+nbytes will 756 * fill at least one QW. 757 */ 758 if (pbuf->carry_bytes + to_align < 8) { 759 /* not enough align bytes to fill a QW */ 760 read_extra_bytes(pbuf, from, to_align); 761 from += to_align; 762 nbytes -= to_align; 763 } else { 764 /* bytes to fill carry */ 765 unsigned long to_fill = 8 - pbuf->carry_bytes; 766 /* bytes left over to be read */ 767 unsigned long extra = to_align - to_fill; 768 void __iomem *dest; 769 770 /* fill carry... */ 771 read_extra_bytes(pbuf, from, to_fill); 772 from += to_fill; 773 nbytes -= to_fill; 774 775 /* ...now write carry */ 776 dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); 777 778 /* 779 * The two checks immediately below cannot both be 780 * true, hence the else. If we have wrapped, we 781 * cannot still be within the first block. 782 * Conversely, if we are still in the first block, we 783 * cannot have wrapped. We do the wrap check first 784 * as that is more likely. 785 */ 786 /* adjust if we've wrapped */ 787 if (dest >= pbuf->end) 788 dest -= pbuf->size; 789 /* jump to SOP range if within the first block */ 790 else if (pbuf->qw_written < PIO_BLOCK_QWS) 791 dest += SOP_DISTANCE; 792 793 carry8_write8(pbuf->carry, dest); 794 pbuf->qw_written++; 795 796 /* read any extra bytes to do final alignment */ 797 /* this will overwrite anything in pbuf->carry */ 798 read_low_bytes(pbuf, from, extra); 799 from += extra; 800 nbytes -= extra; 801 } 802 803 /* at this point, from is QW aligned */ 804 } 805 806 if (pbuf->carry_bytes) 807 mid_copy_mix(pbuf, from, nbytes); 808 else 809 mid_copy_straight(pbuf, from, nbytes); 810 } 811 812 /* 813 * Segmented PIO Copy - end 814 * 815 * Write any remainder (in pbuf->carry) and finish writing the whole block. 816 * 817 * @pbuf: a number of blocks allocated within a PIO send context 818 */ 819 void seg_pio_copy_end(struct pio_buf *pbuf) 820 { 821 void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); 822 823 /* 824 * The two checks immediately below cannot both be true, hence the 825 * else. If we have wrapped, we cannot still be within the first 826 * block. Conversely, if we are still in the first block, we 827 * cannot have wrapped. We do the wrap check first as that is 828 * more likely. 829 */ 830 /* adjust if we have wrapped */ 831 if (dest >= pbuf->end) 832 dest -= pbuf->size; 833 /* jump to the SOP range if within the first block */ 834 else if (pbuf->qw_written < PIO_BLOCK_QWS) 835 dest += SOP_DISTANCE; 836 837 /* write final bytes, if any */ 838 if (carry_write8(pbuf, dest)) { 839 dest += sizeof(u64); 840 /* 841 * NOTE: We do not need to recalculate whether dest needs 842 * SOP_DISTANCE or not. 843 * 844 * If we are in the first block and the dangle write 845 * keeps us in the same block, dest will need 846 * to retain SOP_DISTANCE in the loop below. 847 * 848 * If we are in the first block and the dangle write pushes 849 * us to the next block, then loop below will not run 850 * and dest is not used. Hence we do not need to update 851 * it. 852 * 853 * If we are past the first block, then SOP_DISTANCE 854 * was never added, so there is nothing to do. 855 */ 856 } 857 858 /* fill in rest of block */ 859 while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) { 860 writeq(0, dest); 861 dest += sizeof(u64); 862 } 863 864 /* finished with this buffer */ 865 this_cpu_dec(*pbuf->sc->buffers_allocated); 866 preempt_enable(); 867 } 868