xref: /linux/drivers/infiniband/hw/hfi1/pio_copy.c (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 /*
2  * Copyright(c) 2015, 2016 Intel Corporation.
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of version 2 of the GNU General Public License as
11  * published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * BSD LICENSE
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  *
24  *  - Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  *  - Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in
28  *    the documentation and/or other materials provided with the
29  *    distribution.
30  *  - Neither the name of Intel Corporation nor the names of its
31  *    contributors may be used to endorse or promote products derived
32  *    from this software without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  *
46  */
47 
48 #include "hfi.h"
49 
/*
 * additive distance between non-SOP and SOP space: the copy routines in
 * this file add it to a buffer address to reach the SOP=1 address and
 * subtract it again to drop back out of the SOP range
 */
#define SOP_DISTANCE (TXE_PIO_SIZE / 2)
/*
 * mask of the byte offset within a PIO block; a zero result means the
 * address sits on a block boundary
 * NOTE(review): relies on PIO_BLOCK_SIZE being a power of two - confirm
 */
#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1)
/* number of QUADWORDs in a block */
#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64))
55 
56 /**
57  * pio_copy - copy data block to MMIO space
58  * @pbuf: a number of blocks allocated within a PIO send context
59  * @pbc: PBC to send
60  * @from: source, must be 8 byte aligned
61  * @count: number of DWORD (32-bit) quantities to copy from source
62  *
63  * Copy data from source to PIO Send Buffer memory, 8 bytes at a time.
64  * Must always write full BLOCK_SIZE bytes blocks.  The first block must
65  * be written to the corresponding SOP=1 address.
66  *
67  * Known:
68  * o pbuf->start always starts on a block boundary
69  * o pbuf can wrap only at a block boundary
70  */
71 void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
72 	      const void *from, size_t count)
73 {
74 	void __iomem *dest = pbuf->start + SOP_DISTANCE;
75 	void __iomem *send = dest + PIO_BLOCK_SIZE;
76 	void __iomem *dend;			/* 8-byte data end */
77 
78 	/* write the PBC */
79 	writeq(pbc, dest);
80 	dest += sizeof(u64);
81 
82 	/* calculate where the QWORD data ends - in SOP=1 space */
83 	dend = dest + ((count >> 1) * sizeof(u64));
84 
85 	if (dend < send) {
86 		/*
87 		 * all QWORD data is within the SOP block, does *not*
88 		 * reach the end of the SOP block
89 		 */
90 
91 		while (dest < dend) {
92 			writeq(*(u64 *)from, dest);
93 			from += sizeof(u64);
94 			dest += sizeof(u64);
95 		}
96 		/*
97 		 * No boundary checks are needed here:
98 		 * 0. We're not on the SOP block boundary
99 		 * 1. The possible DWORD dangle will still be within
100 		 *    the SOP block
101 		 * 2. We cannot wrap except on a block boundary.
102 		 */
103 	} else {
104 		/* QWORD data extends _to_ or beyond the SOP block */
105 
106 		/* write 8-byte SOP chunk data */
107 		while (dest < send) {
108 			writeq(*(u64 *)from, dest);
109 			from += sizeof(u64);
110 			dest += sizeof(u64);
111 		}
112 		/* drop out of the SOP range */
113 		dest -= SOP_DISTANCE;
114 		dend -= SOP_DISTANCE;
115 
116 		/*
117 		 * If the wrap comes before or matches the data end,
118 		 * copy until until the wrap, then wrap.
119 		 *
120 		 * If the data ends at the end of the SOP above and
121 		 * the buffer wraps, then pbuf->end == dend == dest
122 		 * and nothing will get written, but we will wrap in
123 		 * case there is a dangling DWORD.
124 		 */
125 		if (pbuf->end <= dend) {
126 			while (dest < pbuf->end) {
127 				writeq(*(u64 *)from, dest);
128 				from += sizeof(u64);
129 				dest += sizeof(u64);
130 			}
131 
132 			dest -= pbuf->size;
133 			dend -= pbuf->size;
134 		}
135 
136 		/* write 8-byte non-SOP, non-wrap chunk data */
137 		while (dest < dend) {
138 			writeq(*(u64 *)from, dest);
139 			from += sizeof(u64);
140 			dest += sizeof(u64);
141 		}
142 	}
143 	/* at this point we have wrapped if we are going to wrap */
144 
145 	/* write dangling u32, if any */
146 	if (count & 1) {
147 		union mix val;
148 
149 		val.val64 = 0;
150 		val.val32[0] = *(u32 *)from;
151 		writeq(val.val64, dest);
152 		dest += sizeof(u64);
153 	}
154 	/*
155 	 * fill in rest of block, no need to check pbuf->end
156 	 * as we only wrap on a block boundary
157 	 */
158 	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
159 		writeq(0, dest);
160 		dest += sizeof(u64);
161 	}
162 
163 	/* finished with this buffer */
164 	this_cpu_dec(*pbuf->sc->buffers_allocated);
165 	preempt_enable();
166 }
167 
168 /* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */
169 #define USE_SHIFTS 1
170 #ifdef USE_SHIFTS
171 /*
172  * Handle carry bytes using shifts and masks.
173  *
174  * NOTE: the value of the unused portion of carry is expected to always be zero.
175  */
176 
/*
 * "zero" shift - bit count that, used as a left-then-right shift pair on a
 * 64-bit value, clears every byte above the low x bytes.  Input is the
 * number of LSB bytes to preserve; zshift(8) is 0 and zshift(0) is 64,
 * which is not a usable shift amount for a 64-bit value.
 */
#define zshift(x) (64 - 8 * (x))

/*
 * "merge" shift - bit count that moves a value up past x LSB bytes so it
 * can be OR-ed in above the carry bytes.  Input is the LSB byte count to
 * move beyond.
 */
#define mshift(x) ((x) * 8)
188 
189 /*
190  * Read nbytes bytes from "from" and return them in the LSB bytes
191  * of pbuf->carry.  Other bytes are zeroed.  Any previous value
192  * pbuf->carry is lost.
193  *
194  * NOTES:
195  * o do not read from from if nbytes is zero
196  * o from may _not_ be u64 aligned
197  * o nbytes must not span a QW boundary
198  */
199 static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
200 				  unsigned int nbytes)
201 {
202 	unsigned long off;
203 
204 	if (nbytes == 0) {
205 		pbuf->carry.val64 = 0;
206 	} else {
207 		/* align our pointer */
208 		off = (unsigned long)from & 0x7;
209 		from = (void *)((unsigned long)from & ~0x7l);
210 		pbuf->carry.val64 = ((*(u64 *)from)
211 				<< zshift(nbytes + off))/* zero upper bytes */
212 				>> zshift(nbytes);	/* place at bottom */
213 	}
214 	pbuf->carry_bytes = nbytes;
215 }
216 
217 /*
218  * Read nbytes bytes from "from" and put them at the next significant bytes
219  * of pbuf->carry.  Unused bytes are zeroed.  It is expected that the extra
220  * read does not overfill carry.
221  *
222  * NOTES:
223  * o from may _not_ be u64 aligned
224  * o nbytes may span a QW boundary
225  */
226 static inline void read_extra_bytes(struct pio_buf *pbuf,
227 				    const void *from, unsigned int nbytes)
228 {
229 	unsigned long off = (unsigned long)from & 0x7;
230 	unsigned int room, xbytes;
231 
232 	/* align our pointer */
233 	from = (void *)((unsigned long)from & ~0x7l);
234 
235 	/* check count first - don't read anything if count is zero */
236 	while (nbytes) {
237 		/* find the number of bytes in this u64 */
238 		room = 8 - off;	/* this u64 has room for this many bytes */
239 		xbytes = min(room, nbytes);
240 
241 		/*
242 		 * shift down to zero lower bytes, shift up to zero upper
243 		 * bytes, shift back down to move into place
244 		 */
245 		pbuf->carry.val64 |= (((*(u64 *)from)
246 					>> mshift(off))
247 					<< zshift(xbytes))
248 					>> zshift(xbytes + pbuf->carry_bytes);
249 		off = 0;
250 		pbuf->carry_bytes += xbytes;
251 		nbytes -= xbytes;
252 		from += sizeof(u64);
253 	}
254 }
255 
256 /*
257  * Zero extra bytes from the end of pbuf->carry.
258  *
259  * NOTES:
260  * o zbytes <= old_bytes
261  */
262 static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
263 {
264 	unsigned int remaining;
265 
266 	if (zbytes == 0)	/* nothing to do */
267 		return;
268 
269 	remaining = pbuf->carry_bytes - zbytes;	/* remaining bytes */
270 
271 	/* NOTE: zshift only guaranteed to work if remaining != 0 */
272 	if (remaining)
273 		pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining))
274 					>> zshift(remaining);
275 	else
276 		pbuf->carry.val64 = 0;
277 	pbuf->carry_bytes = remaining;
278 }
279 
280 /*
281  * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
282  * Put the unused part of the next 8 bytes of src into the LSB bytes of
283  * pbuf->carry with the upper bytes zeroed..
284  *
285  * NOTES:
286  * o result must keep unused bytes zeroed
287  * o src must be u64 aligned
288  */
289 static inline void merge_write8(
290 	struct pio_buf *pbuf,
291 	void __iomem *dest,
292 	const void *src)
293 {
294 	u64 new, temp;
295 
296 	new = *(u64 *)src;
297 	temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
298 	writeq(temp, dest);
299 	pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
300 }
301 
302 /*
303  * Write a quad word using all bytes of carry.
304  */
305 static inline void carry8_write8(union mix carry, void __iomem *dest)
306 {
307 	writeq(carry.val64, dest);
308 }
309 
310 /*
311  * Write a quad word using all the valid bytes of carry.  If carry
312  * has zero valid bytes, nothing is written.
313  * Returns 0 on nothing written, non-zero on quad word written.
314  */
315 static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
316 {
317 	if (pbuf->carry_bytes) {
318 		/* unused bytes are always kept zeroed, so just write */
319 		writeq(pbuf->carry.val64, dest);
320 		return 1;
321 	}
322 
323 	return 0;
324 }
325 
326 #else /* USE_SHIFTS */
327 /*
328  * Handle carry bytes using byte copies.
329  *
330  * NOTE: the value of the unused portion of carry is left uninitialized.
331  */
332 
333 /*
334  * Jump copy - no-loop copy for < 8 bytes.
335  */
336 static inline void jcopy(u8 *dest, const u8 *src, u32 n)
337 {
338 	switch (n) {
339 	case 7:
340 		*dest++ = *src++;
341 	case 6:
342 		*dest++ = *src++;
343 	case 5:
344 		*dest++ = *src++;
345 	case 4:
346 		*dest++ = *src++;
347 	case 3:
348 		*dest++ = *src++;
349 	case 2:
350 		*dest++ = *src++;
351 	case 1:
352 		*dest++ = *src++;
353 	}
354 }
355 
356 /*
357  * Read nbytes from "from" and and place them in the low bytes
358  * of pbuf->carry.  Other bytes are left as-is.  Any previous
359  * value in pbuf->carry is lost.
360  *
361  * NOTES:
362  * o do not read from from if nbytes is zero
363  * o from may _not_ be u64 aligned.
364  */
365 static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
366 				  unsigned int nbytes)
367 {
368 	jcopy(&pbuf->carry.val8[0], from, nbytes);
369 	pbuf->carry_bytes = nbytes;
370 }
371 
372 /*
373  * Read nbytes bytes from "from" and put them at the end of pbuf->carry.
374  * It is expected that the extra read does not overfill carry.
375  *
376  * NOTES:
377  * o from may _not_ be u64 aligned
378  * o nbytes may span a QW boundary
379  */
380 static inline void read_extra_bytes(struct pio_buf *pbuf,
381 				    const void *from, unsigned int nbytes)
382 {
383 	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
384 	pbuf->carry_bytes += nbytes;
385 }
386 
387 /*
388  * Zero extra bytes from the end of pbuf->carry.
389  *
390  * We do not care about the value of unused bytes in carry, so just
391  * reduce the byte count.
392  *
393  * NOTES:
394  * o zbytes <= old_bytes
395  */
396 static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
397 {
398 	pbuf->carry_bytes -= zbytes;
399 }
400 
401 /*
402  * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
403  * Put the unused part of the next 8 bytes of src into the low bytes of
404  * pbuf->carry.
405  */
406 static inline void merge_write8(
407 	struct pio_buf *pbuf,
408 	void *dest,
409 	const void *src)
410 {
411 	u32 remainder = 8 - pbuf->carry_bytes;
412 
413 	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
414 	writeq(pbuf->carry.val64, dest);
415 	jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
416 }
417 
418 /*
419  * Write a quad word using all bytes of carry.
420  */
421 static inline void carry8_write8(union mix carry, void *dest)
422 {
423 	writeq(carry.val64, dest);
424 }
425 
426 /*
427  * Write a quad word using all the valid bytes of carry.  If carry
428  * has zero valid bytes, nothing is written.
429  * Returns 0 on nothing written, non-zero on quad word written.
430  */
431 static inline int carry_write8(struct pio_buf *pbuf, void *dest)
432 {
433 	if (pbuf->carry_bytes) {
434 		u64 zero = 0;
435 
436 		jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
437 		      8 - pbuf->carry_bytes);
438 		writeq(pbuf->carry.val64, dest);
439 		return 1;
440 	}
441 
442 	return 0;
443 }
444 #endif /* USE_SHIFTS */
445 
446 /*
447  * Segmented PIO Copy - start
448  *
449  * Start a PIO copy.
450  *
451  * @pbuf: destination buffer
452  * @pbc: the PBC for the PIO buffer
453  * @from: data source, QWORD aligned
454  * @nbytes: bytes to copy
455  */
456 void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
457 			const void *from, size_t nbytes)
458 {
459 	void __iomem *dest = pbuf->start + SOP_DISTANCE;
460 	void __iomem *send = dest + PIO_BLOCK_SIZE;
461 	void __iomem *dend;			/* 8-byte data end */
462 
463 	writeq(pbc, dest);
464 	dest += sizeof(u64);
465 
466 	/* calculate where the QWORD data ends - in SOP=1 space */
467 	dend = dest + ((nbytes >> 3) * sizeof(u64));
468 
469 	if (dend < send) {
470 		/*
471 		 * all QWORD data is within the SOP block, does *not*
472 		 * reach the end of the SOP block
473 		 */
474 
475 		while (dest < dend) {
476 			writeq(*(u64 *)from, dest);
477 			from += sizeof(u64);
478 			dest += sizeof(u64);
479 		}
480 		/*
481 		 * No boundary checks are needed here:
482 		 * 0. We're not on the SOP block boundary
483 		 * 1. The possible DWORD dangle will still be within
484 		 *    the SOP block
485 		 * 2. We cannot wrap except on a block boundary.
486 		 */
487 	} else {
488 		/* QWORD data extends _to_ or beyond the SOP block */
489 
490 		/* write 8-byte SOP chunk data */
491 		while (dest < send) {
492 			writeq(*(u64 *)from, dest);
493 			from += sizeof(u64);
494 			dest += sizeof(u64);
495 		}
496 		/* drop out of the SOP range */
497 		dest -= SOP_DISTANCE;
498 		dend -= SOP_DISTANCE;
499 
500 		/*
501 		 * If the wrap comes before or matches the data end,
502 		 * copy until until the wrap, then wrap.
503 		 *
504 		 * If the data ends at the end of the SOP above and
505 		 * the buffer wraps, then pbuf->end == dend == dest
506 		 * and nothing will get written, but we will wrap in
507 		 * case there is a dangling DWORD.
508 		 */
509 		if (pbuf->end <= dend) {
510 			while (dest < pbuf->end) {
511 				writeq(*(u64 *)from, dest);
512 				from += sizeof(u64);
513 				dest += sizeof(u64);
514 			}
515 
516 			dest -= pbuf->size;
517 			dend -= pbuf->size;
518 		}
519 
520 		/* write 8-byte non-SOP, non-wrap chunk data */
521 		while (dest < dend) {
522 			writeq(*(u64 *)from, dest);
523 			from += sizeof(u64);
524 			dest += sizeof(u64);
525 		}
526 	}
527 	/* at this point we have wrapped if we are going to wrap */
528 
529 	/* ...but it doesn't matter as we're done writing */
530 
531 	/* save dangling bytes, if any */
532 	read_low_bytes(pbuf, from, nbytes & 0x7);
533 
534 	pbuf->qw_written = 1 /*PBC*/ + (nbytes >> 3);
535 }
536 
537 /*
538  * Mid copy helper, "mixed case" - source is 64-bit aligned but carry
539  * bytes are non-zero.
540  *
541  * Whole u64s must be written to the chip, so bytes must be manually merged.
542  *
543  * @pbuf: destination buffer
544  * @from: data source, is QWORD aligned.
545  * @nbytes: bytes to copy
546  *
547  * Must handle nbytes < 8.
548  */
549 static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
550 {
551 	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
552 	void __iomem *dend;			/* 8-byte data end */
553 	unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3;
554 	unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7;
555 
556 	/* calculate 8-byte data end */
557 	dend = dest + (qw_to_write * sizeof(u64));
558 
559 	if (pbuf->qw_written < PIO_BLOCK_QWS) {
560 		/*
561 		 * Still within SOP block.  We don't need to check for
562 		 * wrap because we are still in the first block and
563 		 * can only wrap on block boundaries.
564 		 */
565 		void __iomem *send;		/* SOP end */
566 		void __iomem *xend;
567 
568 		/*
569 		 * calculate the end of data or end of block, whichever
570 		 * comes first
571 		 */
572 		send = pbuf->start + PIO_BLOCK_SIZE;
573 		xend = min(send, dend);
574 
575 		/* shift up to SOP=1 space */
576 		dest += SOP_DISTANCE;
577 		xend += SOP_DISTANCE;
578 
579 		/* write 8-byte chunk data */
580 		while (dest < xend) {
581 			merge_write8(pbuf, dest, from);
582 			from += sizeof(u64);
583 			dest += sizeof(u64);
584 		}
585 
586 		/* shift down to SOP=0 space */
587 		dest -= SOP_DISTANCE;
588 	}
589 	/*
590 	 * At this point dest could be (either, both, or neither):
591 	 * - at dend
592 	 * - at the wrap
593 	 */
594 
595 	/*
596 	 * If the wrap comes before or matches the data end,
597 	 * copy until until the wrap, then wrap.
598 	 *
599 	 * If dest is at the wrap, we will fall into the if,
600 	 * not do the loop, when wrap.
601 	 *
602 	 * If the data ends at the end of the SOP above and
603 	 * the buffer wraps, then pbuf->end == dend == dest
604 	 * and nothing will get written.
605 	 */
606 	if (pbuf->end <= dend) {
607 		while (dest < pbuf->end) {
608 			merge_write8(pbuf, dest, from);
609 			from += sizeof(u64);
610 			dest += sizeof(u64);
611 		}
612 
613 		dest -= pbuf->size;
614 		dend -= pbuf->size;
615 	}
616 
617 	/* write 8-byte non-SOP, non-wrap chunk data */
618 	while (dest < dend) {
619 		merge_write8(pbuf, dest, from);
620 		from += sizeof(u64);
621 		dest += sizeof(u64);
622 	}
623 
624 	/* adjust carry */
625 	if (pbuf->carry_bytes < bytes_left) {
626 		/* need to read more */
627 		read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes);
628 	} else {
629 		/* remove invalid bytes */
630 		zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left);
631 	}
632 
633 	pbuf->qw_written += qw_to_write;
634 }
635 
636 /*
637  * Mid copy helper, "straight case" - source pointer is 64-bit aligned
638  * with no carry bytes.
639  *
640  * @pbuf: destination buffer
641  * @from: data source, is QWORD aligned
642  * @nbytes: bytes to copy
643  *
644  * Must handle nbytes < 8.
645  */
646 static void mid_copy_straight(struct pio_buf *pbuf,
647 			      const void *from, size_t nbytes)
648 {
649 	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
650 	void __iomem *dend;			/* 8-byte data end */
651 
652 	/* calculate 8-byte data end */
653 	dend = dest + ((nbytes >> 3) * sizeof(u64));
654 
655 	if (pbuf->qw_written < PIO_BLOCK_QWS) {
656 		/*
657 		 * Still within SOP block.  We don't need to check for
658 		 * wrap because we are still in the first block and
659 		 * can only wrap on block boundaries.
660 		 */
661 		void __iomem *send;		/* SOP end */
662 		void __iomem *xend;
663 
664 		/*
665 		 * calculate the end of data or end of block, whichever
666 		 * comes first
667 		 */
668 		send = pbuf->start + PIO_BLOCK_SIZE;
669 		xend = min(send, dend);
670 
671 		/* shift up to SOP=1 space */
672 		dest += SOP_DISTANCE;
673 		xend += SOP_DISTANCE;
674 
675 		/* write 8-byte chunk data */
676 		while (dest < xend) {
677 			writeq(*(u64 *)from, dest);
678 			from += sizeof(u64);
679 			dest += sizeof(u64);
680 		}
681 
682 		/* shift down to SOP=0 space */
683 		dest -= SOP_DISTANCE;
684 	}
685 	/*
686 	 * At this point dest could be (either, both, or neither):
687 	 * - at dend
688 	 * - at the wrap
689 	 */
690 
691 	/*
692 	 * If the wrap comes before or matches the data end,
693 	 * copy until until the wrap, then wrap.
694 	 *
695 	 * If dest is at the wrap, we will fall into the if,
696 	 * not do the loop, when wrap.
697 	 *
698 	 * If the data ends at the end of the SOP above and
699 	 * the buffer wraps, then pbuf->end == dend == dest
700 	 * and nothing will get written.
701 	 */
702 	if (pbuf->end <= dend) {
703 		while (dest < pbuf->end) {
704 			writeq(*(u64 *)from, dest);
705 			from += sizeof(u64);
706 			dest += sizeof(u64);
707 		}
708 
709 		dest -= pbuf->size;
710 		dend -= pbuf->size;
711 	}
712 
713 	/* write 8-byte non-SOP, non-wrap chunk data */
714 	while (dest < dend) {
715 		writeq(*(u64 *)from, dest);
716 		from += sizeof(u64);
717 		dest += sizeof(u64);
718 	}
719 
720 	/* we know carry_bytes was zero on entry to this routine */
721 	read_low_bytes(pbuf, from, nbytes & 0x7);
722 
723 	pbuf->qw_written += nbytes >> 3;
724 }
725 
726 /*
727  * Segmented PIO Copy - middle
728  *
729  * Must handle any aligned tail and any aligned source with any byte count.
730  *
731  * @pbuf: a number of blocks allocated within a PIO send context
732  * @from: data source
733  * @nbytes: number of bytes to copy
734  */
735 void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
736 {
737 	unsigned long from_align = (unsigned long)from & 0x7;
738 
739 	if (pbuf->carry_bytes + nbytes < 8) {
740 		/* not enough bytes to fill a QW */
741 		read_extra_bytes(pbuf, from, nbytes);
742 		return;
743 	}
744 
745 	if (from_align) {
746 		/* misaligned source pointer - align it */
747 		unsigned long to_align;
748 
749 		/* bytes to read to align "from" */
750 		to_align = 8 - from_align;
751 
752 		/*
753 		 * In the advance-to-alignment logic below, we do not need
754 		 * to check if we are using more than nbytes.  This is because
755 		 * if we are here, we already know that carry+nbytes will
756 		 * fill at least one QW.
757 		 */
758 		if (pbuf->carry_bytes + to_align < 8) {
759 			/* not enough align bytes to fill a QW */
760 			read_extra_bytes(pbuf, from, to_align);
761 			from += to_align;
762 			nbytes -= to_align;
763 		} else {
764 			/* bytes to fill carry */
765 			unsigned long to_fill = 8 - pbuf->carry_bytes;
766 			/* bytes left over to be read */
767 			unsigned long extra = to_align - to_fill;
768 			void __iomem *dest;
769 
770 			/* fill carry... */
771 			read_extra_bytes(pbuf, from, to_fill);
772 			from += to_fill;
773 			nbytes -= to_fill;
774 
775 			/* ...now write carry */
776 			dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
777 
778 			/*
779 			 * The two checks immediately below cannot both be
780 			 * true, hence the else.  If we have wrapped, we
781 			 * cannot still be within the first block.
782 			 * Conversely, if we are still in the first block, we
783 			 * cannot have wrapped.  We do the wrap check first
784 			 * as that is more likely.
785 			 */
786 			/* adjust if we've wrapped */
787 			if (dest >= pbuf->end)
788 				dest -= pbuf->size;
789 			/* jump to SOP range if within the first block */
790 			else if (pbuf->qw_written < PIO_BLOCK_QWS)
791 				dest += SOP_DISTANCE;
792 
793 			carry8_write8(pbuf->carry, dest);
794 			pbuf->qw_written++;
795 
796 			/* read any extra bytes to do final alignment */
797 			/* this will overwrite anything in pbuf->carry */
798 			read_low_bytes(pbuf, from, extra);
799 			from += extra;
800 			nbytes -= extra;
801 		}
802 
803 		/* at this point, from is QW aligned */
804 	}
805 
806 	if (pbuf->carry_bytes)
807 		mid_copy_mix(pbuf, from, nbytes);
808 	else
809 		mid_copy_straight(pbuf, from, nbytes);
810 }
811 
812 /*
813  * Segmented PIO Copy - end
814  *
815  * Write any remainder (in pbuf->carry) and finish writing the whole block.
816  *
817  * @pbuf: a number of blocks allocated within a PIO send context
818  */
819 void seg_pio_copy_end(struct pio_buf *pbuf)
820 {
821 	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
822 
823 	/*
824 	 * The two checks immediately below cannot both be true, hence the
825 	 * else.  If we have wrapped, we cannot still be within the first
826 	 * block.  Conversely, if we are still in the first block, we
827 	 * cannot have wrapped.  We do the wrap check first as that is
828 	 * more likely.
829 	 */
830 	/* adjust if we have wrapped */
831 	if (dest >= pbuf->end)
832 		dest -= pbuf->size;
833 	/* jump to the SOP range if within the first block */
834 	else if (pbuf->qw_written < PIO_BLOCK_QWS)
835 		dest += SOP_DISTANCE;
836 
837 	/* write final bytes, if any */
838 	if (carry_write8(pbuf, dest)) {
839 		dest += sizeof(u64);
840 		/*
841 		 * NOTE: We do not need to recalculate whether dest needs
842 		 * SOP_DISTANCE or not.
843 		 *
844 		 * If we are in the first block and the dangle write
845 		 * keeps us in the same block, dest will need
846 		 * to retain SOP_DISTANCE in the loop below.
847 		 *
848 		 * If we are in the first block and the dangle write pushes
849 		 * us to the next block, then loop below will not run
850 		 * and dest is not used.  Hence we do not need to update
851 		 * it.
852 		 *
853 		 * If we are past the first block, then SOP_DISTANCE
854 		 * was never added, so there is nothing to do.
855 		 */
856 	}
857 
858 	/* fill in rest of block */
859 	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
860 		writeq(0, dest);
861 		dest += sizeof(u64);
862 	}
863 
864 	/* finished with this buffer */
865 	this_cpu_dec(*pbuf->sc->buffers_allocated);
866 	preempt_enable();
867 }
868