xref: /titanic_52/usr/src/uts/i86pc/io/ioat/ioat_chan.c (revision c1ecd8b9404ee0d96d93f02e82c441b9bb149a3d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/errno.h>
30 #include <sys/types.h>
31 #include <sys/conf.h>
32 #include <sys/kmem.h>
33 #include <sys/ddi.h>
34 #include <sys/stat.h>
35 #include <sys/sunddi.h>
36 #include <sys/file.h>
37 #include <sys/open.h>
38 #include <sys/modctl.h>
39 #include <sys/ddi_impldefs.h>
40 #include <sys/sysmacros.h>
41 #include <vm/hat.h>
42 #include <vm/as.h>
43 #include <sys/mach_mmu.h>
44 #ifdef __xpv
45 #include <sys/hypervisor.h>
46 #endif
47 
48 #include <sys/ioat.h>
49 
50 
51 extern ddi_device_acc_attr_t ioat_acc_attr;
52 
53 /* dma attr for the descriptor rings */
54 ddi_dma_attr_t ioat_desc_dma_attr = {
55 	DMA_ATTR_V0,		/* dma_attr_version */
56 	0x0,			/* dma_attr_addr_lo */
57 	0xffffffffffffffff,	/* dma_attr_addr_hi */
58 	0xffffffff,		/* dma_attr_count_max */
59 	0x1000,			/* dma_attr_align */
60 	0x1,			/* dma_attr_burstsizes */
61 	0x1,			/* dma_attr_minxfer */
62 	0xffffffff,		/* dma_attr_maxxfer */
63 	0xffffffff,		/* dma_attr_seg */
64 	0x1,			/* dma_attr_sgllen */
65 	0x1,			/* dma_attr_granular */
66 	0x0,			/* dma_attr_flags */
67 };
68 
69 /* dma attr for the completion buffers */
70 ddi_dma_attr_t ioat_cmpl_dma_attr = {
71 	DMA_ATTR_V0,		/* dma_attr_version */
72 	0x0,			/* dma_attr_addr_lo */
73 	0xffffffffffffffff,	/* dma_attr_addr_hi */
74 	0xffffffff,		/* dma_attr_count_max */
75 	0x40,			/* dma_attr_align */
76 	0x1,			/* dma_attr_burstsizes */
77 	0x1,			/* dma_attr_minxfer */
78 	0xffffffff,		/* dma_attr_maxxfer */
79 	0xffffffff,		/* dma_attr_seg */
80 	0x1,			/* dma_attr_sgllen */
81 	0x1,			/* dma_attr_granular */
82 	0x0,			/* dma_attr_flags */
83 };
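/*
 * Note: both attribute sets allow only a single cookie (dma_attr_sgllen
 * of 1), so each allocation must be physically contiguous. The descriptor
 * ring is page (0x1000) aligned, while the completion buffer only needs
 * 64-byte (cache line) alignment.
 */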
84 
85 static int ioat_completion_alloc(ioat_channel_t channel);
86 static void ioat_completion_free(ioat_channel_t channel);
87 static void ioat_channel_start(ioat_channel_t channel);
88 static void ioat_channel_reset(ioat_channel_t channel);
89 
90 int ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt);
91 void ioat_ring_free(ioat_channel_t channel);
92 void ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *desc);
93 int ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t *ring,
94     dcopy_cmd_t cmd);
95 
96 static void ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr,
97     uint64_t dest_addr, uint32_t size, uint32_t ctrl);
98 static void ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id);
99 
100 
101 /*
102  * ioat_channel_init()
103  */
104 int
105 ioat_channel_init(ioat_state_t *state)
106 {
107 	int i;
108 
109 	/*
110 	 * initialize each dma channel's state which doesn't change across
111 	 * channel alloc/free.
112 	 */
113 	state->is_chansize = sizeof (struct ioat_channel_s) *
114 	    state->is_num_channels;
115 	state->is_channel = kmem_zalloc(state->is_chansize, KM_SLEEP);
116 	for (i = 0; i < state->is_num_channels; i++) {
117 		state->is_channel[i].ic_state = state;
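		/*
		 * channel i's register block lives (i + 1) copies of
		 * IOAT_CHANNELREG_OFFSET past the start of the general
		 * (non-channel) registers pointed to by is_genregs.
		 */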
118 		state->is_channel[i].ic_regs = (uint8_t *)
119 		    ((uintptr_t)state->is_genregs +
120 		    (uintptr_t)(IOAT_CHANNELREG_OFFSET * (i + 1)));
121 	}
122 
123 	/* initialize the allocator (from 0 to state->is_num_channels) */
124 	ioat_rs_init(state, 0, state->is_num_channels, &state->is_channel_rs);
125 
126 	return (DDI_SUCCESS);
127 }
128 
129 
130 /*
131  * ioat_channel_fini()
132  */
133 void
134 ioat_channel_fini(ioat_state_t *state)
135 {
136 	ioat_rs_fini(&state->is_channel_rs);
137 	kmem_free(state->is_channel, state->is_chansize);
138 }
139 
140 
141 /*
142  * ioat_channel_alloc()
143  *   NOTE: We intentionally don't handle DCOPY_SLEEP (if no channels are
144  *	available)
145  */
146 /*ARGSUSED*/
147 int
148 ioat_channel_alloc(void *device_private, dcopy_handle_t handle, int flags,
149     uint_t size, dcopy_query_channel_t *info, void *channel_private)
150 {
151 #define	CHANSTRSIZE	20
152 	struct ioat_channel_s *channel;
153 	char chanstr[CHANSTRSIZE];
154 	ioat_channel_t *chan;
155 	ioat_state_t *state;
156 	size_t cmd_size;
157 	uint_t chan_num;
158 	uint32_t estat;
159 	int e;
160 
161 
162 	state = (ioat_state_t *)device_private;
163 	chan = (ioat_channel_t *)channel_private;
164 
165 	/* allocate a H/W channel */
166 	e = ioat_rs_alloc(state->is_channel_rs, &chan_num);
167 	if (e != DDI_SUCCESS) {
168 		return (DCOPY_NORESOURCES);
169 	}
170 
171 	channel = &state->is_channel[chan_num];
172 	channel->ic_inuse = B_TRUE;
173 	channel->ic_chan_num = chan_num;
174 	channel->ic_ver = state->is_ver;
175 	channel->ic_dca_active = B_FALSE;
176 	channel->ic_channel_state = IOAT_CHANNEL_OK;
177 	channel->ic_dcopy_handle = handle;
178 
179 #ifdef	DEBUG
180 	{
181 		/* if we're cbv2, verify that the V2 compatibility bit is set */
182 		uint16_t reg;
183 		if (channel->ic_ver == IOAT_CBv2) {
184 			reg = ddi_get16(state->is_reg_handle,
185 			    (uint16_t *)&channel->ic_regs[IOAT_CHAN_COMP]);
186 			ASSERT(reg & 0x2);
187 		}
188 	}
189 #endif
190 
191 	/*
192 	 * Configure DMA channel
193 	 *   Channel In Use
194 	 *   Error Interrupt Enable
195 	 *   Any Error Abort Enable
196 	 *   Error Completion Enable
197 	 */
198 	ddi_put16(state->is_reg_handle,
199 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C);
200 
201 	/* check channel error register, clear any errors */
202 	estat = ddi_get32(state->is_reg_handle,
203 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
204 	if (estat != 0) {
205 #ifdef	DEBUG
206 		cmn_err(CE_CONT, "cleared errors (0x%x) before channel (%d) "
207 		    "enable\n", estat, channel->ic_chan_num);
208 #endif
209 		ddi_put32(state->is_reg_handle,
210 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR], estat);
211 	}
212 
213 	/* allocate and initialize the descriptor buf */
214 	e = ioat_ring_alloc(channel, size);
215 	if (e != DDI_SUCCESS) {
216 		goto chinitfail_desc_alloc;
217 	}
218 
219 	/* allocate and initialize the completion space */
220 	e = ioat_completion_alloc(channel);
221 	if (e != DDI_SUCCESS) {
222 		goto chinitfail_completion_alloc;
223 	}
224 
225 	/* setup kmem_cache for commands */
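	/*
	 * each cache object holds a dcopy_cmd_s immediately followed by a
	 * dcopy_cmd_priv_s and an ioat_cmd_private_s; ioat_cmd_alloc()
	 * carves the three structures out of the one allocation.
	 */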
226 	cmd_size = sizeof (struct dcopy_cmd_s) +
227 	    sizeof (struct dcopy_cmd_priv_s) +
228 	    sizeof (struct ioat_cmd_private_s);
229 	(void) snprintf(chanstr, CHANSTRSIZE, "ioat%dchan%dcmd",
230 	    state->is_instance, channel->ic_chan_num);
231 	channel->ic_cmd_cache = kmem_cache_create(chanstr, cmd_size, 64,
232 	    NULL, NULL, NULL, NULL, NULL, 0);
233 	if (channel->ic_cmd_cache == NULL) {
234 		goto chinitfail_kmem_cache;
235 	}
236 
237 	/* start-up the channel */
238 	ioat_channel_start(channel);
239 
240 	/* fill in the channel info returned to dcopy */
241 	info->qc_version = DCOPY_QUERY_CHANNEL_V0;
242 	info->qc_id = state->is_deviceinfo.di_id;
243 	info->qc_capabilities = (uint64_t)state->is_capabilities;
244 	info->qc_channel_size = (uint64_t)size;
245 	info->qc_chan_num = (uint64_t)channel->ic_chan_num;
246 	if (channel->ic_ver == IOAT_CBv1) {
247 		info->qc_dca_supported = B_FALSE;
248 	} else {
249 		if (info->qc_capabilities & IOAT_DMACAP_DCA) {
250 			info->qc_dca_supported = B_TRUE;
251 		} else {
252 			info->qc_dca_supported = B_FALSE;
253 		}
254 	}
255 
256 	*chan = channel;
257 
258 	return (DCOPY_SUCCESS);
259 
260 chinitfail_kmem_cache:
261 	ioat_completion_free(channel);
262 chinitfail_completion_alloc:
263 	ioat_ring_free(channel);
264 chinitfail_desc_alloc:
265 	return (DCOPY_FAILURE);
266 }
267 
268 
269 /*
270  * ioat_channel_suspend()
271  */
272 /*ARGSUSED*/
273 void
274 ioat_channel_suspend(ioat_state_t *state)
275 {
276 	/*
277 	 * normally you would disable interrupts and reset the H/W here. But
278 	 * since the suspend framework doesn't know who is using us, it may
279 	 * not suspend their I/O before us.  Since we won't actively be doing
280 	 * any DMA or interrupts unless someone asks us to, it's safe to not
281 	 * do anything here.
282 	 */
283 }
284 
285 
286 /*
287  * ioat_channel_resume()
288  */
289 int
290 ioat_channel_resume(ioat_state_t *state)
291 {
292 	ioat_channel_ring_t *ring;
293 	ioat_channel_t channel;
294 	uint32_t estat;
295 	int i;
296 
297 
298 	for (i = 0; i < state->is_num_channels; i++) {
299 		channel = &state->is_channel[i];
300 		ring = channel->ic_ring;
301 
302 		if (!channel->ic_inuse) {
303 			continue;
304 		}
305 
306 		/*
307 		 * Configure DMA channel
308 		 *   Channel In Use
309 		 *   Error Interrupt Enable
310 		 *   Any Error Abort Enable
311 		 *   Error Completion Enable
312 		 */
313 		ddi_put16(state->is_reg_handle,
314 		    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C);
315 
316 		/* check channel error register, clear any errors */
317 		estat = ddi_get32(state->is_reg_handle,
318 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
319 		if (estat != 0) {
320 #ifdef	DEBUG
321 			cmn_err(CE_CONT, "cleared errors (0x%x) before channel"
322 			    " (%d) enable\n", estat, channel->ic_chan_num);
323 #endif
324 			ddi_put32(state->is_reg_handle,
325 			    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR],
326 			    estat);
327 		}
328 
329 		/* Re-initialize the ring */
330 		bzero(ring->cr_desc, channel->ic_desc_alloc_size);
331 		/* write the physical address into the chain address register */
332 		if (channel->ic_ver == IOAT_CBv1) {
333 			ddi_put32(state->is_reg_handle,
334 			    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO],
335 			    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
336 			ddi_put32(state->is_reg_handle,
337 			    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI],
338 			    (uint32_t)(ring->cr_phys_desc >> 32));
339 		} else {
340 			ASSERT(channel->ic_ver == IOAT_CBv2);
341 			ddi_put32(state->is_reg_handle,
342 			    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO],
343 			    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
344 			ddi_put32(state->is_reg_handle,
345 			    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI],
346 			    (uint32_t)(ring->cr_phys_desc >> 32));
347 		}
348 
349 		/* re-initialize the completion buffer */
350 		bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size);
351 		/* write the phys addr into the completion address register */
352 		ddi_put32(state->is_reg_handle,
353 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO],
354 		    (uint32_t)(channel->ic_phys_cmpl & 0xffffffff));
355 		ddi_put32(state->is_reg_handle,
356 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI],
357 		    (uint32_t)(channel->ic_phys_cmpl >> 32));
358 
359 		/* start-up the channel */
360 		ioat_channel_start(channel);
361 
362 	}
363 
364 	return (DDI_SUCCESS);
365 }
366 
367 
368 /*
369  * ioat_channel_free()
370  */
371 void
372 ioat_channel_free(void *channel_private)
373 {
374 	struct ioat_channel_s *channel;
375 	ioat_channel_t *chan;
376 	ioat_state_t *state;
377 	uint_t chan_num;
378 
379 
380 	chan = (ioat_channel_t *)channel_private;
381 	channel = *chan;
382 
383 	state = channel->ic_state;
384 	chan_num = channel->ic_chan_num;
385 
386 	/* disable the interrupts */
387 	ddi_put16(state->is_reg_handle,
388 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x0);
389 
390 	ioat_channel_reset(channel);
391 
392 	/* cleanup command cache */
393 	kmem_cache_destroy(channel->ic_cmd_cache);
394 
395 	/* clean-up/free-up the completion space and descriptors */
396 	ioat_completion_free(channel);
397 	ioat_ring_free(channel);
398 
399 	channel->ic_inuse = B_FALSE;
400 
401 	/* free the H/W DMA engine */
402 	ioat_rs_free(state->is_channel_rs, chan_num);
403 
404 	*chan = NULL;
405 }
406 
407 
408 /*
409  * ioat_channel_intr()
410  */
411 void
412 ioat_channel_intr(ioat_channel_t channel)
413 {
414 	ioat_state_t *state;
415 	uint16_t chanctrl;
416 	uint32_t chanerr;
417 	uint32_t status;
418 
419 
420 	state = channel->ic_state;
421 
422 	if (channel->ic_ver == IOAT_CBv1) {
423 		status = ddi_get32(state->is_reg_handle,
424 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_STS_LO]);
425 	} else {
426 		ASSERT(channel->ic_ver == IOAT_CBv2);
427 		status = ddi_get32(state->is_reg_handle,
428 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_STS_LO]);
429 	}
430 
431 	/* if the status isn't ACTIVE or IDLE, the channel has failed */
432 	if (status & IOAT_CHAN_STS_FAIL_MASK) {
433 		chanerr = ddi_get32(state->is_reg_handle,
434 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
435 		cmn_err(CE_WARN, "channel(%d) fatal failure! "
436 		    "chanstat_lo=0x%X; chanerr=0x%X\n",
437 		    channel->ic_chan_num, status, chanerr);
438 		channel->ic_channel_state = IOAT_CHANNEL_IN_FAILURE;
439 		ioat_channel_reset(channel);
440 
441 		return;
442 	}
443 
444 	/*
445 	 * clear interrupt disable bit if set (it's a RW1C). Read it back to
446 	 * ensure the write completes.
447 	 */
448 	chanctrl = ddi_get16(state->is_reg_handle,
449 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]);
450 	ddi_put16(state->is_reg_handle,
451 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], chanctrl);
452 	(void) ddi_get16(state->is_reg_handle,
453 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]);
454 
455 	/* tell dcopy we have seen a completion on this channel */
456 	dcopy_device_channel_notify(channel->ic_dcopy_handle, DCOPY_COMPLETION);
457 }
458 
459 
460 /*
461  * ioat_channel_start()
462  */
463 void
464 ioat_channel_start(ioat_channel_t channel)
465 {
466 	ioat_chan_dma_desc_t desc;
467 
468 	/* set the first descriptor up as a NULL descriptor */
469 	bzero(&desc, sizeof (desc));
470 	desc.dd_size = 0;
471 	desc.dd_ctrl = IOAT_DESC_CTRL_OP_DMA | IOAT_DESC_DMACTRL_NULL |
472 	    IOAT_DESC_CTRL_CMPL;
473 	desc.dd_next_desc = 0x0;
474 
475 	/* setup the very first descriptor */
476 	ioat_ring_seed(channel, &desc);
477 }
478 
479 
480 /*
481  * ioat_channel_reset()
482  */
483 void
484 ioat_channel_reset(ioat_channel_t channel)
485 {
486 	ioat_state_t *state;
487 
488 	state = channel->ic_state;
489 
490 	/* hit the reset bit */
491 	if (channel->ic_ver == IOAT_CBv1) {
492 		ddi_put8(state->is_reg_handle,
493 		    &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x20);
494 	} else {
495 		ASSERT(channel->ic_ver == IOAT_CBv2);
496 		ddi_put8(state->is_reg_handle,
497 		    &channel->ic_regs[IOAT_V2_CHAN_CMD], 0x20);
498 	}
499 }
500 
501 
502 /*
503  * ioat_completion_alloc()
504  */
505 int
506 ioat_completion_alloc(ioat_channel_t channel)
507 {
508 	ioat_state_t *state;
509 	size_t real_length;
510 	uint_t cookie_cnt;
511 	int e;
512 
513 
514 	state = channel->ic_state;
515 
516 	/*
517 	 * allocate memory for the completion status, zero it out, and get
518 	 * the paddr. We'll allocate a physically contiguous cache line.
519 	 */
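	/*
	 * the engine writes completion status (including the address of the
	 * last completed descriptor) into this buffer; ioat_cmd_poll() uses
	 * IOAT_CMPL_INDEX() to turn that back into a ring index.
	 */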
520 	e = ddi_dma_alloc_handle(state->is_dip, &ioat_cmpl_dma_attr,
521 	    DDI_DMA_SLEEP, NULL, &channel->ic_cmpl_dma_handle);
522 	if (e != DDI_SUCCESS) {
523 		goto cmplallocfail_alloc_handle;
524 	}
525 	channel->ic_cmpl_alloc_size = 64;
526 	e = ddi_dma_mem_alloc(channel->ic_cmpl_dma_handle,
527 	    channel->ic_cmpl_alloc_size, &ioat_acc_attr,
528 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
529 	    (caddr_t *)&channel->ic_cmpl, &real_length,
530 	    &channel->ic_cmpl_handle);
531 	if (e != DDI_SUCCESS) {
532 		goto cmplallocfail_mem_alloc;
533 	}
534 	bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size);
535 	e = ddi_dma_addr_bind_handle(channel->ic_cmpl_dma_handle, NULL,
536 	    (caddr_t)channel->ic_cmpl, channel->ic_cmpl_alloc_size,
537 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
538 	    &channel->ic_cmpl_cookie, &cookie_cnt);
539 	if (e != DDI_SUCCESS) {
540 		goto cmplallocfail_addr_bind;
541 	}
542 	ASSERT(cookie_cnt == 1);
543 	ASSERT(channel->ic_cmpl_cookie.dmac_size ==
544 	    channel->ic_cmpl_alloc_size);
545 	channel->ic_phys_cmpl = channel->ic_cmpl_cookie.dmac_laddress;
546 
547 	/* write the physical address into the completion address register */
548 	ddi_put32(state->is_reg_handle,
549 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO],
550 	    (uint32_t)(channel->ic_phys_cmpl & 0xffffffff));
551 	ddi_put32(state->is_reg_handle,
552 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI],
553 	    (uint32_t)(channel->ic_phys_cmpl >> 32));
554 
555 	return (DDI_SUCCESS);
556 
557 cmplallocfail_addr_bind:
558 	ddi_dma_mem_free(&channel->ic_cmpl_handle);
559 cmplallocfail_mem_alloc:
560 	ddi_dma_free_handle(&channel->ic_cmpl_dma_handle);
561 cmplallocfail_alloc_handle:
562 	return (DDI_FAILURE);
563 }
564 
565 
566 /*
567  * ioat_completion_free()
568  */
569 void
570 ioat_completion_free(ioat_channel_t channel)
571 {
572 	ioat_state_t *state;
573 
574 	state = channel->ic_state;
575 
576 	/* reset the completion address register */
577 	ddi_put32(state->is_reg_handle,
578 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO], 0x0);
579 	ddi_put32(state->is_reg_handle,
580 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI], 0x0);
581 
582 	/* unbind, then free up the memory, dma handle */
583 	(void) ddi_dma_unbind_handle(channel->ic_cmpl_dma_handle);
584 	ddi_dma_mem_free(&channel->ic_cmpl_handle);
585 	ddi_dma_free_handle(&channel->ic_cmpl_dma_handle);
586 }
587 
588 /*
589  * ioat_ring_alloc()
590  */
591 int
592 ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt)
593 {
594 	ioat_channel_ring_t *ring;
595 	ioat_state_t *state;
596 	size_t real_length;
597 	uint_t cookie_cnt;
598 	int e;
599 
600 
601 	state = channel->ic_state;
602 
603 	ring = kmem_zalloc(sizeof (ioat_channel_ring_t), KM_SLEEP);
604 	channel->ic_ring = ring;
605 	ring->cr_chan = channel;
606 	ring->cr_post_cnt = 0;
607 
608 	mutex_init(&ring->cr_cmpl_mutex, NULL, MUTEX_DRIVER,
609 	    channel->ic_state->is_iblock_cookie);
610 	mutex_init(&ring->cr_desc_mutex, NULL, MUTEX_DRIVER,
611 	    channel->ic_state->is_iblock_cookie);
612 
613 	/*
614 	 * allocate memory for the ring, zero it out, and get the paddr.
615 	 * We'll allocate a physically contiguous chunk of memory, which
616 	 * simplifies the completion logic.
617 	 */
618 	e = ddi_dma_alloc_handle(state->is_dip, &ioat_desc_dma_attr,
619 	    DDI_DMA_SLEEP, NULL, &channel->ic_desc_dma_handle);
620 	if (e != DDI_SUCCESS) {
621 		goto ringallocfail_alloc_handle;
622 	}
623 	/*
624 	 * allocate one extra descriptor so we can simplify the empty/full
625 	 * logic. Then round that number up to a whole multiple of 4.
626 	 */
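	/*
	 * e.g. a request for 5 descriptors becomes ((5 + 1) + 3) & ~0x3, or
	 * 8 ring entries.
	 */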
627 	channel->ic_chan_desc_cnt = ((desc_cnt + 1) + 3) & ~0x3;
628 	ring->cr_desc_last = channel->ic_chan_desc_cnt - 1;
629 	channel->ic_desc_alloc_size = channel->ic_chan_desc_cnt *
630 	    sizeof (ioat_chan_desc_t);
631 	e = ddi_dma_mem_alloc(channel->ic_desc_dma_handle,
632 	    channel->ic_desc_alloc_size, &ioat_acc_attr,
633 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
634 	    (caddr_t *)&ring->cr_desc, &real_length, &channel->ic_desc_handle);
635 	if (e != DDI_SUCCESS) {
636 		goto ringallocfail_mem_alloc;
637 	}
638 	bzero(ring->cr_desc, channel->ic_desc_alloc_size);
639 	e = ddi_dma_addr_bind_handle(channel->ic_desc_dma_handle, NULL,
640 	    (caddr_t)ring->cr_desc, channel->ic_desc_alloc_size,
641 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
642 	    &channel->ic_desc_cookies, &cookie_cnt);
643 	if (e != DDI_SUCCESS) {
644 		goto ringallocfail_addr_bind;
645 	}
646 	ASSERT(cookie_cnt == 1);
647 	ASSERT(channel->ic_desc_cookies.dmac_size ==
648 	    channel->ic_desc_alloc_size);
649 	ring->cr_phys_desc = channel->ic_desc_cookies.dmac_laddress;
650 
651 	/* write the physical address into the chain address register */
652 	if (channel->ic_ver == IOAT_CBv1) {
653 		ddi_put32(state->is_reg_handle,
654 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO],
655 		    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
656 		ddi_put32(state->is_reg_handle,
657 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI],
658 		    (uint32_t)(ring->cr_phys_desc >> 32));
659 	} else {
660 		ASSERT(channel->ic_ver == IOAT_CBv2);
661 		ddi_put32(state->is_reg_handle,
662 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO],
663 		    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
664 		ddi_put32(state->is_reg_handle,
665 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI],
666 		    (uint32_t)(ring->cr_phys_desc >> 32));
667 	}
668 
669 	return (DCOPY_SUCCESS);
670 
671 ringallocfail_addr_bind:
672 	ddi_dma_mem_free(&channel->ic_desc_handle);
673 ringallocfail_mem_alloc:
674 	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
675 ringallocfail_alloc_handle:
676 	mutex_destroy(&ring->cr_desc_mutex);
677 	mutex_destroy(&ring->cr_cmpl_mutex);
678 	kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t));
679 
680 	return (DCOPY_FAILURE);
681 }
682 
683 
684 /*
685  * ioat_ring_free()
686  */
687 void
688 ioat_ring_free(ioat_channel_t channel)
689 {
690 	ioat_state_t *state;
691 
692 
693 	state = channel->ic_state;
694 
695 	/* reset the chain address register */
696 	if (channel->ic_ver == IOAT_CBv1) {
697 		ddi_put32(state->is_reg_handle,
698 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO], 0x0);
699 		ddi_put32(state->is_reg_handle,
700 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI], 0x0);
701 	} else {
702 		ASSERT(channel->ic_ver == IOAT_CBv2);
703 		ddi_put32(state->is_reg_handle,
704 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO], 0x0);
705 		ddi_put32(state->is_reg_handle,
706 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI], 0x0);
707 	}
708 
709 	/* unbind, then free up the memory, dma handle */
710 	(void) ddi_dma_unbind_handle(channel->ic_desc_dma_handle);
711 	ddi_dma_mem_free(&channel->ic_desc_handle);
712 	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
713 
714 	mutex_destroy(&channel->ic_ring->cr_desc_mutex);
715 	mutex_destroy(&channel->ic_ring->cr_cmpl_mutex);
716 	kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t));
717 
718 }
719 
720 
721 /*
722  * ioat_ring_seed()
723  *    write the first descriptor in the ring.
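 *    Called at channel start and resume: the descriptor is copied into
 *    slot 0, the ring state is reset, and the engine is kicked (via the
 *    CBv1 start bit or a CBv2 descriptor count write).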
724  */
725 void
726 ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *in_desc)
727 {
728 	ioat_channel_ring_t *ring;
729 	ioat_chan_dma_desc_t *desc;
730 	ioat_chan_dma_desc_t *prev;
731 	ioat_state_t *state;
732 
733 
734 	state = channel->ic_state;
735 	ring = channel->ic_ring;
736 
737 	/* init the completion state */
738 	ring->cr_cmpl_gen = 0x0;
739 	ring->cr_cmpl_last = 0x0;
740 
741 	/* write in the descriptor and init the descriptor state */
742 	ring->cr_post_cnt++;
743 	channel->ic_ring->cr_desc[0] = *(ioat_chan_desc_t *)in_desc;
744 	ring->cr_desc_gen = 0;
745 	ring->cr_desc_prev = 0;
746 	ring->cr_desc_next = 1;
747 
748 	if (channel->ic_ver == IOAT_CBv1) {
749 		/* hit the start bit */
750 		ddi_put8(state->is_reg_handle,
751 		    &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x1);
752 	} else {
753 		/*
754 		 * if this is CBv2, link the descriptor to an empty
755 		 * descriptor
756 		 */
757 		ASSERT(ring->cr_chan->ic_ver == IOAT_CBv2);
758 		desc = (ioat_chan_dma_desc_t *)
759 		    &ring->cr_desc[ring->cr_desc_next];
760 		prev = (ioat_chan_dma_desc_t *)
761 		    &ring->cr_desc[ring->cr_desc_prev];
762 
763 		desc->dd_ctrl = 0;
764 		desc->dd_next_desc = 0x0;
765 
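		/*
		 * descriptors are 64 bytes, so (index << 6) converts a ring
		 * index into a byte offset from the ring's physical base.
		 */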
766 		prev->dd_next_desc = ring->cr_phys_desc +
767 		    (ring->cr_desc_next << 6);
768 
769 		ddi_put16(state->is_reg_handle,
770 		    (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT],
771 		    (uint16_t)1);
772 	}
773 
774 }
775 
776 
777 /*
778  * ioat_cmd_alloc()
779  */
780 int
781 ioat_cmd_alloc(void *private, int flags, dcopy_cmd_t *cmd)
782 {
783 	ioat_cmd_private_t *priv;
784 	ioat_channel_t channel;
785 	dcopy_cmd_t oldcmd;
786 	int kmflag;
787 
788 
789 	channel = (ioat_channel_t)private;
790 
791 	if (flags & DCOPY_NOSLEEP) {
792 		kmflag = KM_NOSLEEP;
793 	} else {
794 		kmflag = KM_SLEEP;
795 	}
796 
797 	/* save the command passed in, in case DCOPY_ALLOC_LINK is set */
798 	oldcmd = *cmd;
799 
800 	*cmd = kmem_cache_alloc(channel->ic_cmd_cache, kmflag);
801 	if (*cmd == NULL) {
802 		return (DCOPY_NORESOURCES);
803 	}
804 
805 	/* setup the dcopy and ioat private state pointers */
806 	(*cmd)->dp_version = DCOPY_CMD_V0;
807 	(*cmd)->dp_cmd = 0;
808 	(*cmd)->dp_private = (struct dcopy_cmd_priv_s *)
809 	    ((uintptr_t)(*cmd) + sizeof (struct dcopy_cmd_s));
810 	(*cmd)->dp_private->pr_device_cmd_private =
811 	    (struct ioat_cmd_private_s *)((uintptr_t)(*cmd)->dp_private +
812 	    sizeof (struct dcopy_cmd_priv_s));
813 
814 	/*
815 	 * if DCOPY_ALLOC_LINK is set, link the old command to the new one
816 	 * just allocated.
817 	 */
818 	priv = (*cmd)->dp_private->pr_device_cmd_private;
819 	if (flags & DCOPY_ALLOC_LINK) {
820 		priv->ip_next = oldcmd;
821 	} else {
822 		priv->ip_next = NULL;
823 	}
824 
825 	return (DCOPY_SUCCESS);
826 }
827 
828 
829 /*
830  * ioat_cmd_free()
831  */
832 void
833 ioat_cmd_free(void *private, dcopy_cmd_t *cmdp)
834 {
835 	ioat_cmd_private_t *priv;
836 	ioat_channel_t channel;
837 	dcopy_cmd_t next;
838 	dcopy_cmd_t cmd;
839 
840 
841 	channel = (ioat_channel_t)private;
842 	cmd = *(cmdp);
843 
844 	/*
845 	 * free all the commands in the chain (see DCOPY_ALLOC_LINK in
846 	 * ioat_cmd_alloc() for more info).
847 	 */
848 	while (cmd != NULL) {
849 		priv = cmd->dp_private->pr_device_cmd_private;
850 		next = priv->ip_next;
851 		kmem_cache_free(channel->ic_cmd_cache, cmd);
852 		cmd = next;
853 	}
854 	*cmdp = NULL;
855 }
856 
857 
858 /*
859  * ioat_cmd_post()
860  */
861 int
862 ioat_cmd_post(void *private, dcopy_cmd_t cmd)
863 {
864 	ioat_channel_ring_t *ring;
865 	ioat_cmd_private_t *priv;
866 	ioat_channel_t channel;
867 	ioat_state_t *state;
868 	uint64_t dest_paddr;
869 	uint64_t src_paddr;
870 	uint64_t dest_addr;
871 	uint32_t dest_size;
872 	uint64_t src_addr;
873 	uint32_t src_size;
874 	size_t xfer_size;
875 	uint32_t ctrl;
876 	size_t size;
877 	int e;
878 
879 
880 	channel = (ioat_channel_t)private;
881 	priv = cmd->dp_private->pr_device_cmd_private;
882 
883 	state = channel->ic_state;
884 	ring = channel->ic_ring;
885 
886 	mutex_enter(&ring->cr_desc_mutex);
887 
888 	/* if the channel has had a fatal failure, return failure */
889 	if (channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) {
890 		mutex_exit(&ring->cr_desc_mutex);
891 		return (DCOPY_FAILURE);
892 	}
893 
894 	/* make sure we have space for the descriptors */
895 	e = ioat_ring_reserve(channel, ring, cmd);
896 	if (e != DCOPY_SUCCESS) {
897 		mutex_exit(&ring->cr_desc_mutex);
898 		return (DCOPY_NORESOURCES);
899 	}
900 
901 	/* if we support DCA, and the DCA flag is set, post a DCA desc */
902 	if ((channel->ic_ver == IOAT_CBv2) &&
903 	    (cmd->dp_flags & DCOPY_CMD_DCA)) {
904 		ioat_cmd_post_dca(ring, cmd->dp_dca_id);
905 	}
906 
907 	/*
908 	 * the dma copy may have to be broken up into multiple descriptors
909 	 * since we can't cross a page boundary.
910 	 */
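	/*
	 * e.g. with source and destination both starting 0x800 bytes into a
	 * page, a 6KB copy is posted as a 2KB descriptor followed by a 4KB
	 * descriptor.
	 */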
911 	ASSERT(cmd->dp_version == DCOPY_CMD_V0);
912 	ASSERT(cmd->dp_cmd == DCOPY_CMD_COPY);
913 	src_addr = cmd->dp.copy.cc_source;
914 	dest_addr = cmd->dp.copy.cc_dest;
915 	size = cmd->dp.copy.cc_size;
916 	while (size > 0) {
917 		src_paddr = pa_to_ma(src_addr);
918 		dest_paddr = pa_to_ma(dest_addr);
919 
920 		/* adjust for any offset into the page */
921 		if ((src_addr & PAGEOFFSET) == 0) {
922 			src_size = PAGESIZE;
923 		} else {
924 			src_size = PAGESIZE - (src_addr & PAGEOFFSET);
925 		}
926 		if ((dest_addr & PAGEOFFSET) == 0) {
927 			dest_size = PAGESIZE;
928 		} else {
929 			dest_size = PAGESIZE - (dest_addr & PAGEOFFSET);
930 		}
931 
932 		/* take the smallest of the three */
933 		xfer_size = MIN(src_size, dest_size);
934 		xfer_size = MIN(xfer_size, size);
935 
936 		/*
937 		 * if this is the last descriptor, and we are supposed to
938 		 * generate a completion, generate one here. The same logic
939 		 * applies to the interrupt flag.
940 		 */
941 		ctrl = 0;
942 		if (xfer_size == size) {
943 			if (!(cmd->dp_flags & DCOPY_CMD_NOSTAT)) {
944 				ctrl |= IOAT_DESC_CTRL_CMPL;
945 			}
946 			if ((cmd->dp_flags & DCOPY_CMD_INTR)) {
947 				ctrl |= IOAT_DESC_CTRL_INTR;
948 			}
949 		}
950 
951 		ioat_cmd_post_copy(ring, src_paddr, dest_paddr, xfer_size,
952 		    ctrl);
953 
954 		/* go to the next page */
955 		src_addr += xfer_size;
956 		dest_addr += xfer_size;
957 		size -= xfer_size;
958 	}
959 
960 	/*
961 	 * if we are going to create a completion, save away the state so we
962 	 * can poll on it.
963 	 */
964 	if (!(cmd->dp_flags & DCOPY_CMD_NOSTAT)) {
965 		priv->ip_generation = ring->cr_desc_gen_prev;
966 		priv->ip_index = ring->cr_desc_prev;
967 	}
968 
969 	/* unless the command is being queued, notify the DMA engine */
970 	if (!(cmd->dp_flags & DCOPY_CMD_QUEUE)) {
971 		if (channel->ic_ver == IOAT_CBv1) {
972 			ddi_put8(state->is_reg_handle,
973 			    (uint8_t *)&channel->ic_regs[IOAT_V1_CHAN_CMD],
974 			    0x2);
975 		} else {
976 			ASSERT(channel->ic_ver == IOAT_CBv2);
977 			ddi_put16(state->is_reg_handle,
978 			    (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT],
979 			    (uint16_t)(ring->cr_post_cnt & 0xFFFF));
980 		}
981 	}
982 
983 	mutex_exit(&ring->cr_desc_mutex);
984 
985 	return (DCOPY_SUCCESS);
986 }
987 
988 
989 /*
990  * ioat_cmd_post_dca()
991  */
992 static void
993 ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id)
994 {
995 	ioat_chan_dca_desc_t *desc;
996 	ioat_chan_dca_desc_t *prev;
997 	ioat_channel_t channel;
998 
999 
1000 	channel = ring->cr_chan;
1001 	desc = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_next];
1002 	prev = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_prev];
1003 
1004 	/* keep track of the number of descs posted for cbv2 */
1005 	ring->cr_post_cnt++;
1006 
1007 	/*
1008 	 * post a context change descriptor. If dca has never been used on
1009 	 * this channel, or if the id doesn't match the last id used on this
1010 	 * channel, set CONTEXT_CHANGE bit and dca id, set dca state to active,
1011 	 * and save away the id we're using.
1012 	 */
1013 	desc->dd_ctrl = IOAT_DESC_CTRL_OP_CNTX;
1014 	desc->dd_next_desc = 0x0;
1015 	if (!channel->ic_dca_active || (channel->ic_dca_current != dca_id)) {
1016 		channel->ic_dca_active = B_TRUE;
1017 		channel->ic_dca_current = dca_id;
1018 		desc->dd_ctrl |= IOAT_DESC_CTRL_CNTX_CHNG;
1019 		desc->dd_cntx = dca_id;
1020 	}
1021 
1022 	/* Put this descriptor's physical address in the previous descriptor */
1023 	/*LINTED:E_TRUE_LOGICAL_EXPR*/
1024 	ASSERT(sizeof (ioat_chan_dca_desc_t) == 64);
1025 
1026 	/* sync the current desc */
1027 	(void) ddi_dma_sync(channel->ic_desc_dma_handle,
1028 	    ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV);
1029 
1030 	/* update the previous desc and sync it too */
1031 	prev->dd_next_desc = ring->cr_phys_desc +
1032 	    (ring->cr_desc_next << 6);
1033 	(void) ddi_dma_sync(channel->ic_desc_dma_handle,
1034 	    ring->cr_desc_prev << 6, 64, DDI_DMA_SYNC_FORDEV);
1035 
1036 	/* save the current desc_next and desc_gen for the completion */
1037 	ring->cr_desc_prev = ring->cr_desc_next;
1038 	ring->cr_desc_gen_prev = ring->cr_desc_gen;
1039 
1040 	/* increment next/gen so it points to the next free desc */
1041 	ring->cr_desc_next++;
1042 	if (ring->cr_desc_next > ring->cr_desc_last) {
1043 		ring->cr_desc_next = 0;
1044 		ring->cr_desc_gen++;
1045 	}
1046 
1047 	/*
1048 	 * if this is CBv2, link the descriptor to an empty descriptor. Since
1049 	 * we always leave one desc empty to detect full, this works out.
1050 	 */
1051 	if (ring->cr_chan->ic_ver == IOAT_CBv2) {
1052 		desc = (ioat_chan_dca_desc_t *)
1053 		    &ring->cr_desc[ring->cr_desc_next];
1054 		prev = (ioat_chan_dca_desc_t *)
1055 		    &ring->cr_desc[ring->cr_desc_prev];
1056 		desc->dd_ctrl = 0;
1057 		desc->dd_next_desc = 0x0;
1058 
1059 		prev->dd_next_desc = ring->cr_phys_desc +
1060 		    (ring->cr_desc_next << 6);
1061 	}
1062 }
1063 
1064 
1065 /*
1066  * ioat_cmd_post_copy()
1067  *
1068  */
1069 static void
1070 ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr,
1071     uint64_t dest_addr, uint32_t size, uint32_t ctrl)
1072 {
1073 	ioat_chan_dma_desc_t *desc;
1074 	ioat_chan_dma_desc_t *prev;
1075 	ioat_channel_t channel;
1076 
1077 
1078 	channel = ring->cr_chan;
1079 	desc = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_next];
1080 	prev = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_prev];
1081 
1082 	/* keep track of the number of descs posted for cbv2 */
1083 	ring->cr_post_cnt++;
1084 
1085 	/* write in the DMA desc */
1086 	desc->dd_ctrl = IOAT_DESC_CTRL_OP_DMA | ctrl;
1087 	desc->dd_size = size;
1088 	desc->dd_src_paddr = src_addr;
1089 	desc->dd_dest_paddr = dest_addr;
1090 	desc->dd_next_desc = 0x0;
1091 
1092 	/* Put this descriptor's physical address in the previous descriptor */
1093 	/*LINTED:E_TRUE_LOGICAL_EXPR*/
1094 	ASSERT(sizeof (ioat_chan_dma_desc_t) == 64);
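	/*
	 * since each descriptor is 64 bytes, (index << 6) below is the byte
	 * offset of a descriptor within the ring; each sync flushes just
	 * the one descriptor being handed to the hardware.
	 */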
1095 
1096 	/* sync the current desc */
1097 	(void) ddi_dma_sync(channel->ic_desc_dma_handle,
1098 	    ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV);
1099 
1100 	/* update the previous desc and sync it too */
1101 	prev->dd_next_desc = ring->cr_phys_desc +
1102 	    (ring->cr_desc_next << 6);
1103 	(void) ddi_dma_sync(channel->ic_desc_dma_handle,
1104 	    ring->cr_desc_prev << 6, 64, DDI_DMA_SYNC_FORDEV);
1105 
1106 	/* save the current desc_next and desc_gen for the completion */
1107 	ring->cr_desc_prev = ring->cr_desc_next;
1108 	ring->cr_desc_gen_prev = ring->cr_desc_gen;
1109 
1110 	/* increment next/gen so it points to the next free desc */
1111 	ring->cr_desc_next++;
1112 	if (ring->cr_desc_next > ring->cr_desc_last) {
1113 		ring->cr_desc_next = 0;
1114 		ring->cr_desc_gen++;
1115 	}
1116 
1117 	/*
1118 	 * if this is CBv2, link the descriptor to an empty descriptor. Since
1119 	 * we always leave one desc empty to detect full, this works out.
1120 	 */
1121 	if (ring->cr_chan->ic_ver == IOAT_CBv2) {
1122 		desc = (ioat_chan_dma_desc_t *)
1123 		    &ring->cr_desc[ring->cr_desc_next];
1124 		prev = (ioat_chan_dma_desc_t *)
1125 		    &ring->cr_desc[ring->cr_desc_prev];
1126 		desc->dd_size = 0;
1127 		desc->dd_ctrl = 0;
1128 		desc->dd_next_desc = 0x0;
1129 
1130 		prev->dd_next_desc = ring->cr_phys_desc +
1131 		    (ring->cr_desc_next << 6);
1132 	}
1133 }
1134 
1135 
1136 /*
1137  * ioat_cmd_poll()
1138  */
1139 int
1140 ioat_cmd_poll(void *private, dcopy_cmd_t cmd)
1141 {
1142 	ioat_channel_ring_t *ring;
1143 	ioat_cmd_private_t *priv;
1144 	ioat_channel_t channel;
1145 	uint64_t generation;
1146 	uint64_t last_cmpl;
1147 
1148 
1149 	channel = (ioat_channel_t)private;
1150 	priv = cmd->dp_private->pr_device_cmd_private;
1151 
1152 	ring = channel->ic_ring;
1153 	ASSERT(ring != NULL);
1154 
1155 	mutex_enter(&ring->cr_cmpl_mutex);
1156 
1157 	/* if the channel had a fatal failure, fail all polls */
1158 	if ((channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) ||
1159 	    IOAT_CMPL_FAILED(channel)) {
1160 		mutex_exit(&ring->cr_cmpl_mutex);
1161 		return (DCOPY_FAILURE);
1162 	}
1163 
1164 	/*
1165 	 * if the current completion is the same as the last one we read, the
1166 	 * post is still pending; nothing further to do. We track completions
1167 	 * as indexes into the ring since post uses VAs and the H/W returns
1168 	 * PAs. We grab a snapshot of generation and last_cmpl in the mutex.
1169 	 */
1170 	(void) ddi_dma_sync(channel->ic_cmpl_dma_handle, 0, 0,
1171 	    DDI_DMA_SYNC_FORCPU);
1172 	last_cmpl = IOAT_CMPL_INDEX(channel);
1173 	if (last_cmpl != ring->cr_cmpl_last) {
1174 		/*
1175 		 * if we wrapped the ring, increment the generation. Store
1176 		 * the last cmpl. This logic assumes a physically contiguous
1177 		 * ring.
1178 		 */
1179 		if (last_cmpl < ring->cr_cmpl_last) {
1180 			ring->cr_cmpl_gen++;
1181 		}
1182 		ring->cr_cmpl_last = last_cmpl;
1183 		generation = ring->cr_cmpl_gen;
1184 
1185 	} else {
1186 		generation = ring->cr_cmpl_gen;
1187 	}
1188 
1189 	mutex_exit(&ring->cr_cmpl_mutex);
1190 
1191 	/*
1192 	 * if cmd isn't passed in, we'll return. Useful for updating the
1193 	 * consumer pointer (ring->cr_cmpl_last).
1194 	 */
1195 	if (cmd == NULL) {
1196 		return (DCOPY_PENDING);
1197 	}
1198 
1199 	/*
1200 	 * if the post's generation is old, this post has completed. No reason
1201 	 * to go check the last completion. If the generation is the same
1202 	 * and the post is at or before the last completion processed,
1203 	 * the post has completed.
1204 	 */
1205 	if (priv->ip_generation < generation) {
1206 		return (DCOPY_COMPLETED);
1207 	} else if ((priv->ip_generation == generation) &&
1208 	    (priv->ip_index <= last_cmpl)) {
1209 		return (DCOPY_COMPLETED);
1210 	}
1211 
1212 	return (DCOPY_PENDING);
1213 }
1214 
1215 
1216 /*
1217  * ioat_ring_reserve()
1218  */
1219 int
1220 ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t *ring,
1221     dcopy_cmd_t cmd)
1222 {
1223 	uint64_t dest_addr;
1224 	uint32_t dest_size;
1225 	uint64_t src_addr;
1226 	uint32_t src_size;
1227 	size_t xfer_size;
1228 	uint64_t desc;
1229 	int num_desc;
1230 	size_t size;
1231 	int i;
1232 
1233 
1234 	/*
1235 	 * figure out how many descriptors we need. This can include a dca
1236 	 * desc and multiple desc for a dma copy.
1237 	 */
1238 	num_desc = 0;
1239 	if ((channel->ic_ver == IOAT_CBv2) &&
1240 	    (cmd->dp_flags & DCOPY_CMD_DCA)) {
1241 		num_desc++;
1242 	}
1243 	src_addr = cmd->dp.copy.cc_source;
1244 	dest_addr = cmd->dp.copy.cc_dest;
1245 	size = cmd->dp.copy.cc_size;
1246 	while (size > 0) {
1247 		num_desc++;
1248 
1249 		/* adjust for any offset into the page */
1250 		if ((src_addr & PAGEOFFSET) == 0) {
1251 			src_size = PAGESIZE;
1252 		} else {
1253 			src_size = PAGESIZE - (src_addr & PAGEOFFSET);
1254 		}
1255 		if ((dest_addr & PAGEOFFSET) == 0) {
1256 			dest_size = PAGESIZE;
1257 		} else {
1258 			dest_size = PAGESIZE - (dest_addr & PAGEOFFSET);
1259 		}
1260 
1261 		/* take the smallest of the three */
1262 		xfer_size = MIN(src_size, dest_size);
1263 		xfer_size = MIN(xfer_size, size);
1264 
1265 		/* go to the next page */
1266 		src_addr += xfer_size;
1267 		dest_addr += xfer_size;
1268 		size -= xfer_size;
1269 	}
1270 
1271 	/* Make sure we have space for these descriptors */
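	/*
	 * the ring deliberately keeps one slot unused so that full and empty
	 * can be distinguished; we're full if advancing would land on the
	 * last completed index.
	 */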
1272 	desc = ring->cr_desc_next;
1273 	for (i = 0; i < num_desc; i++) {
1274 
1275 		/*
1276 		 * if this is the last descriptor in the ring, see if the
1277 		 * last completed descriptor is #0.
1278 		 */
1279 		if (desc == ring->cr_desc_last) {
1280 			if (ring->cr_cmpl_last == 0) {
1281 				/*
1282 				 * if we think the ring is full, update where
1283 				 * the H/W really is and check for full again.
1284 				 */
1285 				(void) ioat_cmd_poll(channel, NULL);
1286 				if (ring->cr_cmpl_last == 0) {
1287 					return (DCOPY_NORESOURCES);
1288 				}
1289 			}
1290 
1291 			/*
1292 			 * go to the next descriptor which is zero in this
1293 			 * case.
1294 			 */
1295 			desc = 0;
1296 
1297 		/*
1298 		 * if this is not the last descriptor in the ring, see if
1299 		 * the last completion we saw was the next descriptor.
1300 		 */
1301 		} else {
1302 			if ((desc + 1) == ring->cr_cmpl_last) {
1303 				/*
1304 				 * if we think the ring is full, update where
1305 				 * the H/W really is and check for full again.
1306 				 */
1307 				(void) ioat_cmd_poll(channel, NULL);
1308 				if ((desc + 1) == ring->cr_cmpl_last) {
1309 					return (DCOPY_NORESOURCES);
1310 				}
1311 			}
1312 
1313 			/* go to the next descriptor */
1314 			desc++;
1315 		}
1316 	}
1317 
1318 	return (DCOPY_SUCCESS);
1319 }
1320