xref: /illumos-gate/usr/src/uts/i86pc/io/ioat/ioat_chan.c (revision fb2caebe9e38ee2e6e469d5136fb247faaa7299b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/errno.h>
28 #include <sys/types.h>
29 #include <sys/conf.h>
30 #include <sys/kmem.h>
31 #include <sys/ddi.h>
32 #include <sys/stat.h>
33 #include <sys/sunddi.h>
34 #include <sys/file.h>
35 #include <sys/open.h>
36 #include <sys/modctl.h>
37 #include <sys/ddi_impldefs.h>
38 #include <sys/sysmacros.h>
39 #include <vm/hat.h>
40 #include <vm/as.h>
41 #include <sys/mach_mmu.h>
42 #ifdef __xpv
43 #include <sys/hypervisor.h>
44 #endif
45 
46 #include <sys/ioat.h>
47 
48 
49 extern ddi_device_acc_attr_t ioat_acc_attr;
50 
51 /* dma attr for the descriptor rings */
52 ddi_dma_attr_t ioat_desc_dma_attr = {
53 	DMA_ATTR_V0,		/* dma_attr_version */
54 	0x0,			/* dma_attr_addr_lo */
55 	0xffffffffffffffff,	/* dma_attr_addr_hi */
56 	0xffffffff,		/* dma_attr_count_max */
57 	0x1000,			/* dma_attr_align */
58 	0x1,			/* dma_attr_burstsizes */
59 	0x1,			/* dma_attr_minxfer */
60 	0xffffffff,		/* dma_attr_maxxfer */
61 	0xffffffff,		/* dma_attr_seg */
62 	0x1,			/* dma_attr_sgllen */
63 	0x1,			/* dma_attr_granular */
64 	0x0,			/* dma_attr_flags */
65 };
66 
67 /* dma attr for the completion buffers */
68 ddi_dma_attr_t ioat_cmpl_dma_attr = {
69 	DMA_ATTR_V0,		/* dma_attr_version */
70 	0x0,			/* dma_attr_addr_lo */
71 	0xffffffffffffffff,	/* dma_attr_addr_hi */
72 	0xffffffff,		/* dma_attr_count_max */
73 	0x40,			/* dma_attr_align */
74 	0x1,			/* dma_attr_burstsizes */
75 	0x1,			/* dma_attr_minxfer */
76 	0xffffffff,		/* dma_attr_maxxfer */
77 	0xffffffff,		/* dma_attr_seg */
78 	0x1,			/* dma_attr_sgllen */
79 	0x1,			/* dma_attr_granular */
80 	0x0,			/* dma_attr_flags */
81 };
82 
83 static int ioat_completion_alloc(ioat_channel_t channel);
84 static void ioat_completion_free(ioat_channel_t channel);
85 static void ioat_channel_start(ioat_channel_t channel);
86 static void ioat_channel_reset(ioat_channel_t channel);
87 
88 int ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt);
89 void ioat_ring_free(ioat_channel_t channel);
90 void ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *desc);
91 int ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t *ring,
92     dcopy_cmd_t cmd);
93 
94 static void ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr,
95     uint64_t dest_addr, uint32_t size, uint32_t ctrl);
96 static void ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id);
97 
98 
99 /*
100  * ioat_channel_init()
101  */
102 int
103 ioat_channel_init(ioat_state_t *state)
104 {
105 	int i;
106 
107 	/*
108 	 * initialize each dma channel's state which doesn't change across
109 	 * channel alloc/free.
110 	 */
111 	state->is_chansize = sizeof (struct ioat_channel_s) *
112 	    state->is_num_channels;
113 	state->is_channel = kmem_zalloc(state->is_chansize, KM_SLEEP);
114 	for (i = 0; i < state->is_num_channels; i++) {
115 		state->is_channel[i].ic_state = state;
116 		state->is_channel[i].ic_regs = (uint8_t *)
117 		    ((uintptr_t)state->is_genregs +
118 		    (uintptr_t)(IOAT_CHANNELREG_OFFSET * (i + 1)));
119 	}
120 
121 	/* initial the allocator (from 0 to state->is_num_channels) */
122 	ioat_rs_init(state, 0, state->is_num_channels, &state->is_channel_rs);
123 
124 	return (DDI_SUCCESS);
125 }
126 
127 
128 /*
129  * ioat_channel_fini()
130  */
131 void
132 ioat_channel_fini(ioat_state_t *state)
133 {
134 	ioat_rs_fini(&state->is_channel_rs);
135 	kmem_free(state->is_channel, state->is_chansize);
136 }
137 
138 
139 /*
140  * ioat_channel_alloc()
141  *   NOTE: We intentionaly don't handle DCOPY_SLEEP (if no channels are
142  *	available)
143  */
144 /*ARGSUSED*/
145 int
146 ioat_channel_alloc(void *device_private, dcopy_handle_t handle, int flags,
147     uint_t size, dcopy_query_channel_t *info, void *channel_private)
148 {
149 #define	CHANSTRSIZE	20
150 	struct ioat_channel_s *channel;
151 	char chanstr[CHANSTRSIZE];
152 	ioat_channel_t *chan;
153 	ioat_state_t *state;
154 	size_t cmd_size;
155 	uint_t chan_num;
156 	uint32_t estat;
157 	int e;
158 
159 
160 	state = (ioat_state_t *)device_private;
161 	chan = (ioat_channel_t *)channel_private;
162 
163 	/* allocate a H/W channel */
164 	e = ioat_rs_alloc(state->is_channel_rs, &chan_num);
165 	if (e != DDI_SUCCESS) {
166 		return (DCOPY_NORESOURCES);
167 	}
168 
169 	channel = &state->is_channel[chan_num];
170 	channel->ic_inuse = B_TRUE;
171 	channel->ic_chan_num = chan_num;
172 	channel->ic_ver = state->is_ver;
173 	channel->ic_dca_active = B_FALSE;
174 	channel->ic_channel_state = IOAT_CHANNEL_OK;
175 	channel->ic_dcopy_handle = handle;
176 
177 #ifdef	DEBUG
178 	{
179 		/* if we're cbv2, verify that the V2 compatibility bit is set */
180 		uint16_t reg;
181 		if (channel->ic_ver == IOAT_CBv2) {
182 			reg = ddi_get16(state->is_reg_handle,
183 			    (uint16_t *)&channel->ic_regs[IOAT_CHAN_COMP]);
184 			ASSERT(reg & 0x2);
185 		}
186 	}
187 #endif
188 
189 	/*
190 	 * Configure DMA channel
191 	 *   Channel In Use
192 	 *   Error Interrupt Enable
193 	 *   Any Error Abort Enable
194 	 *   Error Completion Enable
195 	 */
196 	ddi_put16(state->is_reg_handle,
197 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C);
198 
199 	/* check channel error register, clear any errors */
200 	estat = ddi_get32(state->is_reg_handle,
201 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
202 	if (estat != 0) {
203 #ifdef	DEBUG
204 		cmn_err(CE_CONT, "cleared errors (0x%x) before channel (%d) "
205 		    "enable\n", estat, channel->ic_chan_num);
206 #endif
207 		ddi_put32(state->is_reg_handle,
208 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR], estat);
209 	}
210 
211 	/* allocate and initialize the descriptor buf */
212 	e = ioat_ring_alloc(channel, size);
213 	if (e != DDI_SUCCESS) {
214 		goto chinitfail_desc_alloc;
215 	}
216 
217 	/* allocate and initialize the completion space */
218 	e = ioat_completion_alloc(channel);
219 	if (e != DDI_SUCCESS) {
220 		goto chinitfail_completion_alloc;
221 	}
222 
223 	/* setup kmem_cache for commands */
224 	cmd_size = sizeof (struct dcopy_cmd_s) +
225 	    sizeof (struct dcopy_cmd_priv_s) +
226 	    sizeof (struct ioat_cmd_private_s);
227 	(void) snprintf(chanstr, CHANSTRSIZE, "ioat%dchan%dcmd",
228 	    state->is_instance, channel->ic_chan_num);
229 	channel->ic_cmd_cache = kmem_cache_create(chanstr, cmd_size, 64,
230 	    NULL, NULL, NULL, NULL, NULL, 0);
231 	if (channel->ic_cmd_cache == NULL) {
232 		goto chinitfail_kmem_cache;
233 	}
234 
235 	/* start-up the channel */
236 	ioat_channel_start(channel);
237 
238 	/* fill in the channel info returned to dcopy */
239 	info->qc_version = DCOPY_QUERY_CHANNEL_V0;
240 	info->qc_id = state->is_deviceinfo.di_id;
241 	info->qc_capabilities = (uint64_t)state->is_capabilities;
242 	info->qc_channel_size = (uint64_t)size;
243 	info->qc_chan_num = (uint64_t)channel->ic_chan_num;
244 	if (channel->ic_ver == IOAT_CBv1) {
245 		info->qc_dca_supported = B_FALSE;
246 	} else {
247 		if (info->qc_capabilities & IOAT_DMACAP_DCA) {
248 			info->qc_dca_supported = B_TRUE;
249 		} else {
250 			info->qc_dca_supported = B_FALSE;
251 		}
252 	}
253 
254 	*chan = channel;
255 
256 	return (DCOPY_SUCCESS);
257 
258 chinitfail_kmem_cache:
259 	ioat_completion_free(channel);
260 chinitfail_completion_alloc:
261 	ioat_ring_free(channel);
262 chinitfail_desc_alloc:
263 	return (DCOPY_FAILURE);
264 }
265 
266 
267 /*
268  * ioat_channel_suspend()
269  */
270 /*ARGSUSED*/
271 void
272 ioat_channel_suspend(ioat_state_t *state)
273 {
274 	/*
275 	 * normally you would disable interrupts and reset the H/W here. But
276 	 * since the suspend framework doesn't know who is using us, it may
277 	 * not suspend their I/O before us.  Since we won't actively be doing
278 	 * any DMA or interrupts unless someone asks us to, it's safe to not
279 	 * do anything here.
280 	 */
281 }
282 
283 
284 /*
285  * ioat_channel_resume()
286  */
287 int
288 ioat_channel_resume(ioat_state_t *state)
289 {
290 	ioat_channel_ring_t *ring;
291 	ioat_channel_t channel;
292 	uint32_t estat;
293 	int i;
294 
295 
296 	for (i = 0; i < state->is_num_channels; i++) {
297 		channel = &state->is_channel[i];
298 		ring = channel->ic_ring;
299 
300 		if (!channel->ic_inuse) {
301 			continue;
302 		}
303 
304 		/*
305 		 * Configure DMA channel
306 		 *   Channel In Use
307 		 *   Error Interrupt Enable
308 		 *   Any Error Abort Enable
309 		 *   Error Completion Enable
310 		 */
311 		ddi_put16(state->is_reg_handle,
312 		    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C);
313 
314 		/* check channel error register, clear any errors */
315 		estat = ddi_get32(state->is_reg_handle,
316 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
317 		if (estat != 0) {
318 #ifdef	DEBUG
319 			cmn_err(CE_CONT, "cleared errors (0x%x) before channel"
320 			    " (%d) enable\n", estat, channel->ic_chan_num);
321 #endif
322 			ddi_put32(state->is_reg_handle,
323 			    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR],
324 			    estat);
325 		}
326 
327 		/* Re-initialize the ring */
328 		bzero(ring->cr_desc, channel->ic_desc_alloc_size);
329 		/* write the physical address into the chain address register */
330 		if (channel->ic_ver == IOAT_CBv1) {
331 			ddi_put32(state->is_reg_handle,
332 			    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO],
333 			    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
334 			ddi_put32(state->is_reg_handle,
335 			    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI],
336 			    (uint32_t)(ring->cr_phys_desc >> 32));
337 		} else {
338 			ASSERT(channel->ic_ver == IOAT_CBv2);
339 			ddi_put32(state->is_reg_handle,
340 			    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO],
341 			    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
342 			ddi_put32(state->is_reg_handle,
343 			    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI],
344 			    (uint32_t)(ring->cr_phys_desc >> 32));
345 		}
346 
347 		/* re-initialize the completion buffer */
348 		bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size);
349 		/* write the phys addr into the completion address register */
350 		ddi_put32(state->is_reg_handle,
351 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO],
352 		    (uint32_t)(channel->ic_phys_cmpl & 0xffffffff));
353 		ddi_put32(state->is_reg_handle,
354 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI],
355 		    (uint32_t)(channel->ic_phys_cmpl >> 32));
356 
357 		/* start-up the channel */
358 		ioat_channel_start(channel);
359 
360 	}
361 
362 	return (DDI_SUCCESS);
363 }
364 
365 /*
366  * quiesce(9E) entry point.
367  *
368  * This function is called when the system is single-threaded at high
369  * PIL with preemption disabled. Therefore, this function must not be
370  * blocked.
371  *
372  * This function returns DDI_SUCCESS on success, or DDI_FAILURE on failure.
373  * DDI_FAILURE indicates an error condition and should almost never happen.
374  */
375 void
376 ioat_channel_quiesce(ioat_state_t *state)
377 {
378 	int i;
379 
380 	/*
381 	 * Walk through all channels and quiesce
382 	 */
383 	for (i = 0; i < state->is_num_channels; i++) {
384 
385 		ioat_channel_t	channel = state->is_channel + i;
386 
387 		if (!channel->ic_inuse)
388 			continue;
389 
390 		/* disable the interrupts */
391 		ddi_put16(state->is_reg_handle,
392 		    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL],
393 		    0x0);
394 
395 		ioat_channel_reset(channel);
396 	}
397 }
398 
399 
400 /*
401  * ioat_channel_free()
402  */
403 void
404 ioat_channel_free(void *channel_private)
405 {
406 	struct ioat_channel_s *channel;
407 	ioat_channel_t *chan;
408 	ioat_state_t *state;
409 	uint_t chan_num;
410 
411 
412 	chan = (ioat_channel_t *)channel_private;
413 	channel = *chan;
414 
415 	state = channel->ic_state;
416 	chan_num = channel->ic_chan_num;
417 
418 	/* disable the interrupts */
419 	ddi_put16(state->is_reg_handle,
420 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x0);
421 
422 	ioat_channel_reset(channel);
423 
424 	/* cleanup command cache */
425 	kmem_cache_destroy(channel->ic_cmd_cache);
426 
427 	/* clean-up/free-up the completion space and descriptors */
428 	ioat_completion_free(channel);
429 	ioat_ring_free(channel);
430 
431 	channel->ic_inuse = B_FALSE;
432 
433 	/* free the H/W DMA engine */
434 	ioat_rs_free(state->is_channel_rs, chan_num);
435 
436 	*chan = NULL;
437 }
438 
439 
440 /*
441  * ioat_channel_intr()
442  */
443 void
444 ioat_channel_intr(ioat_channel_t channel)
445 {
446 	ioat_state_t *state;
447 	uint16_t chanctrl;
448 	uint32_t chanerr;
449 	uint32_t status;
450 
451 
452 	state = channel->ic_state;
453 
454 	if (channel->ic_ver == IOAT_CBv1) {
455 		status = ddi_get32(state->is_reg_handle,
456 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_STS_LO]);
457 	} else {
458 		ASSERT(channel->ic_ver == IOAT_CBv2);
459 		status = ddi_get32(state->is_reg_handle,
460 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_STS_LO]);
461 	}
462 
463 	/* if that status isn't ACTIVE or IDLE, the channel has failed */
464 	if (status & IOAT_CHAN_STS_FAIL_MASK) {
465 		chanerr = ddi_get32(state->is_reg_handle,
466 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
467 		cmn_err(CE_WARN, "channel(%d) fatal failure! "
468 		    "chanstat_lo=0x%X; chanerr=0x%X\n",
469 		    channel->ic_chan_num, status, chanerr);
470 		channel->ic_channel_state = IOAT_CHANNEL_IN_FAILURE;
471 		ioat_channel_reset(channel);
472 
473 		return;
474 	}
475 
476 	/*
477 	 * clear interrupt disable bit if set (it's a RW1C). Read it back to
478 	 * ensure the write completes.
479 	 */
480 	chanctrl = ddi_get16(state->is_reg_handle,
481 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]);
482 	ddi_put16(state->is_reg_handle,
483 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], chanctrl);
484 	(void) ddi_get16(state->is_reg_handle,
485 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]);
486 
487 	/* tell dcopy we have seen a completion on this channel */
488 	dcopy_device_channel_notify(channel->ic_dcopy_handle, DCOPY_COMPLETION);
489 }
490 
491 
492 /*
493  * ioat_channel_start()
494  */
495 void
496 ioat_channel_start(ioat_channel_t channel)
497 {
498 	ioat_chan_dma_desc_t desc;
499 
500 	/* set the first descriptor up as a NULL descriptor */
501 	bzero(&desc, sizeof (desc));
502 	desc.dd_size = 0;
503 	desc.dd_ctrl = IOAT_DESC_CTRL_OP_DMA | IOAT_DESC_DMACTRL_NULL |
504 	    IOAT_DESC_CTRL_CMPL;
505 	desc.dd_next_desc = 0x0;
506 
507 	/* setup the very first descriptor */
508 	ioat_ring_seed(channel, &desc);
509 }
510 
511 
512 /*
513  * ioat_channel_reset()
514  */
515 void
516 ioat_channel_reset(ioat_channel_t channel)
517 {
518 	ioat_state_t *state;
519 
520 	state = channel->ic_state;
521 
522 	/* hit the reset bit */
523 	if (channel->ic_ver == IOAT_CBv1) {
524 		ddi_put8(state->is_reg_handle,
525 		    &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x20);
526 	} else {
527 		ASSERT(channel->ic_ver == IOAT_CBv2);
528 		ddi_put8(state->is_reg_handle,
529 		    &channel->ic_regs[IOAT_V2_CHAN_CMD], 0x20);
530 	}
531 }
532 
533 
534 /*
535  * ioat_completion_alloc()
536  */
537 int
538 ioat_completion_alloc(ioat_channel_t channel)
539 {
540 	ioat_state_t *state;
541 	size_t real_length;
542 	uint_t cookie_cnt;
543 	int e;
544 
545 
546 	state = channel->ic_state;
547 
548 	/*
549 	 * allocate memory for the completion status, zero it out, and get
550 	 * the paddr. We'll allocate a physically contiguous cache line.
551 	 */
552 	e = ddi_dma_alloc_handle(state->is_dip, &ioat_cmpl_dma_attr,
553 	    DDI_DMA_SLEEP, NULL, &channel->ic_cmpl_dma_handle);
554 	if (e != DDI_SUCCESS) {
555 		goto cmplallocfail_alloc_handle;
556 	}
557 	channel->ic_cmpl_alloc_size = 64;
558 	e = ddi_dma_mem_alloc(channel->ic_cmpl_dma_handle,
559 	    channel->ic_cmpl_alloc_size, &ioat_acc_attr,
560 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
561 	    (caddr_t *)&channel->ic_cmpl, &real_length,
562 	    &channel->ic_cmpl_handle);
563 	if (e != DDI_SUCCESS) {
564 		goto cmplallocfail_mem_alloc;
565 	}
566 	bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size);
567 	e = ddi_dma_addr_bind_handle(channel->ic_cmpl_dma_handle, NULL,
568 	    (caddr_t)channel->ic_cmpl, channel->ic_cmpl_alloc_size,
569 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
570 	    &channel->ic_cmpl_cookie, &cookie_cnt);
571 	if (e != DDI_SUCCESS) {
572 		goto cmplallocfail_addr_bind;
573 	}
574 	ASSERT(cookie_cnt == 1);
575 	ASSERT(channel->ic_cmpl_cookie.dmac_size ==
576 	    channel->ic_cmpl_alloc_size);
577 	channel->ic_phys_cmpl = channel->ic_cmpl_cookie.dmac_laddress;
578 
579 	/* write the physical address into the completion address register */
580 	ddi_put32(state->is_reg_handle,
581 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO],
582 	    (uint32_t)(channel->ic_phys_cmpl & 0xffffffff));
583 	ddi_put32(state->is_reg_handle,
584 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI],
585 	    (uint32_t)(channel->ic_phys_cmpl >> 32));
586 
587 	return (DDI_SUCCESS);
588 
589 cmplallocfail_addr_bind:
590 	ddi_dma_mem_free(&channel->ic_desc_handle);
591 cmplallocfail_mem_alloc:
592 	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
593 cmplallocfail_alloc_handle:
594 	return (DDI_FAILURE);
595 }
596 
597 
598 /*
599  * ioat_completion_free()
600  */
601 void
602 ioat_completion_free(ioat_channel_t channel)
603 {
604 	ioat_state_t *state;
605 
606 	state = channel->ic_state;
607 
608 	/* reset the completion address register */
609 	ddi_put32(state->is_reg_handle,
610 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO], 0x0);
611 	ddi_put32(state->is_reg_handle,
612 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI], 0x0);
613 
614 	/* unbind, then free up the memory, dma handle */
615 	(void) ddi_dma_unbind_handle(channel->ic_cmpl_dma_handle);
616 	ddi_dma_mem_free(&channel->ic_cmpl_handle);
617 	ddi_dma_free_handle(&channel->ic_cmpl_dma_handle);
618 }
619 
620 /*
621  * ioat_ring_alloc()
622  */
623 int
624 ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt)
625 {
626 	ioat_channel_ring_t *ring;
627 	ioat_state_t *state;
628 	size_t real_length;
629 	uint_t cookie_cnt;
630 	int e;
631 
632 
633 	state = channel->ic_state;
634 
635 	ring = kmem_zalloc(sizeof (ioat_channel_ring_t), KM_SLEEP);
636 	channel->ic_ring = ring;
637 	ring->cr_chan = channel;
638 	ring->cr_post_cnt = 0;
639 
640 	mutex_init(&ring->cr_cmpl_mutex, NULL, MUTEX_DRIVER,
641 	    channel->ic_state->is_iblock_cookie);
642 	mutex_init(&ring->cr_desc_mutex, NULL, MUTEX_DRIVER,
643 	    channel->ic_state->is_iblock_cookie);
644 
645 	/*
646 	 * allocate memory for the ring, zero it out, and get the paddr.
647 	 * We'll allocate a physically contiguous chunck of memory  which
648 	 * simplifies the completion logic.
649 	 */
650 	e = ddi_dma_alloc_handle(state->is_dip, &ioat_desc_dma_attr,
651 	    DDI_DMA_SLEEP, NULL, &channel->ic_desc_dma_handle);
652 	if (e != DDI_SUCCESS) {
653 		goto ringallocfail_alloc_handle;
654 	}
655 	/*
656 	 * allocate one extra descriptor so we can simplify the empty/full
657 	 * logic. Then round that number up to a whole multiple of 4.
658 	 */
659 	channel->ic_chan_desc_cnt = ((desc_cnt + 1) + 3) & ~0x3;
660 	ring->cr_desc_last = channel->ic_chan_desc_cnt - 1;
661 	channel->ic_desc_alloc_size = channel->ic_chan_desc_cnt *
662 	    sizeof (ioat_chan_desc_t);
663 	e = ddi_dma_mem_alloc(channel->ic_desc_dma_handle,
664 	    channel->ic_desc_alloc_size, &ioat_acc_attr,
665 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
666 	    (caddr_t *)&ring->cr_desc, &real_length, &channel->ic_desc_handle);
667 	if (e != DDI_SUCCESS) {
668 		goto ringallocfail_mem_alloc;
669 	}
670 	bzero(ring->cr_desc, channel->ic_desc_alloc_size);
671 	e = ddi_dma_addr_bind_handle(channel->ic_desc_dma_handle, NULL,
672 	    (caddr_t)ring->cr_desc, channel->ic_desc_alloc_size,
673 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
674 	    &channel->ic_desc_cookies, &cookie_cnt);
675 	if (e != DDI_SUCCESS) {
676 		goto ringallocfail_addr_bind;
677 	}
678 	ASSERT(cookie_cnt == 1);
679 	ASSERT(channel->ic_desc_cookies.dmac_size ==
680 	    channel->ic_desc_alloc_size);
681 	ring->cr_phys_desc = channel->ic_desc_cookies.dmac_laddress;
682 
683 	/* write the physical address into the chain address register */
684 	if (channel->ic_ver == IOAT_CBv1) {
685 		ddi_put32(state->is_reg_handle,
686 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO],
687 		    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
688 		ddi_put32(state->is_reg_handle,
689 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI],
690 		    (uint32_t)(ring->cr_phys_desc >> 32));
691 	} else {
692 		ASSERT(channel->ic_ver == IOAT_CBv2);
693 		ddi_put32(state->is_reg_handle,
694 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO],
695 		    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
696 		ddi_put32(state->is_reg_handle,
697 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI],
698 		    (uint32_t)(ring->cr_phys_desc >> 32));
699 	}
700 
701 	return (DCOPY_SUCCESS);
702 
703 ringallocfail_addr_bind:
704 	ddi_dma_mem_free(&channel->ic_desc_handle);
705 ringallocfail_mem_alloc:
706 	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
707 ringallocfail_alloc_handle:
708 	mutex_destroy(&ring->cr_desc_mutex);
709 	mutex_destroy(&ring->cr_cmpl_mutex);
710 	kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t));
711 
712 	return (DCOPY_FAILURE);
713 }
714 
715 
716 /*
717  * ioat_ring_free()
718  */
719 void
720 ioat_ring_free(ioat_channel_t channel)
721 {
722 	ioat_state_t *state;
723 
724 
725 	state = channel->ic_state;
726 
727 	/* reset the chain address register */
728 	if (channel->ic_ver == IOAT_CBv1) {
729 		ddi_put32(state->is_reg_handle,
730 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO], 0x0);
731 		ddi_put32(state->is_reg_handle,
732 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI], 0x0);
733 	} else {
734 		ASSERT(channel->ic_ver == IOAT_CBv2);
735 		ddi_put32(state->is_reg_handle,
736 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO], 0x0);
737 		ddi_put32(state->is_reg_handle,
738 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI], 0x0);
739 	}
740 
741 	/* unbind, then free up the memory, dma handle */
742 	(void) ddi_dma_unbind_handle(channel->ic_desc_dma_handle);
743 	ddi_dma_mem_free(&channel->ic_desc_handle);
744 	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
745 
746 	mutex_destroy(&channel->ic_ring->cr_desc_mutex);
747 	mutex_destroy(&channel->ic_ring->cr_cmpl_mutex);
748 	kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t));
749 
750 }
751 
752 
753 /*
754  * ioat_ring_seed()
755  *    write the first descriptor in the ring.
756  */
757 void
758 ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *in_desc)
759 {
760 	ioat_channel_ring_t *ring;
761 	ioat_chan_dma_desc_t *desc;
762 	ioat_chan_dma_desc_t *prev;
763 	ioat_state_t *state;
764 
765 
766 	state = channel->ic_state;
767 	ring = channel->ic_ring;
768 
769 	/* init the completion state */
770 	ring->cr_cmpl_gen = 0x0;
771 	ring->cr_cmpl_last = 0x0;
772 
773 	/* write in the descriptor and init the descriptor state */
774 	ring->cr_post_cnt++;
775 	channel->ic_ring->cr_desc[0] = *(ioat_chan_desc_t *)in_desc;
776 	ring->cr_desc_gen = 0;
777 	ring->cr_desc_prev = 0;
778 	ring->cr_desc_next = 1;
779 
780 	if (channel->ic_ver == IOAT_CBv1) {
781 		/* hit the start bit */
782 		ddi_put8(state->is_reg_handle,
783 		    &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x1);
784 	} else {
785 		/*
786 		 * if this is CBv2, link the descriptor to an empty
787 		 * descriptor
788 		 */
789 		ASSERT(ring->cr_chan->ic_ver == IOAT_CBv2);
790 		desc = (ioat_chan_dma_desc_t *)
791 		    &ring->cr_desc[ring->cr_desc_next];
792 		prev = (ioat_chan_dma_desc_t *)
793 		    &ring->cr_desc[ring->cr_desc_prev];
794 
795 		desc->dd_ctrl = 0;
796 		desc->dd_next_desc = 0x0;
797 
798 		prev->dd_next_desc = ring->cr_phys_desc +
799 		    (ring->cr_desc_next << 6);
800 
801 		ddi_put16(state->is_reg_handle,
802 		    (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT],
803 		    (uint16_t)1);
804 	}
805 
806 }
807 
808 
809 /*
810  * ioat_cmd_alloc()
811  */
812 int
813 ioat_cmd_alloc(void *private, int flags, dcopy_cmd_t *cmd)
814 {
815 	ioat_cmd_private_t *priv;
816 	ioat_channel_t channel;
817 	dcopy_cmd_t oldcmd;
818 	int kmflag;
819 
820 
821 	channel = (ioat_channel_t)private;
822 
823 	if (flags & DCOPY_NOSLEEP) {
824 		kmflag = KM_NOSLEEP;
825 	} else {
826 		kmflag = KM_SLEEP;
827 	}
828 
829 	/* save the command passed incase DCOPY_ALLOC_LINK is set */
830 	oldcmd = *cmd;
831 
832 	*cmd = kmem_cache_alloc(channel->ic_cmd_cache, kmflag);
833 	if (*cmd == NULL) {
834 		return (DCOPY_NORESOURCES);
835 	}
836 
837 	/* setup the dcopy and ioat private state pointers */
838 	(*cmd)->dp_version = DCOPY_CMD_V0;
839 	(*cmd)->dp_cmd = 0;
840 	(*cmd)->dp_private = (struct dcopy_cmd_priv_s *)
841 	    ((uintptr_t)(*cmd) + sizeof (struct dcopy_cmd_s));
842 	(*cmd)->dp_private->pr_device_cmd_private =
843 	    (struct ioat_cmd_private_s *)((uintptr_t)(*cmd)->dp_private +
844 	    sizeof (struct dcopy_cmd_priv_s));
845 
846 	/*
847 	 * if DCOPY_ALLOC_LINK is set, link the old command to the new one
848 	 * just allocated.
849 	 */
850 	priv = (*cmd)->dp_private->pr_device_cmd_private;
851 	if (flags & DCOPY_ALLOC_LINK) {
852 		priv->ip_next = oldcmd;
853 	} else {
854 		priv->ip_next = NULL;
855 	}
856 
857 	return (DCOPY_SUCCESS);
858 }
859 
860 
861 /*
862  * ioat_cmd_free()
863  */
864 void
865 ioat_cmd_free(void *private, dcopy_cmd_t *cmdp)
866 {
867 	ioat_cmd_private_t *priv;
868 	ioat_channel_t channel;
869 	dcopy_cmd_t next;
870 	dcopy_cmd_t cmd;
871 
872 
873 	channel = (ioat_channel_t)private;
874 	cmd = *(cmdp);
875 
876 	/*
877 	 * free all the commands in the chain (see DCOPY_ALLOC_LINK in
878 	 * ioat_cmd_alloc() for more info).
879 	 */
880 	while (cmd != NULL) {
881 		priv = cmd->dp_private->pr_device_cmd_private;
882 		next = priv->ip_next;
883 		kmem_cache_free(channel->ic_cmd_cache, cmd);
884 		cmd = next;
885 	}
886 	*cmdp = NULL;
887 }
888 
889 
890 /*
891  * ioat_cmd_post()
892  */
893 int
894 ioat_cmd_post(void *private, dcopy_cmd_t cmd)
895 {
896 	ioat_channel_ring_t *ring;
897 	ioat_cmd_private_t *priv;
898 	ioat_channel_t channel;
899 	ioat_state_t *state;
900 	uint64_t dest_paddr;
901 	uint64_t src_paddr;
902 	uint64_t dest_addr;
903 	uint32_t dest_size;
904 	uint64_t src_addr;
905 	uint32_t src_size;
906 	size_t xfer_size;
907 	uint32_t ctrl;
908 	size_t size;
909 	int e;
910 
911 
912 	channel = (ioat_channel_t)private;
913 	priv = cmd->dp_private->pr_device_cmd_private;
914 
915 	state = channel->ic_state;
916 	ring = channel->ic_ring;
917 
918 	mutex_enter(&ring->cr_desc_mutex);
919 
920 	/* if the channel has had a fatal failure, return failure */
921 	if (channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) {
922 		mutex_exit(&ring->cr_desc_mutex);
923 		return (DCOPY_FAILURE);
924 	}
925 
926 	/* make sure we have space for the descriptors */
927 	e = ioat_ring_reserve(channel, ring, cmd);
928 	if (e != DCOPY_SUCCESS) {
929 		mutex_exit(&ring->cr_desc_mutex);
930 		return (DCOPY_NORESOURCES);
931 	}
932 
933 	/* if we support DCA, and the DCA flag is set, post a DCA desc */
934 	if ((channel->ic_ver == IOAT_CBv2) &&
935 	    (cmd->dp_flags & DCOPY_CMD_DCA)) {
936 		ioat_cmd_post_dca(ring, cmd->dp_dca_id);
937 	}
938 
939 	/*
940 	 * the dma copy may have to be broken up into multiple descriptors
941 	 * since we can't cross a page boundary.
942 	 */
943 	ASSERT(cmd->dp_version == DCOPY_CMD_V0);
944 	ASSERT(cmd->dp_cmd == DCOPY_CMD_COPY);
945 	src_addr = cmd->dp.copy.cc_source;
946 	dest_addr = cmd->dp.copy.cc_dest;
947 	size = cmd->dp.copy.cc_size;
948 	while (size > 0) {
949 		src_paddr = pa_to_ma(src_addr);
950 		dest_paddr = pa_to_ma(dest_addr);
951 
952 		/* adjust for any offset into the page */
953 		if ((src_addr & PAGEOFFSET) == 0) {
954 			src_size = PAGESIZE;
955 		} else {
956 			src_size = PAGESIZE - (src_addr & PAGEOFFSET);
957 		}
958 		if ((dest_addr & PAGEOFFSET) == 0) {
959 			dest_size = PAGESIZE;
960 		} else {
961 			dest_size = PAGESIZE - (dest_addr & PAGEOFFSET);
962 		}
963 
964 		/* take the smallest of the three */
965 		xfer_size = MIN(src_size, dest_size);
966 		xfer_size = MIN(xfer_size, size);
967 
968 		/*
969 		 * if this is the last descriptor, and we are supposed to
970 		 * generate a completion, generate a completion. same logic
971 		 * for interrupt.
972 		 */
973 		ctrl = 0;
974 		if (xfer_size == size) {
975 			if (!(cmd->dp_flags & DCOPY_CMD_NOSTAT)) {
976 				ctrl |= IOAT_DESC_CTRL_CMPL;
977 			}
978 			if ((cmd->dp_flags & DCOPY_CMD_INTR)) {
979 				ctrl |= IOAT_DESC_CTRL_INTR;
980 			}
981 		}
982 
983 		ioat_cmd_post_copy(ring, src_paddr, dest_paddr, xfer_size,
984 		    ctrl);
985 
986 		/* go to the next page */
987 		src_addr += xfer_size;
988 		dest_addr += xfer_size;
989 		size -= xfer_size;
990 	}
991 
992 	/*
993 	 * if we are going to create a completion, save away the state so we
994 	 * can poll on it.
995 	 */
996 	if (!(cmd->dp_flags & DCOPY_CMD_NOSTAT)) {
997 		priv->ip_generation = ring->cr_desc_gen_prev;
998 		priv->ip_index = ring->cr_desc_prev;
999 	}
1000 
1001 	/* if queue not defined, tell the DMA engine about it */
1002 	if (!(cmd->dp_flags & DCOPY_CMD_QUEUE)) {
1003 		if (channel->ic_ver == IOAT_CBv1) {
1004 			ddi_put8(state->is_reg_handle,
1005 			    (uint8_t *)&channel->ic_regs[IOAT_V1_CHAN_CMD],
1006 			    0x2);
1007 		} else {
1008 			ASSERT(channel->ic_ver == IOAT_CBv2);
1009 			ddi_put16(state->is_reg_handle,
1010 			    (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT],
1011 			    (uint16_t)(ring->cr_post_cnt & 0xFFFF));
1012 		}
1013 	}
1014 
1015 	mutex_exit(&ring->cr_desc_mutex);
1016 
1017 	return (DCOPY_SUCCESS);
1018 }
1019 
1020 
1021 /*
1022  * ioat_cmd_post_dca()
1023  */
1024 static void
1025 ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id)
1026 {
1027 	ioat_chan_dca_desc_t *saved_prev;
1028 	ioat_chan_dca_desc_t *desc;
1029 	ioat_chan_dca_desc_t *prev;
1030 	ioat_channel_t channel;
1031 	uint64_t next_desc_phys;
1032 	off_t prev_offset;
1033 	off_t next_offset;
1034 
1035 
1036 	channel = ring->cr_chan;
1037 	desc = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_next];
1038 	prev = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_prev];
1039 
1040 	/* keep track of the number of descs posted for cbv2 */
1041 	ring->cr_post_cnt++;
1042 
1043 	/*
1044 	 * post a context change desriptor. If dca has never been used on
1045 	 * this channel, or if the id doesn't match the last id used on this
1046 	 * channel, set CONTEXT_CHANGE bit and dca id, set dca state to active,
1047 	 * and save away the id we're using.
1048 	 */
1049 	desc->dd_ctrl = IOAT_DESC_CTRL_OP_CNTX;
1050 	desc->dd_next_desc = 0x0;
1051 	if (!channel->ic_dca_active || (channel->ic_dca_current != dca_id)) {
1052 		channel->ic_dca_active = B_TRUE;
1053 		channel->ic_dca_current = dca_id;
1054 		desc->dd_ctrl |= IOAT_DESC_CTRL_CNTX_CHNG;
1055 		desc->dd_cntx = dca_id;
1056 	}
1057 
1058 	/*
1059 	 * save next desc and prev offset for when we link the two
1060 	 * descriptors together.
1061 	 */
1062 	saved_prev = prev;
1063 	prev_offset = ring->cr_desc_prev << 6;
1064 	next_offset = ring->cr_desc_next << 6;
1065 	next_desc_phys = ring->cr_phys_desc + next_offset;
1066 
1067 	/* save the current desc_next and desc_last for the completion */
1068 	ring->cr_desc_prev = ring->cr_desc_next;
1069 	ring->cr_desc_gen_prev = ring->cr_desc_gen;
1070 
1071 	/* increment next/gen so it points to the next free desc */
1072 	ring->cr_desc_next++;
1073 	if (ring->cr_desc_next > ring->cr_desc_last) {
1074 		ring->cr_desc_next = 0;
1075 		ring->cr_desc_gen++;
1076 	}
1077 
1078 	/*
1079 	 * if this is CBv2, link the descriptor to an empty descriptor. Since
1080 	 * we always leave on desc empty to detect full, this works out.
1081 	 */
1082 	if (ring->cr_chan->ic_ver == IOAT_CBv2) {
1083 		desc = (ioat_chan_dca_desc_t *)
1084 		    &ring->cr_desc[ring->cr_desc_next];
1085 		prev = (ioat_chan_dca_desc_t *)
1086 		    &ring->cr_desc[ring->cr_desc_prev];
1087 		desc->dd_ctrl = 0;
1088 		desc->dd_next_desc = 0x0;
1089 		(void) ddi_dma_sync(channel->ic_desc_dma_handle,
1090 		    ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV);
1091 		prev->dd_next_desc = ring->cr_phys_desc +
1092 		    (ring->cr_desc_next << 6);
1093 	}
1094 
1095 	/* Put the descriptors physical address in the previous descriptor */
1096 	/*LINTED:E_TRUE_LOGICAL_EXPR*/
1097 	ASSERT(sizeof (ioat_chan_dca_desc_t) == 64);
1098 
1099 	/* sync the current desc */
1100 	(void) ddi_dma_sync(channel->ic_desc_dma_handle, next_offset, 64,
1101 	    DDI_DMA_SYNC_FORDEV);
1102 
1103 	/* update the previous desc and sync it too */
1104 	saved_prev->dd_next_desc = next_desc_phys;
1105 	(void) ddi_dma_sync(channel->ic_desc_dma_handle, prev_offset, 64,
1106 	    DDI_DMA_SYNC_FORDEV);
1107 }
1108 
1109 
1110 /*
1111  * ioat_cmd_post_copy()
1112  *
1113  */
1114 static void
1115 ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr,
1116     uint64_t dest_addr, uint32_t size, uint32_t ctrl)
1117 {
1118 	ioat_chan_dma_desc_t *saved_prev;
1119 	ioat_chan_dma_desc_t *desc;
1120 	ioat_chan_dma_desc_t *prev;
1121 	ioat_channel_t channel;
1122 	uint64_t next_desc_phy;
1123 	off_t prev_offset;
1124 	off_t next_offset;
1125 
1126 
1127 	channel = ring->cr_chan;
1128 	desc = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_next];
1129 	prev = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_prev];
1130 
1131 	/* keep track of the number of descs posted for cbv2 */
1132 	ring->cr_post_cnt++;
1133 
1134 	/* write in the DMA desc */
1135 	desc->dd_ctrl = IOAT_DESC_CTRL_OP_DMA | ctrl;
1136 	desc->dd_size = size;
1137 	desc->dd_src_paddr = src_addr;
1138 	desc->dd_dest_paddr = dest_addr;
1139 	desc->dd_next_desc = 0x0;
1140 
1141 	/*
1142 	 * save next desc and prev offset for when we link the two
1143 	 * descriptors together.
1144 	 */
1145 	saved_prev = prev;
1146 	prev_offset = ring->cr_desc_prev << 6;
1147 	next_offset = ring->cr_desc_next << 6;
1148 	next_desc_phy = ring->cr_phys_desc + next_offset;
1149 
1150 	/* increment next/gen so it points to the next free desc */
1151 	ring->cr_desc_prev = ring->cr_desc_next;
1152 	ring->cr_desc_gen_prev = ring->cr_desc_gen;
1153 
1154 	/* increment next/gen so it points to the next free desc */
1155 	ring->cr_desc_next++;
1156 	if (ring->cr_desc_next > ring->cr_desc_last) {
1157 		ring->cr_desc_next = 0;
1158 		ring->cr_desc_gen++;
1159 	}
1160 
1161 	/*
1162 	 * if this is CBv2, link the descriptor to an empty descriptor. Since
1163 	 * we always leave on desc empty to detect full, this works out.
1164 	 */
1165 	if (ring->cr_chan->ic_ver == IOAT_CBv2) {
1166 		desc = (ioat_chan_dma_desc_t *)
1167 		    &ring->cr_desc[ring->cr_desc_next];
1168 		prev = (ioat_chan_dma_desc_t *)
1169 		    &ring->cr_desc[ring->cr_desc_prev];
1170 		desc->dd_size = 0;
1171 		desc->dd_ctrl = 0;
1172 		desc->dd_next_desc = 0x0;
1173 		(void) ddi_dma_sync(channel->ic_desc_dma_handle,
1174 		    ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV);
1175 		prev->dd_next_desc = ring->cr_phys_desc +
1176 		    (ring->cr_desc_next << 6);
1177 	}
1178 
1179 	/* Put the descriptors physical address in the previous descriptor */
1180 	/*LINTED:E_TRUE_LOGICAL_EXPR*/
1181 	ASSERT(sizeof (ioat_chan_dma_desc_t) == 64);
1182 
1183 	/* sync the current desc */
1184 	(void) ddi_dma_sync(channel->ic_desc_dma_handle, next_offset, 64,
1185 	    DDI_DMA_SYNC_FORDEV);
1186 
1187 	/* update the previous desc and sync it too */
1188 	saved_prev->dd_next_desc = next_desc_phy;
1189 	(void) ddi_dma_sync(channel->ic_desc_dma_handle, prev_offset, 64,
1190 	    DDI_DMA_SYNC_FORDEV);
1191 }
1192 
1193 
1194 /*
1195  * ioat_cmd_poll()
1196  */
1197 int
1198 ioat_cmd_poll(void *private, dcopy_cmd_t cmd)
1199 {
1200 	ioat_channel_ring_t *ring;
1201 	ioat_cmd_private_t *priv;
1202 	ioat_channel_t channel;
1203 	uint64_t generation;
1204 	uint64_t last_cmpl;
1205 
1206 
1207 	channel = (ioat_channel_t)private;
1208 	priv = cmd->dp_private->pr_device_cmd_private;
1209 
1210 	ring = channel->ic_ring;
1211 	ASSERT(ring != NULL);
1212 
1213 	mutex_enter(&ring->cr_cmpl_mutex);
1214 
1215 	/* if the channel had a fatal failure, fail all polls */
1216 	if ((channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) ||
1217 	    IOAT_CMPL_FAILED(channel)) {
1218 		mutex_exit(&ring->cr_cmpl_mutex);
1219 		return (DCOPY_FAILURE);
1220 	}
1221 
1222 	/*
1223 	 * if the current completion is the same as the last time we read one,
1224 	 * post is still pending, nothing further to do. We track completions
1225 	 * as indexes into the ring since post uses VAs and the H/W returns
1226 	 * PAs. We grab a snapshot of generation and last_cmpl in the mutex.
1227 	 */
1228 	(void) ddi_dma_sync(channel->ic_cmpl_dma_handle, 0, 0,
1229 	    DDI_DMA_SYNC_FORCPU);
1230 	last_cmpl = IOAT_CMPL_INDEX(channel);
1231 	if (last_cmpl != ring->cr_cmpl_last) {
1232 		/*
1233 		 * if we wrapped the ring, increment the generation. Store
1234 		 * the last cmpl. This logic assumes a physically contiguous
1235 		 * ring.
1236 		 */
1237 		if (last_cmpl < ring->cr_cmpl_last) {
1238 			ring->cr_cmpl_gen++;
1239 		}
1240 		ring->cr_cmpl_last = last_cmpl;
1241 		generation = ring->cr_cmpl_gen;
1242 
1243 	} else {
1244 		generation = ring->cr_cmpl_gen;
1245 	}
1246 
1247 	mutex_exit(&ring->cr_cmpl_mutex);
1248 
1249 	/*
1250 	 * if cmd isn't passed in, well return.  Useful for updating the
1251 	 * consumer pointer (ring->cr_cmpl_last).
1252 	 */
1253 	if (cmd == NULL) {
1254 		return (DCOPY_PENDING);
1255 	}
1256 
1257 	/*
1258 	 * if the post's generation is old, this post has completed. No reason
1259 	 * to go check the last completion. if the generation is the same
1260 	 * and if the post is before or = to the last completion processed,
1261 	 * the post has completed.
1262 	 */
1263 	if (priv->ip_generation < generation) {
1264 		return (DCOPY_COMPLETED);
1265 	} else if ((priv->ip_generation == generation) &&
1266 	    (priv->ip_index <= last_cmpl)) {
1267 		return (DCOPY_COMPLETED);
1268 	}
1269 
1270 	return (DCOPY_PENDING);
1271 }
1272 
1273 
1274 /*
1275  * ioat_ring_reserve()
1276  */
1277 int
1278 ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t *ring,
1279     dcopy_cmd_t cmd)
1280 {
1281 	uint64_t dest_addr;
1282 	uint32_t dest_size;
1283 	uint64_t src_addr;
1284 	uint32_t src_size;
1285 	size_t xfer_size;
1286 	uint64_t desc;
1287 	int num_desc;
1288 	size_t size;
1289 	int i;
1290 
1291 
1292 	/*
1293 	 * figure out how many descriptors we need. This can include a dca
1294 	 * desc and multiple desc for a dma copy.
1295 	 */
1296 	num_desc = 0;
1297 	if ((channel->ic_ver == IOAT_CBv2) &&
1298 	    (cmd->dp_flags & DCOPY_CMD_DCA)) {
1299 		num_desc++;
1300 	}
1301 	src_addr = cmd->dp.copy.cc_source;
1302 	dest_addr = cmd->dp.copy.cc_dest;
1303 	size = cmd->dp.copy.cc_size;
1304 	while (size > 0) {
1305 		num_desc++;
1306 
1307 		/* adjust for any offset into the page */
1308 		if ((src_addr & PAGEOFFSET) == 0) {
1309 			src_size = PAGESIZE;
1310 		} else {
1311 			src_size = PAGESIZE - (src_addr & PAGEOFFSET);
1312 		}
1313 		if ((dest_addr & PAGEOFFSET) == 0) {
1314 			dest_size = PAGESIZE;
1315 		} else {
1316 			dest_size = PAGESIZE - (dest_addr & PAGEOFFSET);
1317 		}
1318 
1319 		/* take the smallest of the three */
1320 		xfer_size = MIN(src_size, dest_size);
1321 		xfer_size = MIN(xfer_size, size);
1322 
1323 		/* go to the next page */
1324 		src_addr += xfer_size;
1325 		dest_addr += xfer_size;
1326 		size -= xfer_size;
1327 	}
1328 
1329 	/* Make sure we have space for these descriptors */
1330 	desc = ring->cr_desc_next;
1331 	for (i = 0; i < num_desc; i++) {
1332 
1333 		/*
1334 		 * if this is the last descriptor in the ring, see if the
1335 		 * last completed descriptor is #0.
1336 		 */
1337 		if (desc == ring->cr_desc_last) {
1338 			if (ring->cr_cmpl_last == 0) {
1339 				/*
1340 				 * if we think the ring is full, update where
1341 				 * the H/W really is and check for full again.
1342 				 */
1343 				(void) ioat_cmd_poll(channel, NULL);
1344 				if (ring->cr_cmpl_last == 0) {
1345 					return (DCOPY_NORESOURCES);
1346 				}
1347 			}
1348 
1349 			/*
1350 			 * go to the next descriptor which is zero in this
1351 			 * case.
1352 			 */
1353 			desc = 0;
1354 
1355 		/*
1356 		 * if this is not the last descriptor in the ring, see if
1357 		 * the last completion we saw was the next descriptor.
1358 		 */
1359 		} else {
1360 			if ((desc + 1) == ring->cr_cmpl_last) {
1361 				/*
1362 				 * if we think the ring is full, update where
1363 				 * the H/W really is and check for full again.
1364 				 */
1365 				(void) ioat_cmd_poll(channel, NULL);
1366 				if ((desc + 1) == ring->cr_cmpl_last) {
1367 					return (DCOPY_NORESOURCES);
1368 				}
1369 			}
1370 
1371 			/* go to the next descriptor */
1372 			desc++;
1373 		}
1374 	}
1375 
1376 	return (DCOPY_SUCCESS);
1377 }
1378