xref: /illumos-gate/usr/src/uts/i86pc/io/ioat/ioat_chan.c (revision fec047081731fd77caf46ec0471c501b2cb33894)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright (c) 2009, Intel Corporation.
29  * All rights reserved.
30  */
31 
32 #include <sys/errno.h>
33 #include <sys/types.h>
34 #include <sys/conf.h>
35 #include <sys/kmem.h>
36 #include <sys/ddi.h>
37 #include <sys/stat.h>
38 #include <sys/sunddi.h>
39 #include <sys/file.h>
40 #include <sys/open.h>
41 #include <sys/modctl.h>
42 #include <sys/ddi_impldefs.h>
43 #include <sys/sysmacros.h>
44 #include <vm/hat.h>
45 #include <vm/as.h>
46 #include <sys/mach_mmu.h>
47 #ifdef __xpv
48 #include <sys/hypervisor.h>
49 #endif
50 
51 #include <sys/ioat.h>
52 
53 
54 extern ddi_device_acc_attr_t ioat_acc_attr;
55 
56 /* dma attr for the descriptor rings */
57 ddi_dma_attr_t ioat_desc_dma_attr = {
58 	DMA_ATTR_V0,		/* dma_attr_version */
59 	0x0,			/* dma_attr_addr_lo */
60 	0xffffffffffffffff,	/* dma_attr_addr_hi */
61 	0xffffffff,		/* dma_attr_count_max */
62 	0x1000,			/* dma_attr_align */
63 	0x1,			/* dma_attr_burstsizes */
64 	0x1,			/* dma_attr_minxfer */
65 	0xffffffff,		/* dma_attr_maxxfer */
66 	0xffffffff,		/* dma_attr_seg */
67 	0x1,			/* dma_attr_sgllen */
68 	0x1,			/* dma_attr_granular */
69 	0x0,			/* dma_attr_flags */
70 };
71 
72 /* dma attr for the completion buffers */
73 ddi_dma_attr_t ioat_cmpl_dma_attr = {
74 	DMA_ATTR_V0,		/* dma_attr_version */
75 	0x0,			/* dma_attr_addr_lo */
76 	0xffffffffffffffff,	/* dma_attr_addr_hi */
77 	0xffffffff,		/* dma_attr_count_max */
78 	0x40,			/* dma_attr_align */
79 	0x1,			/* dma_attr_burstsizes */
80 	0x1,			/* dma_attr_minxfer */
81 	0xffffffff,		/* dma_attr_maxxfer */
82 	0xffffffff,		/* dma_attr_seg */
83 	0x1,			/* dma_attr_sgllen */
84 	0x1,			/* dma_attr_granular */
85 	0x0,			/* dma_attr_flags */
86 };
87 
88 static int ioat_completion_alloc(ioat_channel_t channel);
89 static void ioat_completion_free(ioat_channel_t channel);
90 static void ioat_channel_start(ioat_channel_t channel);
91 static void ioat_channel_reset(ioat_channel_t channel);
92 
93 int ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt);
94 void ioat_ring_free(ioat_channel_t channel);
95 void ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *desc);
96 int ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t *ring,
97     dcopy_cmd_t cmd);
98 
99 static void ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr,
100     uint64_t dest_addr, uint32_t size, uint32_t ctrl);
101 static void ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id);
102 
103 
104 /*
105  * ioat_channel_init()
106  */
107 int
108 ioat_channel_init(ioat_state_t *state)
109 {
110 	int i;
111 
112 	/*
113 	 * initialize each dma channel's state which doesn't change across
114 	 * channel alloc/free.
115 	 */
116 	state->is_chansize = sizeof (struct ioat_channel_s) *
117 	    state->is_num_channels;
118 	state->is_channel = kmem_zalloc(state->is_chansize, KM_SLEEP);
119 	for (i = 0; i < state->is_num_channels; i++) {
120 		state->is_channel[i].ic_state = state;
121 		state->is_channel[i].ic_regs = (uint8_t *)
122 		    ((uintptr_t)state->is_genregs +
123 		    (uintptr_t)(IOAT_CHANNELREG_OFFSET * (i + 1)));
124 	}
125 
126 	/* initial the allocator (from 0 to state->is_num_channels) */
127 	ioat_rs_init(state, 0, state->is_num_channels, &state->is_channel_rs);
128 
129 	return (DDI_SUCCESS);
130 }
131 
132 
133 /*
134  * ioat_channel_fini()
135  */
136 void
137 ioat_channel_fini(ioat_state_t *state)
138 {
139 	ioat_rs_fini(&state->is_channel_rs);
140 	kmem_free(state->is_channel, state->is_chansize);
141 }
142 
143 
144 /*
145  * ioat_channel_alloc()
146  *   NOTE: We intentionaly don't handle DCOPY_SLEEP (if no channels are
147  *	available)
148  */
149 /*ARGSUSED*/
150 int
151 ioat_channel_alloc(void *device_private, dcopy_handle_t handle, int flags,
152     uint_t size, dcopy_query_channel_t *info, void *channel_private)
153 {
154 #define	CHANSTRSIZE	20
155 	struct ioat_channel_s *channel;
156 	char chanstr[CHANSTRSIZE];
157 	ioat_channel_t *chan;
158 	ioat_state_t *state;
159 	size_t cmd_size;
160 	uint_t chan_num;
161 	uint32_t estat;
162 	int e;
163 
164 
165 	state = (ioat_state_t *)device_private;
166 	chan = (ioat_channel_t *)channel_private;
167 
168 	/* allocate a H/W channel */
169 	e = ioat_rs_alloc(state->is_channel_rs, &chan_num);
170 	if (e != DDI_SUCCESS) {
171 		return (DCOPY_NORESOURCES);
172 	}
173 
174 	channel = &state->is_channel[chan_num];
175 	channel->ic_inuse = B_TRUE;
176 	channel->ic_chan_num = chan_num;
177 	channel->ic_ver = state->is_ver;
178 	channel->ic_dca_active = B_FALSE;
179 	channel->ic_channel_state = IOAT_CHANNEL_OK;
180 	channel->ic_dcopy_handle = handle;
181 
182 #ifdef	DEBUG
183 	{
184 		/* if we're cbv2, verify that the V2 compatibility bit is set */
185 		uint16_t reg;
186 		if (channel->ic_ver == IOAT_CBv2) {
187 			reg = ddi_get16(state->is_reg_handle,
188 			    (uint16_t *)&channel->ic_regs[IOAT_CHAN_COMP]);
189 			ASSERT(reg & 0x2);
190 		}
191 	}
192 #endif
193 
194 	/*
195 	 * Configure DMA channel
196 	 *   Channel In Use
197 	 *   Error Interrupt Enable
198 	 *   Any Error Abort Enable
199 	 *   Error Completion Enable
200 	 */
201 	ddi_put16(state->is_reg_handle,
202 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C);
203 
204 	/* check channel error register, clear any errors */
205 	estat = ddi_get32(state->is_reg_handle,
206 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
207 	if (estat != 0) {
208 #ifdef	DEBUG
209 		cmn_err(CE_CONT, "cleared errors (0x%x) before channel (%d) "
210 		    "enable\n", estat, channel->ic_chan_num);
211 #endif
212 		ddi_put32(state->is_reg_handle,
213 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR], estat);
214 	}
215 
216 	/* allocate and initialize the descriptor buf */
217 	e = ioat_ring_alloc(channel, size);
218 	if (e != DDI_SUCCESS) {
219 		goto chinitfail_desc_alloc;
220 	}
221 
222 	/* allocate and initialize the completion space */
223 	e = ioat_completion_alloc(channel);
224 	if (e != DDI_SUCCESS) {
225 		goto chinitfail_completion_alloc;
226 	}
227 
228 	/* setup kmem_cache for commands */
229 	cmd_size = sizeof (struct dcopy_cmd_s) +
230 	    sizeof (struct dcopy_cmd_priv_s) +
231 	    sizeof (struct ioat_cmd_private_s);
232 	(void) snprintf(chanstr, CHANSTRSIZE, "ioat%dchan%dcmd",
233 	    state->is_instance, channel->ic_chan_num);
234 	channel->ic_cmd_cache = kmem_cache_create(chanstr, cmd_size, 64,
235 	    NULL, NULL, NULL, NULL, NULL, 0);
236 	if (channel->ic_cmd_cache == NULL) {
237 		goto chinitfail_kmem_cache;
238 	}
239 
240 	/* start-up the channel */
241 	ioat_channel_start(channel);
242 
243 	/* fill in the channel info returned to dcopy */
244 	info->qc_version = DCOPY_QUERY_CHANNEL_V0;
245 	info->qc_id = state->is_deviceinfo.di_id;
246 	info->qc_capabilities = (uint64_t)state->is_capabilities;
247 	info->qc_channel_size = (uint64_t)size;
248 	info->qc_chan_num = (uint64_t)channel->ic_chan_num;
249 	if (channel->ic_ver == IOAT_CBv1) {
250 		info->qc_dca_supported = B_FALSE;
251 	} else {
252 		if (info->qc_capabilities & IOAT_DMACAP_DCA) {
253 			info->qc_dca_supported = B_TRUE;
254 		} else {
255 			info->qc_dca_supported = B_FALSE;
256 		}
257 	}
258 
259 	*chan = channel;
260 
261 	return (DCOPY_SUCCESS);
262 
263 chinitfail_kmem_cache:
264 	ioat_completion_free(channel);
265 chinitfail_completion_alloc:
266 	ioat_ring_free(channel);
267 chinitfail_desc_alloc:
268 	return (DCOPY_FAILURE);
269 }
270 
271 
272 /*
273  * ioat_channel_suspend()
274  */
275 /*ARGSUSED*/
276 void
277 ioat_channel_suspend(ioat_state_t *state)
278 {
279 	/*
280 	 * normally you would disable interrupts and reset the H/W here. But
281 	 * since the suspend framework doesn't know who is using us, it may
282 	 * not suspend their I/O before us.  Since we won't actively be doing
283 	 * any DMA or interrupts unless someone asks us to, it's safe to not
284 	 * do anything here.
285 	 */
286 }
287 
288 
289 /*
290  * ioat_channel_resume()
291  */
292 int
293 ioat_channel_resume(ioat_state_t *state)
294 {
295 	ioat_channel_ring_t *ring;
296 	ioat_channel_t channel;
297 	uint32_t estat;
298 	int i;
299 
300 
301 	for (i = 0; i < state->is_num_channels; i++) {
302 		channel = &state->is_channel[i];
303 		ring = channel->ic_ring;
304 
305 		if (!channel->ic_inuse) {
306 			continue;
307 		}
308 
309 		/*
310 		 * Configure DMA channel
311 		 *   Channel In Use
312 		 *   Error Interrupt Enable
313 		 *   Any Error Abort Enable
314 		 *   Error Completion Enable
315 		 */
316 		ddi_put16(state->is_reg_handle,
317 		    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C);
318 
319 		/* check channel error register, clear any errors */
320 		estat = ddi_get32(state->is_reg_handle,
321 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
322 		if (estat != 0) {
323 #ifdef	DEBUG
324 			cmn_err(CE_CONT, "cleared errors (0x%x) before channel"
325 			    " (%d) enable\n", estat, channel->ic_chan_num);
326 #endif
327 			ddi_put32(state->is_reg_handle,
328 			    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR],
329 			    estat);
330 		}
331 
332 		/* Re-initialize the ring */
333 		bzero(ring->cr_desc, channel->ic_desc_alloc_size);
334 		/* write the physical address into the chain address register */
335 		if (channel->ic_ver == IOAT_CBv1) {
336 			ddi_put32(state->is_reg_handle,
337 			    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO],
338 			    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
339 			ddi_put32(state->is_reg_handle,
340 			    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI],
341 			    (uint32_t)(ring->cr_phys_desc >> 32));
342 		} else {
343 			ASSERT(channel->ic_ver == IOAT_CBv2);
344 			ddi_put32(state->is_reg_handle,
345 			    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO],
346 			    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
347 			ddi_put32(state->is_reg_handle,
348 			    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI],
349 			    (uint32_t)(ring->cr_phys_desc >> 32));
350 		}
351 
352 		/* re-initialize the completion buffer */
353 		bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size);
354 		/* write the phys addr into the completion address register */
355 		ddi_put32(state->is_reg_handle,
356 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO],
357 		    (uint32_t)(channel->ic_phys_cmpl & 0xffffffff));
358 		ddi_put32(state->is_reg_handle,
359 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI],
360 		    (uint32_t)(channel->ic_phys_cmpl >> 32));
361 
362 		/* start-up the channel */
363 		ioat_channel_start(channel);
364 
365 	}
366 
367 	return (DDI_SUCCESS);
368 }
369 
370 /*
371  * quiesce(9E) entry point.
372  *
373  * This function is called when the system is single-threaded at high
374  * PIL with preemption disabled. Therefore, this function must not be
375  * blocked.
376  *
377  * This function returns DDI_SUCCESS on success, or DDI_FAILURE on failure.
378  * DDI_FAILURE indicates an error condition and should almost never happen.
379  */
380 void
381 ioat_channel_quiesce(ioat_state_t *state)
382 {
383 	int i;
384 
385 	/*
386 	 * Walk through all channels and quiesce
387 	 */
388 	for (i = 0; i < state->is_num_channels; i++) {
389 
390 		ioat_channel_t	channel = state->is_channel + i;
391 
392 		if (!channel->ic_inuse)
393 			continue;
394 
395 		/* disable the interrupts */
396 		ddi_put16(state->is_reg_handle,
397 		    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL],
398 		    0x0);
399 
400 		ioat_channel_reset(channel);
401 	}
402 }
403 
404 
405 /*
406  * ioat_channel_free()
407  */
408 void
409 ioat_channel_free(void *channel_private)
410 {
411 	struct ioat_channel_s *channel;
412 	ioat_channel_t *chan;
413 	ioat_state_t *state;
414 	uint_t chan_num;
415 
416 
417 	chan = (ioat_channel_t *)channel_private;
418 	channel = *chan;
419 
420 	state = channel->ic_state;
421 	chan_num = channel->ic_chan_num;
422 
423 	/* disable the interrupts */
424 	ddi_put16(state->is_reg_handle,
425 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x0);
426 
427 	ioat_channel_reset(channel);
428 
429 	/* cleanup command cache */
430 	kmem_cache_destroy(channel->ic_cmd_cache);
431 
432 	/* clean-up/free-up the completion space and descriptors */
433 	ioat_completion_free(channel);
434 	ioat_ring_free(channel);
435 
436 	channel->ic_inuse = B_FALSE;
437 
438 	/* free the H/W DMA engine */
439 	ioat_rs_free(state->is_channel_rs, chan_num);
440 
441 	*chan = NULL;
442 }
443 
444 
445 /*
446  * ioat_channel_intr()
447  */
448 void
449 ioat_channel_intr(ioat_channel_t channel)
450 {
451 	ioat_state_t *state;
452 	uint16_t chanctrl;
453 	uint32_t chanerr;
454 	uint32_t status;
455 
456 
457 	state = channel->ic_state;
458 
459 	if (channel->ic_ver == IOAT_CBv1) {
460 		status = ddi_get32(state->is_reg_handle,
461 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_STS_LO]);
462 	} else {
463 		ASSERT(channel->ic_ver == IOAT_CBv2);
464 		status = ddi_get32(state->is_reg_handle,
465 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_STS_LO]);
466 	}
467 
468 	/* if that status isn't ACTIVE or IDLE, the channel has failed */
469 	if (status & IOAT_CHAN_STS_FAIL_MASK) {
470 		chanerr = ddi_get32(state->is_reg_handle,
471 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
472 		cmn_err(CE_WARN, "channel(%d) fatal failure! "
473 		    "chanstat_lo=0x%X; chanerr=0x%X\n",
474 		    channel->ic_chan_num, status, chanerr);
475 		channel->ic_channel_state = IOAT_CHANNEL_IN_FAILURE;
476 		ioat_channel_reset(channel);
477 
478 		return;
479 	}
480 
481 	/*
482 	 * clear interrupt disable bit if set (it's a RW1C). Read it back to
483 	 * ensure the write completes.
484 	 */
485 	chanctrl = ddi_get16(state->is_reg_handle,
486 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]);
487 	ddi_put16(state->is_reg_handle,
488 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], chanctrl);
489 	(void) ddi_get16(state->is_reg_handle,
490 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]);
491 
492 	/* tell dcopy we have seen a completion on this channel */
493 	dcopy_device_channel_notify(channel->ic_dcopy_handle, DCOPY_COMPLETION);
494 }
495 
496 
497 /*
498  * ioat_channel_start()
499  */
500 void
501 ioat_channel_start(ioat_channel_t channel)
502 {
503 	ioat_chan_dma_desc_t desc;
504 
505 	/* set the first descriptor up as a NULL descriptor */
506 	bzero(&desc, sizeof (desc));
507 	desc.dd_size = 0;
508 	desc.dd_ctrl = IOAT_DESC_CTRL_OP_DMA | IOAT_DESC_DMACTRL_NULL |
509 	    IOAT_DESC_CTRL_CMPL;
510 	desc.dd_next_desc = 0x0;
511 
512 	/* setup the very first descriptor */
513 	ioat_ring_seed(channel, &desc);
514 }
515 
516 
517 /*
518  * ioat_channel_reset()
519  */
520 void
521 ioat_channel_reset(ioat_channel_t channel)
522 {
523 	ioat_state_t *state;
524 
525 	state = channel->ic_state;
526 
527 	/* hit the reset bit */
528 	if (channel->ic_ver == IOAT_CBv1) {
529 		ddi_put8(state->is_reg_handle,
530 		    &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x20);
531 	} else {
532 		ASSERT(channel->ic_ver == IOAT_CBv2);
533 		ddi_put8(state->is_reg_handle,
534 		    &channel->ic_regs[IOAT_V2_CHAN_CMD], 0x20);
535 	}
536 }
537 
538 
539 /*
540  * ioat_completion_alloc()
541  */
542 int
543 ioat_completion_alloc(ioat_channel_t channel)
544 {
545 	ioat_state_t *state;
546 	size_t real_length;
547 	uint_t cookie_cnt;
548 	int e;
549 
550 
551 	state = channel->ic_state;
552 
553 	/*
554 	 * allocate memory for the completion status, zero it out, and get
555 	 * the paddr. We'll allocate a physically contiguous cache line.
556 	 */
557 	e = ddi_dma_alloc_handle(state->is_dip, &ioat_cmpl_dma_attr,
558 	    DDI_DMA_SLEEP, NULL, &channel->ic_cmpl_dma_handle);
559 	if (e != DDI_SUCCESS) {
560 		goto cmplallocfail_alloc_handle;
561 	}
562 	channel->ic_cmpl_alloc_size = 64;
563 	e = ddi_dma_mem_alloc(channel->ic_cmpl_dma_handle,
564 	    channel->ic_cmpl_alloc_size, &ioat_acc_attr,
565 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
566 	    (caddr_t *)&channel->ic_cmpl, &real_length,
567 	    &channel->ic_cmpl_handle);
568 	if (e != DDI_SUCCESS) {
569 		goto cmplallocfail_mem_alloc;
570 	}
571 	bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size);
572 	e = ddi_dma_addr_bind_handle(channel->ic_cmpl_dma_handle, NULL,
573 	    (caddr_t)channel->ic_cmpl, channel->ic_cmpl_alloc_size,
574 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
575 	    &channel->ic_cmpl_cookie, &cookie_cnt);
576 	if (e != DDI_SUCCESS) {
577 		goto cmplallocfail_addr_bind;
578 	}
579 	ASSERT(cookie_cnt == 1);
580 	ASSERT(channel->ic_cmpl_cookie.dmac_size ==
581 	    channel->ic_cmpl_alloc_size);
582 	channel->ic_phys_cmpl = channel->ic_cmpl_cookie.dmac_laddress;
583 
584 	/* write the physical address into the completion address register */
585 	ddi_put32(state->is_reg_handle,
586 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO],
587 	    (uint32_t)(channel->ic_phys_cmpl & 0xffffffff));
588 	ddi_put32(state->is_reg_handle,
589 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI],
590 	    (uint32_t)(channel->ic_phys_cmpl >> 32));
591 
592 	return (DDI_SUCCESS);
593 
594 cmplallocfail_addr_bind:
595 	ddi_dma_mem_free(&channel->ic_desc_handle);
596 cmplallocfail_mem_alloc:
597 	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
598 cmplallocfail_alloc_handle:
599 	return (DDI_FAILURE);
600 }
601 
602 
603 /*
604  * ioat_completion_free()
605  */
606 void
607 ioat_completion_free(ioat_channel_t channel)
608 {
609 	ioat_state_t *state;
610 
611 	state = channel->ic_state;
612 
613 	/* reset the completion address register */
614 	ddi_put32(state->is_reg_handle,
615 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO], 0x0);
616 	ddi_put32(state->is_reg_handle,
617 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI], 0x0);
618 
619 	/* unbind, then free up the memory, dma handle */
620 	(void) ddi_dma_unbind_handle(channel->ic_cmpl_dma_handle);
621 	ddi_dma_mem_free(&channel->ic_cmpl_handle);
622 	ddi_dma_free_handle(&channel->ic_cmpl_dma_handle);
623 }
624 
625 /*
626  * ioat_ring_alloc()
627  */
628 int
629 ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt)
630 {
631 	ioat_channel_ring_t *ring;
632 	ioat_state_t *state;
633 	size_t real_length;
634 	uint_t cookie_cnt;
635 	int e;
636 
637 
638 	state = channel->ic_state;
639 
640 	ring = kmem_zalloc(sizeof (ioat_channel_ring_t), KM_SLEEP);
641 	channel->ic_ring = ring;
642 	ring->cr_chan = channel;
643 	ring->cr_post_cnt = 0;
644 
645 	mutex_init(&ring->cr_cmpl_mutex, NULL, MUTEX_DRIVER,
646 	    channel->ic_state->is_iblock_cookie);
647 	mutex_init(&ring->cr_desc_mutex, NULL, MUTEX_DRIVER,
648 	    channel->ic_state->is_iblock_cookie);
649 
650 	/*
651 	 * allocate memory for the ring, zero it out, and get the paddr.
652 	 * We'll allocate a physically contiguous chunck of memory  which
653 	 * simplifies the completion logic.
654 	 */
655 	e = ddi_dma_alloc_handle(state->is_dip, &ioat_desc_dma_attr,
656 	    DDI_DMA_SLEEP, NULL, &channel->ic_desc_dma_handle);
657 	if (e != DDI_SUCCESS) {
658 		goto ringallocfail_alloc_handle;
659 	}
660 	/*
661 	 * allocate one extra descriptor so we can simplify the empty/full
662 	 * logic. Then round that number up to a whole multiple of 4.
663 	 */
664 	channel->ic_chan_desc_cnt = ((desc_cnt + 1) + 3) & ~0x3;
665 	ring->cr_desc_last = channel->ic_chan_desc_cnt - 1;
666 	channel->ic_desc_alloc_size = channel->ic_chan_desc_cnt *
667 	    sizeof (ioat_chan_desc_t);
668 	e = ddi_dma_mem_alloc(channel->ic_desc_dma_handle,
669 	    channel->ic_desc_alloc_size, &ioat_acc_attr,
670 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
671 	    (caddr_t *)&ring->cr_desc, &real_length, &channel->ic_desc_handle);
672 	if (e != DDI_SUCCESS) {
673 		goto ringallocfail_mem_alloc;
674 	}
675 	bzero(ring->cr_desc, channel->ic_desc_alloc_size);
676 	e = ddi_dma_addr_bind_handle(channel->ic_desc_dma_handle, NULL,
677 	    (caddr_t)ring->cr_desc, channel->ic_desc_alloc_size,
678 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
679 	    &channel->ic_desc_cookies, &cookie_cnt);
680 	if (e != DDI_SUCCESS) {
681 		goto ringallocfail_addr_bind;
682 	}
683 	ASSERT(cookie_cnt == 1);
684 	ASSERT(channel->ic_desc_cookies.dmac_size ==
685 	    channel->ic_desc_alloc_size);
686 	ring->cr_phys_desc = channel->ic_desc_cookies.dmac_laddress;
687 
688 	/* write the physical address into the chain address register */
689 	if (channel->ic_ver == IOAT_CBv1) {
690 		ddi_put32(state->is_reg_handle,
691 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO],
692 		    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
693 		ddi_put32(state->is_reg_handle,
694 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI],
695 		    (uint32_t)(ring->cr_phys_desc >> 32));
696 	} else {
697 		ASSERT(channel->ic_ver == IOAT_CBv2);
698 		ddi_put32(state->is_reg_handle,
699 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO],
700 		    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
701 		ddi_put32(state->is_reg_handle,
702 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI],
703 		    (uint32_t)(ring->cr_phys_desc >> 32));
704 	}
705 
706 	return (DCOPY_SUCCESS);
707 
708 ringallocfail_addr_bind:
709 	ddi_dma_mem_free(&channel->ic_desc_handle);
710 ringallocfail_mem_alloc:
711 	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
712 ringallocfail_alloc_handle:
713 	mutex_destroy(&ring->cr_desc_mutex);
714 	mutex_destroy(&ring->cr_cmpl_mutex);
715 	kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t));
716 
717 	return (DCOPY_FAILURE);
718 }
719 
720 
721 /*
722  * ioat_ring_free()
723  */
724 void
725 ioat_ring_free(ioat_channel_t channel)
726 {
727 	ioat_state_t *state;
728 
729 
730 	state = channel->ic_state;
731 
732 	/* reset the chain address register */
733 	if (channel->ic_ver == IOAT_CBv1) {
734 		ddi_put32(state->is_reg_handle,
735 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO], 0x0);
736 		ddi_put32(state->is_reg_handle,
737 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI], 0x0);
738 	} else {
739 		ASSERT(channel->ic_ver == IOAT_CBv2);
740 		ddi_put32(state->is_reg_handle,
741 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO], 0x0);
742 		ddi_put32(state->is_reg_handle,
743 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI], 0x0);
744 	}
745 
746 	/* unbind, then free up the memory, dma handle */
747 	(void) ddi_dma_unbind_handle(channel->ic_desc_dma_handle);
748 	ddi_dma_mem_free(&channel->ic_desc_handle);
749 	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
750 
751 	mutex_destroy(&channel->ic_ring->cr_desc_mutex);
752 	mutex_destroy(&channel->ic_ring->cr_cmpl_mutex);
753 	kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t));
754 
755 }
756 
757 
758 /*
759  * ioat_ring_seed()
760  *    write the first descriptor in the ring.
761  */
762 void
763 ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *in_desc)
764 {
765 	ioat_channel_ring_t *ring;
766 	ioat_chan_dma_desc_t *desc;
767 	ioat_chan_dma_desc_t *prev;
768 	ioat_state_t *state;
769 
770 
771 	state = channel->ic_state;
772 	ring = channel->ic_ring;
773 
774 	/* init the completion state */
775 	ring->cr_cmpl_gen = 0x0;
776 	ring->cr_cmpl_last = 0x0;
777 
778 	/* write in the descriptor and init the descriptor state */
779 	ring->cr_post_cnt++;
780 	channel->ic_ring->cr_desc[0] = *(ioat_chan_desc_t *)in_desc;
781 	ring->cr_desc_gen = 0;
782 	ring->cr_desc_prev = 0;
783 	ring->cr_desc_next = 1;
784 
785 	if (channel->ic_ver == IOAT_CBv1) {
786 		/* hit the start bit */
787 		ddi_put8(state->is_reg_handle,
788 		    &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x1);
789 	} else {
790 		/*
791 		 * if this is CBv2, link the descriptor to an empty
792 		 * descriptor
793 		 */
794 		ASSERT(ring->cr_chan->ic_ver == IOAT_CBv2);
795 		desc = (ioat_chan_dma_desc_t *)
796 		    &ring->cr_desc[ring->cr_desc_next];
797 		prev = (ioat_chan_dma_desc_t *)
798 		    &ring->cr_desc[ring->cr_desc_prev];
799 
800 		desc->dd_ctrl = 0;
801 		desc->dd_next_desc = 0x0;
802 
803 		prev->dd_next_desc = ring->cr_phys_desc +
804 		    (ring->cr_desc_next << 6);
805 
806 		ddi_put16(state->is_reg_handle,
807 		    (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT],
808 		    (uint16_t)1);
809 	}
810 
811 }
812 
813 /*
814  * ioat_ring_loop()
815  * Make the ring loop for CB v1
816  * This function assume we are in the ring->cr_desc_mutex mutex context
817  */
818 int
819 ioat_ring_loop(ioat_channel_ring_t *ring, dcopy_cmd_t cmd)
820 {
821 	uint64_t count;
822 	ioat_channel_t channel;
823 	ioat_chan_dma_desc_t *curr;
824 	ioat_cmd_private_t *prevpriv;
825 	ioat_cmd_private_t *currpriv;
826 
827 	currpriv = NULL;
828 	channel = ring->cr_chan;
829 	ASSERT(channel->ic_ver == IOAT_CBv1);
830 
831 	/*
832 	 * For each cmd in the command queue, check whether they are continuous
833 	 * in descriptor ring. Return error if not continuous.
834 	 */
835 	for (count = 0, prevpriv = NULL;
836 	    cmd != NULL && count <= channel->ic_chan_desc_cnt;
837 	    prevpriv = currpriv) {
838 		currpriv = cmd->dp_private->pr_device_cmd_private;
839 		if (prevpriv != NULL &&
840 		    currpriv->ip_index + 1 != prevpriv->ip_start &&
841 		    currpriv->ip_index + 1 != prevpriv->ip_start +
842 		    channel->ic_chan_desc_cnt) {
843 			/* Non-continuous, other commands get interleaved */
844 			return (DCOPY_FAILURE);
845 		}
846 		if (currpriv->ip_index < currpriv->ip_start) {
847 			count += channel->ic_chan_desc_cnt
848 			    + currpriv->ip_index - currpriv->ip_start + 1;
849 		} else {
850 			count += currpriv->ip_index - currpriv->ip_start + 1;
851 		}
852 		cmd = currpriv->ip_next;
853 	}
854 	/*
855 	 * Check for too many descriptors which would cause wrap around in
856 	 * descriptor ring. And make sure there is space for cancel operation.
857 	 */
858 	if (count >= channel->ic_chan_desc_cnt) {
859 		return (DCOPY_FAILURE);
860 	}
861 
862 	/* Point next descriptor to header of chain. */
863 	curr = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_prev];
864 	curr->dd_next_desc = ring->cr_phys_desc + (currpriv->ip_start << 6);
865 
866 	/* sync the last desc */
867 	(void) ddi_dma_sync(channel->ic_desc_dma_handle,
868 	    ring->cr_desc_prev << 6, 64, DDI_DMA_SYNC_FORDEV);
869 
870 	return (DCOPY_SUCCESS);
871 }
872 
873 
874 /*
875  * ioat_cmd_alloc()
876  */
877 int
878 ioat_cmd_alloc(void *private, int flags, dcopy_cmd_t *cmd)
879 {
880 	ioat_cmd_private_t *priv;
881 	ioat_channel_t channel;
882 	dcopy_cmd_t oldcmd;
883 	int kmflag;
884 
885 
886 	channel = (ioat_channel_t)private;
887 
888 	if (flags & DCOPY_NOSLEEP) {
889 		kmflag = KM_NOSLEEP;
890 	} else {
891 		kmflag = KM_SLEEP;
892 	}
893 
894 	/* save the command passed incase DCOPY_ALLOC_LINK is set */
895 	oldcmd = *cmd;
896 
897 	*cmd = kmem_cache_alloc(channel->ic_cmd_cache, kmflag);
898 	if (*cmd == NULL) {
899 		return (DCOPY_NORESOURCES);
900 	}
901 
902 	/* setup the dcopy and ioat private state pointers */
903 	(*cmd)->dp_version = DCOPY_CMD_V0;
904 	(*cmd)->dp_cmd = 0;
905 	(*cmd)->dp_private = (struct dcopy_cmd_priv_s *)
906 	    ((uintptr_t)(*cmd) + sizeof (struct dcopy_cmd_s));
907 	(*cmd)->dp_private->pr_device_cmd_private =
908 	    (struct ioat_cmd_private_s *)((uintptr_t)(*cmd)->dp_private +
909 	    sizeof (struct dcopy_cmd_priv_s));
910 
911 	/*
912 	 * if DCOPY_ALLOC_LINK is set, link the old command to the new one
913 	 * just allocated.
914 	 */
915 	priv = (*cmd)->dp_private->pr_device_cmd_private;
916 	if (flags & DCOPY_ALLOC_LINK) {
917 		priv->ip_next = oldcmd;
918 	} else {
919 		priv->ip_next = NULL;
920 	}
921 
922 	return (DCOPY_SUCCESS);
923 }
924 
925 
926 /*
927  * ioat_cmd_free()
928  */
929 void
930 ioat_cmd_free(void *private, dcopy_cmd_t *cmdp)
931 {
932 	ioat_cmd_private_t *priv;
933 	ioat_channel_t channel;
934 	dcopy_cmd_t next;
935 	dcopy_cmd_t cmd;
936 
937 
938 	channel = (ioat_channel_t)private;
939 	cmd = *(cmdp);
940 
941 	/*
942 	 * free all the commands in the chain (see DCOPY_ALLOC_LINK in
943 	 * ioat_cmd_alloc() for more info).
944 	 */
945 	while (cmd != NULL) {
946 		priv = cmd->dp_private->pr_device_cmd_private;
947 		next = priv->ip_next;
948 		kmem_cache_free(channel->ic_cmd_cache, cmd);
949 		cmd = next;
950 	}
951 	*cmdp = NULL;
952 }
953 
954 
955 /*
956  * ioat_cmd_post()
957  */
958 int
959 ioat_cmd_post(void *private, dcopy_cmd_t cmd)
960 {
961 	ioat_channel_ring_t *ring;
962 	ioat_cmd_private_t *priv;
963 	ioat_channel_t channel;
964 	ioat_state_t *state;
965 	uint64_t dest_paddr;
966 	uint64_t src_paddr;
967 	uint64_t dest_addr;
968 	uint32_t dest_size;
969 	uint64_t src_addr;
970 	uint32_t src_size;
971 	size_t xfer_size;
972 	uint32_t ctrl;
973 	size_t size;
974 	int e;
975 
976 
977 	channel = (ioat_channel_t)private;
978 	priv = cmd->dp_private->pr_device_cmd_private;
979 
980 	state = channel->ic_state;
981 	ring = channel->ic_ring;
982 
983 	/*
984 	 * Special support for DCOPY_CMD_LOOP option, only supported on CBv1.
	 * DCOPY_CMD_QUEUE must not be set on a command which has
	 * DCOPY_CMD_LOOP set; that combination is rejected below.
986 	 */
987 	if ((cmd->dp_flags & DCOPY_CMD_LOOP) &&
988 	    (channel->ic_ver != IOAT_CBv1 ||
989 	    (cmd->dp_flags & DCOPY_CMD_QUEUE))) {
990 		return (DCOPY_FAILURE);
991 	}
992 
993 	if ((cmd->dp_flags & DCOPY_CMD_NOWAIT) == 0) {
994 		mutex_enter(&ring->cr_desc_mutex);
995 
996 	/*
997 	 * Try to acquire mutex if NOWAIT flag is set.
998 	 * Return failure if failed to acquire mutex.
999 	 */
1000 	} else if (mutex_tryenter(&ring->cr_desc_mutex) == 0) {
1001 		return (DCOPY_FAILURE);
1002 	}
1003 
1004 	/* if the channel has had a fatal failure, return failure */
1005 	if (channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) {
1006 		mutex_exit(&ring->cr_desc_mutex);
1007 		return (DCOPY_FAILURE);
1008 	}
1009 
1010 	/* make sure we have space for the descriptors */
1011 	e = ioat_ring_reserve(channel, ring, cmd);
1012 	if (e != DCOPY_SUCCESS) {
1013 		mutex_exit(&ring->cr_desc_mutex);
1014 		return (DCOPY_NORESOURCES);
1015 	}
1016 
1017 	/* if we support DCA, and the DCA flag is set, post a DCA desc */
1018 	if ((channel->ic_ver == IOAT_CBv2) &&
1019 	    (cmd->dp_flags & DCOPY_CMD_DCA)) {
1020 		ioat_cmd_post_dca(ring, cmd->dp_dca_id);
1021 	}
1022 
1023 	/*
1024 	 * the dma copy may have to be broken up into multiple descriptors
1025 	 * since we can't cross a page boundary.
1026 	 */
1027 	ASSERT(cmd->dp_version == DCOPY_CMD_V0);
1028 	ASSERT(cmd->dp_cmd == DCOPY_CMD_COPY);
1029 	src_addr = cmd->dp.copy.cc_source;
1030 	dest_addr = cmd->dp.copy.cc_dest;
1031 	size = cmd->dp.copy.cc_size;
1032 	priv->ip_start = ring->cr_desc_next;
1033 	while (size > 0) {
1034 		src_paddr = pa_to_ma(src_addr);
1035 		dest_paddr = pa_to_ma(dest_addr);
1036 
1037 		/* adjust for any offset into the page */
1038 		if ((src_addr & PAGEOFFSET) == 0) {
1039 			src_size = PAGESIZE;
1040 		} else {
1041 			src_size = PAGESIZE - (src_addr & PAGEOFFSET);
1042 		}
1043 		if ((dest_addr & PAGEOFFSET) == 0) {
1044 			dest_size = PAGESIZE;
1045 		} else {
1046 			dest_size = PAGESIZE - (dest_addr & PAGEOFFSET);
1047 		}
1048 
1049 		/* take the smallest of the three */
1050 		xfer_size = MIN(src_size, dest_size);
1051 		xfer_size = MIN(xfer_size, size);
1052 
1053 		/*
1054 		 * if this is the last descriptor, and we are supposed to
1055 		 * generate a completion, generate a completion. same logic
1056 		 * for interrupt.
1057 		 */
1058 		ctrl = 0;
1059 		if (cmd->dp_flags & DCOPY_CMD_NOSRCSNP) {
1060 			ctrl |= IOAT_DESC_CTRL_NOSRCSNP;
1061 		}
1062 		if (cmd->dp_flags & DCOPY_CMD_NODSTSNP) {
1063 			ctrl |= IOAT_DESC_CTRL_NODSTSNP;
1064 		}
1065 		if (xfer_size == size) {
1066 			if (!(cmd->dp_flags & DCOPY_CMD_NOSTAT)) {
1067 				ctrl |= IOAT_DESC_CTRL_CMPL;
1068 			}
1069 			if ((cmd->dp_flags & DCOPY_CMD_INTR)) {
1070 				ctrl |= IOAT_DESC_CTRL_INTR;
1071 			}
1072 		}
1073 
1074 		ioat_cmd_post_copy(ring, src_paddr, dest_paddr, xfer_size,
1075 		    ctrl);
1076 
1077 		/* go to the next page */
1078 		src_addr += xfer_size;
1079 		dest_addr += xfer_size;
1080 		size -= xfer_size;
1081 	}
1082 
1083 	/* save away the state so we can poll on it. */
1084 	priv->ip_generation = ring->cr_desc_gen_prev;
1085 	priv->ip_index = ring->cr_desc_prev;
1086 
1087 	/* if queue not defined, tell the DMA engine about it */
1088 	if (!(cmd->dp_flags & DCOPY_CMD_QUEUE)) {
1089 		/*
1090 		 * Link the ring to a loop (currently only for FIPE).
1091 		 */
1092 		if (cmd->dp_flags & DCOPY_CMD_LOOP) {
1093 			e = ioat_ring_loop(ring, cmd);
1094 			if (e != DCOPY_SUCCESS) {
1095 				mutex_exit(&ring->cr_desc_mutex);
1096 				return (DCOPY_FAILURE);
1097 			}
1098 		}
1099 
1100 		if (channel->ic_ver == IOAT_CBv1) {
1101 			ddi_put8(state->is_reg_handle,
1102 			    (uint8_t *)&channel->ic_regs[IOAT_V1_CHAN_CMD],
1103 			    0x2);
1104 		} else {
1105 			ASSERT(channel->ic_ver == IOAT_CBv2);
1106 			ddi_put16(state->is_reg_handle,
1107 			    (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT],
1108 			    (uint16_t)(ring->cr_post_cnt & 0xFFFF));
1109 		}
1110 	}
1111 
1112 	mutex_exit(&ring->cr_desc_mutex);
1113 
1114 	return (DCOPY_SUCCESS);
1115 }
1116 
1117 
1118 /*
1119  * ioat_cmd_post_dca()
1120  */
1121 static void
1122 ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id)
1123 {
1124 	ioat_chan_dca_desc_t *saved_prev;
1125 	ioat_chan_dca_desc_t *desc;
1126 	ioat_chan_dca_desc_t *prev;
1127 	ioat_channel_t channel;
1128 	uint64_t next_desc_phys;
1129 	off_t prev_offset;
1130 	off_t next_offset;
1131 
1132 
1133 	channel = ring->cr_chan;
1134 	desc = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_next];
1135 	prev = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_prev];
1136 
1137 	/* keep track of the number of descs posted for cbv2 */
1138 	ring->cr_post_cnt++;
1139 
1140 	/*
1141 	 * post a context change desriptor. If dca has never been used on
1142 	 * this channel, or if the id doesn't match the last id used on this
1143 	 * channel, set CONTEXT_CHANGE bit and dca id, set dca state to active,
1144 	 * and save away the id we're using.
1145 	 */
1146 	desc->dd_ctrl = IOAT_DESC_CTRL_OP_CNTX;
1147 	desc->dd_next_desc = 0x0;
1148 	if (!channel->ic_dca_active || (channel->ic_dca_current != dca_id)) {
1149 		channel->ic_dca_active = B_TRUE;
1150 		channel->ic_dca_current = dca_id;
1151 		desc->dd_ctrl |= IOAT_DESC_CTRL_CNTX_CHNG;
1152 		desc->dd_cntx = dca_id;
1153 	}
1154 
1155 	/*
1156 	 * save next desc and prev offset for when we link the two
1157 	 * descriptors together.
1158 	 */
1159 	saved_prev = prev;
1160 	prev_offset = ring->cr_desc_prev << 6;
1161 	next_offset = ring->cr_desc_next << 6;
1162 	next_desc_phys = ring->cr_phys_desc + next_offset;
1163 
1164 	/* save the current desc_next and desc_last for the completion */
1165 	ring->cr_desc_prev = ring->cr_desc_next;
1166 	ring->cr_desc_gen_prev = ring->cr_desc_gen;
1167 
1168 	/* increment next/gen so it points to the next free desc */
1169 	ring->cr_desc_next++;
1170 	if (ring->cr_desc_next > ring->cr_desc_last) {
1171 		ring->cr_desc_next = 0;
1172 		ring->cr_desc_gen++;
1173 	}
1174 
1175 	/*
1176 	 * if this is CBv2, link the descriptor to an empty descriptor. Since
1177 	 * we always leave on desc empty to detect full, this works out.
1178 	 */
1179 	if (ring->cr_chan->ic_ver == IOAT_CBv2) {
1180 		desc = (ioat_chan_dca_desc_t *)
1181 		    &ring->cr_desc[ring->cr_desc_next];
1182 		prev = (ioat_chan_dca_desc_t *)
1183 		    &ring->cr_desc[ring->cr_desc_prev];
1184 		desc->dd_ctrl = 0;
1185 		desc->dd_next_desc = 0x0;
1186 		(void) ddi_dma_sync(channel->ic_desc_dma_handle,
1187 		    ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV);
1188 		prev->dd_next_desc = ring->cr_phys_desc +
1189 		    (ring->cr_desc_next << 6);
1190 	}
1191 
1192 	/* Put the descriptors physical address in the previous descriptor */
1193 	/*LINTED:E_TRUE_LOGICAL_EXPR*/
1194 	ASSERT(sizeof (ioat_chan_dca_desc_t) == 64);
1195 
1196 	/* sync the current desc */
1197 	(void) ddi_dma_sync(channel->ic_desc_dma_handle, next_offset, 64,
1198 	    DDI_DMA_SYNC_FORDEV);
1199 
1200 	/* update the previous desc and sync it too */
1201 	saved_prev->dd_next_desc = next_desc_phys;
1202 	(void) ddi_dma_sync(channel->ic_desc_dma_handle, prev_offset, 64,
1203 	    DDI_DMA_SYNC_FORDEV);
1204 }
1205 
1206 
1207 /*
1208  * ioat_cmd_post_copy()
1209  *
1210  */
1211 static void
1212 ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr,
1213     uint64_t dest_addr, uint32_t size, uint32_t ctrl)
1214 {
1215 	ioat_chan_dma_desc_t *saved_prev;
1216 	ioat_chan_dma_desc_t *desc;
1217 	ioat_chan_dma_desc_t *prev;
1218 	ioat_channel_t channel;
1219 	uint64_t next_desc_phy;
1220 	off_t prev_offset;
1221 	off_t next_offset;
1222 
1223 
1224 	channel = ring->cr_chan;
1225 	desc = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_next];
1226 	prev = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_prev];
1227 
1228 	/* keep track of the number of descs posted for cbv2 */
1229 	ring->cr_post_cnt++;
1230 
1231 	/* write in the DMA desc */
1232 	desc->dd_ctrl = IOAT_DESC_CTRL_OP_DMA | ctrl;
1233 	desc->dd_size = size;
1234 	desc->dd_src_paddr = src_addr;
1235 	desc->dd_dest_paddr = dest_addr;
1236 	desc->dd_next_desc = 0x0;
1237 
1238 	/*
1239 	 * save next desc and prev offset for when we link the two
1240 	 * descriptors together.
1241 	 */
1242 	saved_prev = prev;
1243 	prev_offset = ring->cr_desc_prev << 6;
1244 	next_offset = ring->cr_desc_next << 6;
1245 	next_desc_phy = ring->cr_phys_desc + next_offset;
1246 
1247 	/* increment next/gen so it points to the next free desc */
1248 	ring->cr_desc_prev = ring->cr_desc_next;
1249 	ring->cr_desc_gen_prev = ring->cr_desc_gen;
1250 
1251 	/* increment next/gen so it points to the next free desc */
1252 	ring->cr_desc_next++;
1253 	if (ring->cr_desc_next > ring->cr_desc_last) {
1254 		ring->cr_desc_next = 0;
1255 		ring->cr_desc_gen++;
1256 	}
1257 
1258 	/*
1259 	 * if this is CBv2, link the descriptor to an empty descriptor. Since
1260 	 * we always leave on desc empty to detect full, this works out.
1261 	 */
1262 	if (ring->cr_chan->ic_ver == IOAT_CBv2) {
1263 		desc = (ioat_chan_dma_desc_t *)
1264 		    &ring->cr_desc[ring->cr_desc_next];
1265 		prev = (ioat_chan_dma_desc_t *)
1266 		    &ring->cr_desc[ring->cr_desc_prev];
1267 		desc->dd_size = 0;
1268 		desc->dd_ctrl = 0;
1269 		desc->dd_next_desc = 0x0;
1270 		(void) ddi_dma_sync(channel->ic_desc_dma_handle,
1271 		    ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV);
1272 		prev->dd_next_desc = ring->cr_phys_desc +
1273 		    (ring->cr_desc_next << 6);
1274 	}
1275 
1276 	/* Put the descriptors physical address in the previous descriptor */
1277 	/*LINTED:E_TRUE_LOGICAL_EXPR*/
1278 	ASSERT(sizeof (ioat_chan_dma_desc_t) == 64);
1279 
1280 	/* sync the current desc */
1281 	(void) ddi_dma_sync(channel->ic_desc_dma_handle, next_offset, 64,
1282 	    DDI_DMA_SYNC_FORDEV);
1283 
1284 	/* update the previous desc and sync it too */
1285 	saved_prev->dd_next_desc = next_desc_phy;
1286 	(void) ddi_dma_sync(channel->ic_desc_dma_handle, prev_offset, 64,
1287 	    DDI_DMA_SYNC_FORDEV);
1288 }
1289 
1290 
1291 /*
1292  * ioat_cmd_poll()
1293  */
1294 int
1295 ioat_cmd_poll(void *private, dcopy_cmd_t cmd)
1296 {
1297 	ioat_channel_ring_t *ring;
1298 	ioat_cmd_private_t *priv;
1299 	ioat_channel_t channel;
1300 	uint64_t generation;
1301 	uint64_t last_cmpl;
1302 
1303 	ASSERT(cmd != NULL);
1304 	channel = (ioat_channel_t)private;
1305 	priv = cmd->dp_private->pr_device_cmd_private;
1306 
1307 	ring = channel->ic_ring;
1308 	ASSERT(ring != NULL);
1309 
1310 	if ((cmd->dp_flags & DCOPY_CMD_NOWAIT) == 0) {
1311 		mutex_enter(&ring->cr_cmpl_mutex);
1312 
1313 	/*
1314 	 * Try to acquire mutex if NOWAIT flag is set.
1315 	 * Return failure if failed to acquire mutex.
1316 	 */
1317 	} else if (mutex_tryenter(&ring->cr_cmpl_mutex) == 0) {
1318 		return (DCOPY_FAILURE);
1319 	}
1320 
1321 	/* if the channel had a fatal failure, fail all polls */
1322 	if ((channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) ||
1323 	    IOAT_CMPL_FAILED(channel)) {
1324 		mutex_exit(&ring->cr_cmpl_mutex);
1325 		return (DCOPY_FAILURE);
1326 	}
1327 
1328 	/*
1329 	 * if the current completion is the same as the last time we read one,
1330 	 * post is still pending, nothing further to do. We track completions
1331 	 * as indexes into the ring since post uses VAs and the H/W returns
1332 	 * PAs. We grab a snapshot of generation and last_cmpl in the mutex.
1333 	 */
1334 	(void) ddi_dma_sync(channel->ic_cmpl_dma_handle, 0, 0,
1335 	    DDI_DMA_SYNC_FORCPU);
1336 	last_cmpl = IOAT_CMPL_INDEX(channel);
1337 	if (last_cmpl != ring->cr_cmpl_last) {
1338 		/*
1339 		 * if we wrapped the ring, increment the generation. Store
1340 		 * the last cmpl. This logic assumes a physically contiguous
1341 		 * ring.
1342 		 */
1343 		if (last_cmpl < ring->cr_cmpl_last) {
1344 			ring->cr_cmpl_gen++;
1345 		}
1346 		ring->cr_cmpl_last = last_cmpl;
1347 		generation = ring->cr_cmpl_gen;
1348 
1349 	} else {
1350 		generation = ring->cr_cmpl_gen;
1351 	}
1352 
1353 	mutex_exit(&ring->cr_cmpl_mutex);
1354 
1355 	/*
1356 	 * if cmd isn't passed in, well return.  Useful for updating the
1357 	 * consumer pointer (ring->cr_cmpl_last).
1358 	 */
1359 	if (cmd->dp_flags & DCOPY_CMD_SYNC) {
1360 		return (DCOPY_PENDING);
1361 	}
1362 
1363 	/*
1364 	 * if the post's generation is old, this post has completed. No reason
1365 	 * to go check the last completion. if the generation is the same
1366 	 * and if the post is before or = to the last completion processed,
1367 	 * the post has completed.
1368 	 */
1369 	if (priv->ip_generation < generation) {
1370 		return (DCOPY_COMPLETED);
1371 	} else if ((priv->ip_generation == generation) &&
1372 	    (priv->ip_index <= last_cmpl)) {
1373 		return (DCOPY_COMPLETED);
1374 	}
1375 
1376 	return (DCOPY_PENDING);
1377 }
1378 
1379 
1380 /*
1381  * ioat_ring_reserve()
1382  */
1383 int
1384 ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t *ring,
1385     dcopy_cmd_t cmd)
1386 {
1387 	uint64_t dest_addr;
1388 	uint32_t dest_size;
1389 	uint64_t src_addr;
1390 	uint32_t src_size;
1391 	size_t xfer_size;
1392 	uint64_t desc;
1393 	int num_desc;
1394 	size_t size;
1395 	int i;
1396 
1397 
1398 	/*
1399 	 * figure out how many descriptors we need. This can include a dca
1400 	 * desc and multiple desc for a dma copy.
1401 	 */
1402 	num_desc = 0;
1403 	if ((channel->ic_ver == IOAT_CBv2) &&
1404 	    (cmd->dp_flags & DCOPY_CMD_DCA)) {
1405 		num_desc++;
1406 	}
1407 	src_addr = cmd->dp.copy.cc_source;
1408 	dest_addr = cmd->dp.copy.cc_dest;
1409 	size = cmd->dp.copy.cc_size;
1410 	while (size > 0) {
1411 		num_desc++;
1412 
1413 		/* adjust for any offset into the page */
1414 		if ((src_addr & PAGEOFFSET) == 0) {
1415 			src_size = PAGESIZE;
1416 		} else {
1417 			src_size = PAGESIZE - (src_addr & PAGEOFFSET);
1418 		}
1419 		if ((dest_addr & PAGEOFFSET) == 0) {
1420 			dest_size = PAGESIZE;
1421 		} else {
1422 			dest_size = PAGESIZE - (dest_addr & PAGEOFFSET);
1423 		}
1424 
1425 		/* take the smallest of the three */
1426 		xfer_size = MIN(src_size, dest_size);
1427 		xfer_size = MIN(xfer_size, size);
1428 
1429 		/* go to the next page */
1430 		src_addr += xfer_size;
1431 		dest_addr += xfer_size;
1432 		size -= xfer_size;
1433 	}
1434 
1435 	/* Make sure we have space for these descriptors */
1436 	desc = ring->cr_desc_next;
1437 	for (i = 0; i < num_desc; i++) {
1438 
1439 		/*
1440 		 * if this is the last descriptor in the ring, see if the
1441 		 * last completed descriptor is #0.
1442 		 */
1443 		if (desc == ring->cr_desc_last) {
1444 			if (ring->cr_cmpl_last == 0) {
1445 				/*
1446 				 * if we think the ring is full, update where
1447 				 * the H/W really is and check for full again.
1448 				 */
1449 				cmd->dp_flags |= DCOPY_CMD_SYNC;
1450 				(void) ioat_cmd_poll(channel, cmd);
1451 				cmd->dp_flags &= ~DCOPY_CMD_SYNC;
1452 				if (ring->cr_cmpl_last == 0) {
1453 					return (DCOPY_NORESOURCES);
1454 				}
1455 			}
1456 
1457 			/*
1458 			 * go to the next descriptor which is zero in this
1459 			 * case.
1460 			 */
1461 			desc = 0;
1462 
1463 		/*
1464 		 * if this is not the last descriptor in the ring, see if
1465 		 * the last completion we saw was the next descriptor.
1466 		 */
1467 		} else {
1468 			if ((desc + 1) == ring->cr_cmpl_last) {
1469 				/*
1470 				 * if we think the ring is full, update where
1471 				 * the H/W really is and check for full again.
1472 				 */
1473 				cmd->dp_flags |= DCOPY_CMD_SYNC;
1474 				(void) ioat_cmd_poll(channel, cmd);
1475 				cmd->dp_flags &= ~DCOPY_CMD_SYNC;
1476 				if ((desc + 1) == ring->cr_cmpl_last) {
1477 					return (DCOPY_NORESOURCES);
1478 				}
1479 			}
1480 
1481 			/* go to the next descriptor */
1482 			desc++;
1483 		}
1484 	}
1485 
1486 	return (DCOPY_SUCCESS);
1487 }
1488