1 /*-
2  * Copyright (C) 2012 Intel Corporation
3  * All rights reserved.
4  * Copyright (C) 2018 Alexander Motin <mav@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include "opt_ddb.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/bus.h>
36 #include <sys/conf.h>
37 #include <sys/fail.h>
38 #include <sys/ioccom.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/rman.h>
45 #include <sys/sbuf.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48 #include <sys/time.h>
49 #include <dev/pci/pcireg.h>
50 #include <dev/pci/pcivar.h>
51 #include <machine/bus.h>
52 #include <machine/resource.h>
53 #include <machine/stdarg.h>
54 
55 #ifdef DDB
56 #include <ddb/ddb.h>
57 #endif
58 
59 #include "ioat.h"
60 #include "ioat_hw.h"
61 #include "ioat_internal.h"
62 
63 #ifndef	BUS_SPACE_MAXADDR_40BIT
64 #define	BUS_SPACE_MAXADDR_40BIT	0xFFFFFFFFFFULL
65 #endif
66 
67 static int ioat_probe(device_t device);
68 static int ioat_attach(device_t device);
69 static int ioat_detach(device_t device);
70 static int ioat_setup_intr(struct ioat_softc *ioat);
71 static int ioat_teardown_intr(struct ioat_softc *ioat);
72 static int ioat3_attach(device_t device);
73 static int ioat_start_channel(struct ioat_softc *ioat);
74 static int ioat_map_pci_bar(struct ioat_softc *ioat);
75 static void ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg,
76     int error);
77 static void ioat_interrupt_handler(void *arg);
78 static boolean_t ioat_model_resets_msix(struct ioat_softc *ioat);
79 static int chanerr_to_errno(uint32_t);
80 static void ioat_process_events(struct ioat_softc *ioat, boolean_t intr);
81 static inline uint32_t ioat_get_active(struct ioat_softc *ioat);
82 static inline uint32_t ioat_get_ring_space(struct ioat_softc *ioat);
83 static void ioat_free_ring(struct ioat_softc *, uint32_t size,
84     struct ioat_descriptor *);
85 static int ioat_reserve_space(struct ioat_softc *, uint32_t, int mflags);
86 static union ioat_hw_descriptor *ioat_get_descriptor(struct ioat_softc *,
87     uint32_t index);
88 static struct ioat_descriptor *ioat_get_ring_entry(struct ioat_softc *,
89     uint32_t index);
90 static void ioat_halted_debug(struct ioat_softc *, uint32_t);
91 static void ioat_poll_timer_callback(void *arg);
92 static void dump_descriptor(void *hw_desc);
93 static void ioat_submit_single(struct ioat_softc *ioat);
94 static void ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg,
95     int error);
96 static int ioat_reset_hw(struct ioat_softc *ioat);
97 static void ioat_reset_hw_task(void *, int);
98 static void ioat_setup_sysctl(device_t device);
99 static int sysctl_handle_reset(SYSCTL_HANDLER_ARGS);
100 static void ioat_get(struct ioat_softc *);
101 static void ioat_put(struct ioat_softc *);
102 static void ioat_drain_locked(struct ioat_softc *);
103 
104 #define	ioat_log_message(v, ...) do {					\
105 	if ((v) <= g_ioat_debug_level) {				\
106 		device_printf(ioat->device, __VA_ARGS__);		\
107 	}								\
108 } while (0)
109 
110 MALLOC_DEFINE(M_IOAT, "ioat", "ioat driver memory allocations");
111 SYSCTL_NODE(_hw, OID_AUTO, ioat, CTLFLAG_RD, 0, "ioat node");
112 
113 static int g_force_legacy_interrupts;
114 SYSCTL_INT(_hw_ioat, OID_AUTO, force_legacy_interrupts, CTLFLAG_RDTUN,
115     &g_force_legacy_interrupts, 0, "Set to non-zero to disable MSI-X and use legacy interrupts");
116 
117 int g_ioat_debug_level = 0;
118 SYSCTL_INT(_hw_ioat, OID_AUTO, debug_level, CTLFLAG_RWTUN, &g_ioat_debug_level,
119     0, "Set log level (0-3) for ioat(4). Higher is more verbose.");
120 
121 unsigned g_ioat_ring_order = 13;
122 SYSCTL_UINT(_hw_ioat, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ioat_ring_order,
123     0, "Set IOAT ring order.  (1 << this) == ring size.");
124 
125 /*
126  * OS <-> Driver interface structures
127  */
128 static device_method_t ioat_pci_methods[] = {
129 	/* Device interface */
130 	DEVMETHOD(device_probe,     ioat_probe),
131 	DEVMETHOD(device_attach,    ioat_attach),
132 	DEVMETHOD(device_detach,    ioat_detach),
133 	DEVMETHOD_END
134 };
135 
136 static driver_t ioat_pci_driver = {
137 	"ioat",
138 	ioat_pci_methods,
139 	sizeof(struct ioat_softc),
140 };
141 
142 static devclass_t ioat_devclass;
143 DRIVER_MODULE(ioat, pci, ioat_pci_driver, ioat_devclass, 0, 0);
144 MODULE_VERSION(ioat, 1);
145 
146 /*
147  * Private data structures
148  */
149 static struct ioat_softc *ioat_channel[IOAT_MAX_CHANNELS];
150 static unsigned ioat_channel_index = 0;
151 SYSCTL_UINT(_hw_ioat, OID_AUTO, channels, CTLFLAG_RD, &ioat_channel_index, 0,
152     "Number of IOAT channels attached");
153 static struct mtx ioat_list_mtx;
154 MTX_SYSINIT(ioat_list_mtx, &ioat_list_mtx, "ioat list mtx", MTX_DEF);
155 
156 static struct _pcsid
157 {
158 	u_int32_t   type;
159 	const char  *desc;
160 } pci_ids[] = {
161 	{ 0x34308086, "TBG IOAT Ch0" },
162 	{ 0x34318086, "TBG IOAT Ch1" },
163 	{ 0x34328086, "TBG IOAT Ch2" },
164 	{ 0x34338086, "TBG IOAT Ch3" },
165 	{ 0x34298086, "TBG IOAT Ch4" },
166 	{ 0x342a8086, "TBG IOAT Ch5" },
167 	{ 0x342b8086, "TBG IOAT Ch6" },
168 	{ 0x342c8086, "TBG IOAT Ch7" },
169 
170 	{ 0x37108086, "JSF IOAT Ch0" },
171 	{ 0x37118086, "JSF IOAT Ch1" },
172 	{ 0x37128086, "JSF IOAT Ch2" },
173 	{ 0x37138086, "JSF IOAT Ch3" },
174 	{ 0x37148086, "JSF IOAT Ch4" },
175 	{ 0x37158086, "JSF IOAT Ch5" },
176 	{ 0x37168086, "JSF IOAT Ch6" },
177 	{ 0x37178086, "JSF IOAT Ch7" },
178 	{ 0x37188086, "JSF IOAT Ch0 (RAID)" },
179 	{ 0x37198086, "JSF IOAT Ch1 (RAID)" },
180 
181 	{ 0x3c208086, "SNB IOAT Ch0" },
182 	{ 0x3c218086, "SNB IOAT Ch1" },
183 	{ 0x3c228086, "SNB IOAT Ch2" },
184 	{ 0x3c238086, "SNB IOAT Ch3" },
185 	{ 0x3c248086, "SNB IOAT Ch4" },
186 	{ 0x3c258086, "SNB IOAT Ch5" },
187 	{ 0x3c268086, "SNB IOAT Ch6" },
188 	{ 0x3c278086, "SNB IOAT Ch7" },
189 	{ 0x3c2e8086, "SNB IOAT Ch0 (RAID)" },
190 	{ 0x3c2f8086, "SNB IOAT Ch1 (RAID)" },
191 
192 	{ 0x0e208086, "IVB IOAT Ch0" },
193 	{ 0x0e218086, "IVB IOAT Ch1" },
194 	{ 0x0e228086, "IVB IOAT Ch2" },
195 	{ 0x0e238086, "IVB IOAT Ch3" },
196 	{ 0x0e248086, "IVB IOAT Ch4" },
197 	{ 0x0e258086, "IVB IOAT Ch5" },
198 	{ 0x0e268086, "IVB IOAT Ch6" },
199 	{ 0x0e278086, "IVB IOAT Ch7" },
200 	{ 0x0e2e8086, "IVB IOAT Ch0 (RAID)" },
201 	{ 0x0e2f8086, "IVB IOAT Ch1 (RAID)" },
202 
203 	{ 0x2f208086, "HSW IOAT Ch0" },
204 	{ 0x2f218086, "HSW IOAT Ch1" },
205 	{ 0x2f228086, "HSW IOAT Ch2" },
206 	{ 0x2f238086, "HSW IOAT Ch3" },
207 	{ 0x2f248086, "HSW IOAT Ch4" },
208 	{ 0x2f258086, "HSW IOAT Ch5" },
209 	{ 0x2f268086, "HSW IOAT Ch6" },
210 	{ 0x2f278086, "HSW IOAT Ch7" },
211 	{ 0x2f2e8086, "HSW IOAT Ch0 (RAID)" },
212 	{ 0x2f2f8086, "HSW IOAT Ch1 (RAID)" },
213 
214 	{ 0x0c508086, "BWD IOAT Ch0" },
215 	{ 0x0c518086, "BWD IOAT Ch1" },
216 	{ 0x0c528086, "BWD IOAT Ch2" },
217 	{ 0x0c538086, "BWD IOAT Ch3" },
218 
219 	{ 0x6f508086, "BDXDE IOAT Ch0" },
220 	{ 0x6f518086, "BDXDE IOAT Ch1" },
221 	{ 0x6f528086, "BDXDE IOAT Ch2" },
222 	{ 0x6f538086, "BDXDE IOAT Ch3" },
223 
224 	{ 0x6f208086, "BDX IOAT Ch0" },
225 	{ 0x6f218086, "BDX IOAT Ch1" },
226 	{ 0x6f228086, "BDX IOAT Ch2" },
227 	{ 0x6f238086, "BDX IOAT Ch3" },
228 	{ 0x6f248086, "BDX IOAT Ch4" },
229 	{ 0x6f258086, "BDX IOAT Ch5" },
230 	{ 0x6f268086, "BDX IOAT Ch6" },
231 	{ 0x6f278086, "BDX IOAT Ch7" },
232 	{ 0x6f2e8086, "BDX IOAT Ch0 (RAID)" },
233 	{ 0x6f2f8086, "BDX IOAT Ch1 (RAID)" },
234 
235 	{ 0x20218086, "SKX IOAT" },
236 };
237 
238 MODULE_PNP_INFO("W32:vendor/device;D:#", pci, ioat, pci_ids,
239     nitems(pci_ids));
240 
241 /*
242  * OS <-> Driver linkage functions
243  */
244 static int
245 ioat_probe(device_t device)
246 {
247 	struct _pcsid *ep;
248 	u_int32_t type;
249 
250 	type = pci_get_devid(device);
251 	for (ep = pci_ids; ep < &pci_ids[nitems(pci_ids)]; ep++) {
252 		if (ep->type == type) {
253 			device_set_desc(device, ep->desc);
254 			return (0);
255 		}
256 	}
257 	return (ENXIO);
258 }
259 
260 static int
261 ioat_attach(device_t device)
262 {
263 	struct ioat_softc *ioat;
264 	int error, i;
265 
266 	ioat = DEVICE2SOFTC(device);
267 	ioat->device = device;
268 
269 	error = ioat_map_pci_bar(ioat);
270 	if (error != 0)
271 		goto err;
272 
273 	ioat->version = ioat_read_cbver(ioat);
274 	if (ioat->version < IOAT_VER_3_0) {
275 		error = ENODEV;
276 		goto err;
277 	}
278 
279 	error = ioat3_attach(device);
280 	if (error != 0)
281 		goto err;
282 
283 	error = pci_enable_busmaster(device);
284 	if (error != 0)
285 		goto err;
286 
287 	error = ioat_setup_intr(ioat);
288 	if (error != 0)
289 		goto err;
290 
291 	error = ioat_reset_hw(ioat);
292 	if (error != 0)
293 		goto err;
294 
295 	ioat_process_events(ioat, FALSE);
296 	ioat_setup_sysctl(device);
297 
298 	mtx_lock(&ioat_list_mtx);
299 	for (i = 0; i < IOAT_MAX_CHANNELS; i++) {
300 		if (ioat_channel[i] == NULL)
301 			break;
302 	}
303 	if (i >= IOAT_MAX_CHANNELS) {
304 		mtx_unlock(&ioat_list_mtx);
305 		device_printf(device, "Too many I/OAT devices in system\n");
306 		error = ENXIO;
307 		goto err;
308 	}
309 	ioat->chan_idx = i;
310 	ioat_channel[i] = ioat;
311 	if (i >= ioat_channel_index)
312 		ioat_channel_index = i + 1;
313 	mtx_unlock(&ioat_list_mtx);
314 
315 	ioat_test_attach();
316 
317 err:
318 	if (error != 0)
319 		ioat_detach(device);
320 	return (error);
321 }
322 
323 static inline int
324 ioat_bus_dmamap_destroy(struct ioat_softc *ioat, const char *func,
325     bus_dma_tag_t dmat, bus_dmamap_t map)
326 {
327 	int error;
328 
329 	error = bus_dmamap_destroy(dmat, map);
330 	if (error != 0) {
331 		ioat_log_message(0,
332 		    "%s: bus_dmamap_destroy failed %d\n", func, error);
333 	}
334 
335 	return (error);
336 }
337 
338 static int
339 ioat_detach(device_t device)
340 {
341 	struct ioat_softc *ioat;
342 	int i, error;
343 
344 	ioat = DEVICE2SOFTC(device);
345 
346 	mtx_lock(&ioat_list_mtx);
347 	ioat_channel[ioat->chan_idx] = NULL;
348 	while (ioat_channel_index > 0 &&
349 	    ioat_channel[ioat_channel_index - 1] == NULL)
350 		ioat_channel_index--;
351 	mtx_unlock(&ioat_list_mtx);
352 
353 	ioat_test_detach();
354 	taskqueue_drain(taskqueue_thread, &ioat->reset_task);
355 
356 	mtx_lock(&ioat->submit_lock);
357 	ioat->quiescing = TRUE;
358 	ioat->destroying = TRUE;
359 	wakeup(&ioat->quiescing);
360 	wakeup(&ioat->resetting);
361 
362 	ioat_drain_locked(ioat);
363 	mtx_unlock(&ioat->submit_lock);
364 	mtx_lock(&ioat->cleanup_lock);
365 	while (ioat_get_active(ioat) > 0)
366 		msleep(&ioat->tail, &ioat->cleanup_lock, 0, "ioat_drain", 1);
367 	mtx_unlock(&ioat->cleanup_lock);
368 
369 	ioat_teardown_intr(ioat);
370 	callout_drain(&ioat->poll_timer);
371 
372 	pci_disable_busmaster(device);
373 
374 	if (ioat->pci_resource != NULL)
375 		bus_release_resource(device, SYS_RES_MEMORY,
376 		    ioat->pci_resource_id, ioat->pci_resource);
377 
378 	if (ioat->data_tag != NULL) {
379 		for (i = 0; i < 1 << ioat->ring_size_order; i++) {
380 			error = ioat_bus_dmamap_destroy(ioat, __func__,
381 			    ioat->data_tag, ioat->ring[i].src_dmamap);
382 			if (error != 0)
383 				return (error);
384 		}
385 		for (i = 0; i < 1 << ioat->ring_size_order; i++) {
386 			error = ioat_bus_dmamap_destroy(ioat, __func__,
387 			    ioat->data_tag, ioat->ring[i].dst_dmamap);
388 			if (error != 0)
389 				return (error);
390 		}
391 
392 		for (i = 0; i < 1 << ioat->ring_size_order; i++) {
393 			error = ioat_bus_dmamap_destroy(ioat, __func__,
394 			    ioat->data_tag, ioat->ring[i].src2_dmamap);
395 			if (error != 0)
396 				return (error);
397 		}
398 		for (i = 0; i < 1 << ioat->ring_size_order; i++) {
399 			error = ioat_bus_dmamap_destroy(ioat, __func__,
400 			    ioat->data_tag, ioat->ring[i].dst2_dmamap);
401 			if (error != 0)
402 				return (error);
403 		}
404 
405 		bus_dma_tag_destroy(ioat->data_tag);
406 	}
407 
408 	if (ioat->data_crc_tag != NULL) {
409 		for (i = 0; i < 1 << ioat->ring_size_order; i++) {
410 			error = ioat_bus_dmamap_destroy(ioat, __func__,
411 			    ioat->data_crc_tag, ioat->ring[i].crc_dmamap);
412 			if (error != 0)
413 				return (error);
414 		}
415 
416 		bus_dma_tag_destroy(ioat->data_crc_tag);
417 	}
418 
419 	if (ioat->ring != NULL)
420 		ioat_free_ring(ioat, 1 << ioat->ring_size_order, ioat->ring);
421 
422 	if (ioat->comp_update != NULL) {
423 		bus_dmamap_unload(ioat->comp_update_tag, ioat->comp_update_map);
424 		bus_dmamem_free(ioat->comp_update_tag, ioat->comp_update,
425 		    ioat->comp_update_map);
426 		bus_dma_tag_destroy(ioat->comp_update_tag);
427 	}
428 
429 	if (ioat->hw_desc_ring != NULL) {
430 		bus_dmamap_unload(ioat->hw_desc_tag, ioat->hw_desc_map);
431 		bus_dmamem_free(ioat->hw_desc_tag, ioat->hw_desc_ring,
432 		    ioat->hw_desc_map);
433 		bus_dma_tag_destroy(ioat->hw_desc_tag);
434 	}
435 
436 	return (0);
437 }
438 
439 static int
440 ioat_teardown_intr(struct ioat_softc *ioat)
441 {
442 
443 	if (ioat->tag != NULL)
444 		bus_teardown_intr(ioat->device, ioat->res, ioat->tag);
445 
446 	if (ioat->res != NULL)
447 		bus_release_resource(ioat->device, SYS_RES_IRQ,
448 		    rman_get_rid(ioat->res), ioat->res);
449 
450 	pci_release_msi(ioat->device);
451 	return (0);
452 }
453 
454 static int
455 ioat_start_channel(struct ioat_softc *ioat)
456 {
457 	struct ioat_dma_hw_descriptor *hw_desc;
458 	struct ioat_descriptor *desc;
459 	struct bus_dmadesc *dmadesc;
460 	uint64_t status;
461 	uint32_t chanerr;
462 	int i;
463 
464 	ioat_acquire(&ioat->dmaengine);
465 
466 	/* Submit 'NULL' operation manually to avoid quiescing flag */
467 	desc = ioat_get_ring_entry(ioat, ioat->head);
468 	hw_desc = &ioat_get_descriptor(ioat, ioat->head)->dma;
469 	dmadesc = &desc->bus_dmadesc;
470 
471 	dmadesc->callback_fn = NULL;
472 	dmadesc->callback_arg = NULL;
473 
474 	hw_desc->u.control_raw = 0;
475 	hw_desc->u.control_generic.op = IOAT_OP_COPY;
476 	hw_desc->u.control_generic.completion_update = 1;
477 	hw_desc->size = 8;
478 	hw_desc->src_addr = 0;
479 	hw_desc->dest_addr = 0;
480 	hw_desc->u.control.null = 1;
481 
482 	ioat_submit_single(ioat);
483 	ioat_release(&ioat->dmaengine);
484 
485 	for (i = 0; i < 100; i++) {
486 		DELAY(1);
487 		status = ioat_get_chansts(ioat);
488 		if (is_ioat_idle(status))
489 			return (0);
490 	}
491 
492 	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
493 	ioat_log_message(0, "could not start channel: "
494 	    "status = %#jx error = %b\n", (uintmax_t)status, (int)chanerr,
495 	    IOAT_CHANERR_STR);
496 	return (ENXIO);
497 }
498 
499 /*
500  * Initialize Hardware
501  */
502 static int
503 ioat3_attach(device_t device)
504 {
505 	struct ioat_softc *ioat;
506 	struct ioat_descriptor *ring;
507 	struct ioat_dma_hw_descriptor *dma_hw_desc;
508 	void *hw_desc;
509 	size_t ringsz;
510 	int i, num_descriptors;
511 	int error;
512 	uint8_t xfercap;
513 
514 	error = 0;
515 	ioat = DEVICE2SOFTC(device);
516 	ioat->capabilities = ioat_read_dmacapability(ioat);
517 
518 	ioat_log_message(0, "Capabilities: %b\n", (int)ioat->capabilities,
519 	    IOAT_DMACAP_STR);
520 
521 	xfercap = ioat_read_xfercap(ioat);
522 	ioat->max_xfer_size = 1 << xfercap;
523 
524 	ioat->intrdelay_supported = (ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) &
525 	    IOAT_INTRDELAY_SUPPORTED) != 0;
526 	if (ioat->intrdelay_supported)
527 		ioat->intrdelay_max = IOAT_INTRDELAY_US_MASK;
528 
529 	/* TODO: need to check DCA here if we ever do XOR/PQ */
530 
531 	mtx_init(&ioat->submit_lock, "ioat_submit", NULL, MTX_DEF);
532 	mtx_init(&ioat->cleanup_lock, "ioat_cleanup", NULL, MTX_DEF);
533 	callout_init(&ioat->poll_timer, 1);
534 	TASK_INIT(&ioat->reset_task, 0, ioat_reset_hw_task, ioat);
535 
536 	/* Establish lock order for Witness */
537 	mtx_lock(&ioat->cleanup_lock);
538 	mtx_lock(&ioat->submit_lock);
539 	mtx_unlock(&ioat->submit_lock);
540 	mtx_unlock(&ioat->cleanup_lock);
541 
542 	ioat->is_submitter_processing = FALSE;
543 
544 	bus_dma_tag_create(bus_get_dma_tag(ioat->device), sizeof(uint64_t), 0x0,
545 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
546 	    sizeof(uint64_t), 1, sizeof(uint64_t), 0, NULL, NULL,
547 	    &ioat->comp_update_tag);
548 
549 	error = bus_dmamem_alloc(ioat->comp_update_tag,
550 	    (void **)&ioat->comp_update, BUS_DMA_ZERO, &ioat->comp_update_map);
551 	if (ioat->comp_update == NULL)
552 		return (ENOMEM);
553 
554 	error = bus_dmamap_load(ioat->comp_update_tag, ioat->comp_update_map,
555 	    ioat->comp_update, sizeof(uint64_t), ioat_comp_update_map, ioat,
556 	    0);
557 	if (error != 0)
558 		return (error);
559 
560 	ioat->ring_size_order = g_ioat_ring_order;
561 	num_descriptors = 1 << ioat->ring_size_order;
562 	ringsz = sizeof(struct ioat_dma_hw_descriptor) * num_descriptors;
563 
564 	error = bus_dma_tag_create(bus_get_dma_tag(ioat->device),
565 	    2 * 1024 * 1024, 0x0, (bus_addr_t)BUS_SPACE_MAXADDR_40BIT,
566 	    BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1, ringsz, 0, NULL, NULL,
567 	    &ioat->hw_desc_tag);
568 	if (error != 0)
569 		return (error);
570 
571 	error = bus_dmamem_alloc(ioat->hw_desc_tag, &hw_desc,
572 	    BUS_DMA_ZERO | BUS_DMA_WAITOK, &ioat->hw_desc_map);
573 	if (error != 0)
574 		return (error);
575 
576 	error = bus_dmamap_load(ioat->hw_desc_tag, ioat->hw_desc_map, hw_desc,
577 	    ringsz, ioat_dmamap_cb, &ioat->hw_desc_bus_addr, BUS_DMA_WAITOK);
578 	if (error)
579 		return (error);
580 
581 	ioat->hw_desc_ring = hw_desc;
582 
583 	error = bus_dma_tag_create(bus_get_dma_tag(ioat->device),
584 	    1, 0, BUS_SPACE_MAXADDR_40BIT, BUS_SPACE_MAXADDR, NULL, NULL,
585 	    ioat->max_xfer_size, 1, ioat->max_xfer_size, 0, NULL, NULL,
586 	    &ioat->data_crc_tag);
587 	if (error != 0) {
588 		ioat_log_message(0, "%s: bus_dma_tag_create failed %d\n",
589 		    __func__, error);
590 		return (error);
591 	}
592 
593 	error = bus_dma_tag_create(bus_get_dma_tag(ioat->device),
594 	    1, 0, BUS_SPACE_MAXADDR_48BIT, BUS_SPACE_MAXADDR, NULL, NULL,
595 	    ioat->max_xfer_size, 1, ioat->max_xfer_size, 0, NULL, NULL,
596 	    &ioat->data_tag);
597 	if (error != 0) {
598 		ioat_log_message(0, "%s: bus_dma_tag_create failed %d\n",
599 		    __func__, error);
600 		return (error);
601 	}
602 	ioat->ring = malloc(num_descriptors * sizeof(*ring), M_IOAT,
603 	    M_ZERO | M_WAITOK);
604 
605 	ring = ioat->ring;
606 	for (i = 0; i < num_descriptors; i++) {
607 		memset(&ring[i].bus_dmadesc, 0, sizeof(ring[i].bus_dmadesc));
608 		ring[i].id = i;
609 		error = bus_dmamap_create(ioat->data_tag, 0,
610 		    &ring[i].src_dmamap);
611 		if (error != 0) {
612 			ioat_log_message(0,
613 			    "%s: bus_dmamap_create failed %d\n", __func__,
614 			    error);
615 			return (error);
616 		}
617 		error = bus_dmamap_create(ioat->data_tag, 0,
618 		    &ring[i].dst_dmamap);
619 		if (error != 0) {
620 			ioat_log_message(0,
621 			    "%s: bus_dmamap_create failed %d\n", __func__,
622 			    error);
623 			return (error);
624 		}
625 		error = bus_dmamap_create(ioat->data_tag, 0,
626 		    &ring[i].src2_dmamap);
627 		if (error != 0) {
628 			ioat_log_message(0,
629 			    "%s: bus_dmamap_create failed %d\n", __func__,
630 			    error);
631 			return (error);
632 		}
633 		error = bus_dmamap_create(ioat->data_tag, 0,
634 		    &ring[i].dst2_dmamap);
635 		if (error != 0) {
636 			ioat_log_message(0,
637 			    "%s: bus_dmamap_create failed %d\n", __func__,
638 			    error);
639 			return (error);
640 		}
641 		error = bus_dmamap_create(ioat->data_crc_tag, 0,
642 		    &ring[i].crc_dmamap);
643 		if (error != 0) {
644 			ioat_log_message(0,
645 			    "%s: bus_dmamap_create failed %d\n", __func__,
646 			    error);
647 			return (error);
648 		}
649 	}
650 
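	/*
	 * Chain each hardware descriptor to the next one's bus address; the
	 * last entry wraps back to the first, forming a circular ring.
	 */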
651 	for (i = 0; i < num_descriptors; i++) {
652 		dma_hw_desc = &ioat->hw_desc_ring[i].dma;
653 		dma_hw_desc->next = RING_PHYS_ADDR(ioat, i + 1);
654 	}
655 
656 	ioat->head = 0;
657 	ioat->tail = 0;
658 	ioat->last_seen = 0;
659 	*ioat->comp_update = 0;
660 	return (0);
661 }
662 
663 static int
664 ioat_map_pci_bar(struct ioat_softc *ioat)
665 {
666 
667 	ioat->pci_resource_id = PCIR_BAR(0);
668 	ioat->pci_resource = bus_alloc_resource_any(ioat->device,
669 	    SYS_RES_MEMORY, &ioat->pci_resource_id, RF_ACTIVE);
670 
671 	if (ioat->pci_resource == NULL) {
672 		ioat_log_message(0, "unable to allocate pci resource\n");
673 		return (ENODEV);
674 	}
675 
676 	ioat->pci_bus_tag = rman_get_bustag(ioat->pci_resource);
677 	ioat->pci_bus_handle = rman_get_bushandle(ioat->pci_resource);
678 	return (0);
679 }
680 
681 static void
682 ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
683 {
684 	struct ioat_softc *ioat = arg;
685 
686 	KASSERT(error == 0, ("%s: error:%d", __func__, error));
687 	ioat->comp_update_bus_addr = seg[0].ds_addr;
688 }
689 
690 static void
691 ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
692 {
693 	bus_addr_t *baddr;
694 
695 	KASSERT(error == 0, ("%s: error:%d", __func__, error));
696 	baddr = arg;
697 	*baddr = segs->ds_addr;
698 }
699 
700 /*
701  * Interrupt setup and handlers
702  */
703 static int
704 ioat_setup_intr(struct ioat_softc *ioat)
705 {
706 	uint32_t num_vectors;
707 	int error;
708 	boolean_t use_msix;
709 	boolean_t force_legacy_interrupts;
710 
711 	use_msix = FALSE;
712 	force_legacy_interrupts = FALSE;
713 
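	/* Prefer a single MSI-X vector unless legacy interrupts are forced. */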
714 	if (!g_force_legacy_interrupts && pci_msix_count(ioat->device) >= 1) {
715 		num_vectors = 1;
716 		pci_alloc_msix(ioat->device, &num_vectors);
717 		if (num_vectors == 1)
718 			use_msix = TRUE;
719 	}
720 
721 	if (use_msix) {
722 		ioat->rid = 1;
723 		ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ,
724 		    &ioat->rid, RF_ACTIVE);
725 	} else {
726 		ioat->rid = 0;
727 		ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ,
728 		    &ioat->rid, RF_SHAREABLE | RF_ACTIVE);
729 	}
730 	if (ioat->res == NULL) {
731 		ioat_log_message(0, "bus_alloc_resource failed\n");
732 		return (ENOMEM);
733 	}
734 
735 	ioat->tag = NULL;
736 	error = bus_setup_intr(ioat->device, ioat->res, INTR_MPSAFE |
737 	    INTR_TYPE_MISC, NULL, ioat_interrupt_handler, ioat, &ioat->tag);
738 	if (error != 0) {
739 		ioat_log_message(0, "bus_setup_intr failed\n");
740 		return (error);
741 	}
742 
743 	ioat_write_intrctrl(ioat, IOAT_INTRCTRL_MASTER_INT_EN);
744 	return (0);
745 }
746 
747 static boolean_t
748 ioat_model_resets_msix(struct ioat_softc *ioat)
749 {
750 	u_int32_t pciid;
751 
752 	pciid = pci_get_devid(ioat->device);
753 	switch (pciid) {
754 		/* BWD: */
755 	case 0x0c508086:
756 	case 0x0c518086:
757 	case 0x0c528086:
758 	case 0x0c538086:
759 		/* BDXDE: */
760 	case 0x6f508086:
761 	case 0x6f518086:
762 	case 0x6f528086:
763 	case 0x6f538086:
764 		return (TRUE);
765 	}
766 
767 	return (FALSE);
768 }
769 
770 static void
771 ioat_interrupt_handler(void *arg)
772 {
773 	struct ioat_softc *ioat = arg;
774 
775 	ioat->stats.interrupts++;
776 	ioat_process_events(ioat, TRUE);
777 }
778 
779 static int
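/*
 * Map hardware CHANERR bits to a representative errno for completion
 * callbacks: source/destination address errors become EFAULT; read, write
 * and descriptor errors become EIO.
 */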
780 chanerr_to_errno(uint32_t chanerr)
781 {
782 
783 	if (chanerr == 0)
784 		return (0);
785 	if ((chanerr & (IOAT_CHANERR_XSADDERR | IOAT_CHANERR_XDADDERR)) != 0)
786 		return (EFAULT);
787 	if ((chanerr & (IOAT_CHANERR_RDERR | IOAT_CHANERR_WDERR)) != 0)
788 		return (EIO);
789 	/* This one is probably our fault: */
790 	if ((chanerr & IOAT_CHANERR_NDADDERR) != 0)
791 		return (EIO);
792 	return (EIO);
793 }
794 
795 static void
796 ioat_process_events(struct ioat_softc *ioat, boolean_t intr)
797 {
798 	struct ioat_descriptor *desc;
799 	struct bus_dmadesc *dmadesc;
800 	uint64_t comp_update, status;
801 	uint32_t completed, chanerr;
802 	int error;
803 
804 	mtx_lock(&ioat->cleanup_lock);
805 
806 	/*
807 	 * Don't run while the hardware is being reset.  Reset is responsible
808 	 * for blocking new work and draining & completing existing work, so
809 	 * there is nothing to do until new work is queued after reset anyway.
810 	 */
811 	if (ioat->resetting_cleanup) {
812 		mtx_unlock(&ioat->cleanup_lock);
813 		return;
814 	}
815 
816 	completed = 0;
817 	comp_update = *ioat->comp_update;
818 	status = comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK;
819 
820 	if (status < ioat->hw_desc_bus_addr ||
821 	    status >= ioat->hw_desc_bus_addr + (1 << ioat->ring_size_order) *
822 	    sizeof(struct ioat_generic_hw_descriptor))
823 		panic("Bogus completion address %jx (channel %u)",
824 		    (uintmax_t)status, ioat->chan_idx);
825 
826 	if (status == ioat->last_seen) {
827 		/*
828 		 * If we landed in process_events and nothing has been
829 		 * completed, check for a timeout due to channel halt.
830 		 */
831 		goto out;
832 	}
833 	CTR4(KTR_IOAT, "%s channel=%u hw_status=0x%lx last_seen=0x%lx",
834 	    __func__, ioat->chan_idx, comp_update, ioat->last_seen);
835 
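	/*
	 * Complete descriptors from the tail forward until we reach the last
	 * descriptor the hardware reported finished (matched by ring bus
	 * address), unloading DMA maps and invoking callbacks as we go.
	 */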
836 	while (RING_PHYS_ADDR(ioat, ioat->tail - 1) != status) {
837 		desc = ioat_get_ring_entry(ioat, ioat->tail);
838 		dmadesc = &desc->bus_dmadesc;
839 		CTR5(KTR_IOAT, "channel=%u completing desc idx %u (%p) ok  cb %p(%p)",
840 		    ioat->chan_idx, ioat->tail, dmadesc, dmadesc->callback_fn,
841 		    dmadesc->callback_arg);
842 
843 		bus_dmamap_unload(ioat->data_tag, desc->src_dmamap);
844 		bus_dmamap_unload(ioat->data_tag, desc->dst_dmamap);
845 		bus_dmamap_unload(ioat->data_tag, desc->src2_dmamap);
846 		bus_dmamap_unload(ioat->data_tag, desc->dst2_dmamap);
847 		bus_dmamap_unload(ioat->data_crc_tag, desc->crc_dmamap);
848 
849 		if (dmadesc->callback_fn != NULL)
850 			dmadesc->callback_fn(dmadesc->callback_arg, 0);
851 
852 		completed++;
853 		ioat->tail++;
854 	}
855 	CTR5(KTR_IOAT, "%s channel=%u head=%u tail=%u active=%u", __func__,
856 	    ioat->chan_idx, ioat->head, ioat->tail, ioat_get_active(ioat));
857 
858 	if (completed != 0) {
859 		ioat->last_seen = RING_PHYS_ADDR(ioat, ioat->tail - 1);
860 		ioat->stats.descriptors_processed += completed;
861 		wakeup(&ioat->tail);
862 	}
863 
864 out:
865 	ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
866 	mtx_unlock(&ioat->cleanup_lock);
867 
868 	/*
869 	 * The device doesn't seem to reliably push suspend/halt statuses to
870 	 * the channel completion memory address, so poll the device register
871 	 * here.  For performance reasons, skip this on interrupts and do it
872 	 * only on the much rarer polling events.
873 	 */
874 	if (!intr)
875 		comp_update = ioat_get_chansts(ioat) & IOAT_CHANSTS_STATUS;
876 	if (!is_ioat_halted(comp_update) && !is_ioat_suspended(comp_update))
877 		return;
878 
879 	ioat->stats.channel_halts++;
880 
881 	/*
882 	 * Fatal programming error on this DMA channel.  Flush any outstanding
883 	 * work with error status and restart the engine.
884 	 */
885 	mtx_lock(&ioat->submit_lock);
886 	ioat->quiescing = TRUE;
887 	mtx_unlock(&ioat->submit_lock);
888 
889 	/*
890 	 * This is safe to do here because the submit queue is quiesced.  We
891 	 * know that we will drain all outstanding events, so ioat_reset_hw
892 	 * can't deadlock. It is necessary to protect other ioat_process_events
893 	 * threads from racing ioat_reset_hw, reading an indeterminate hw
894 	 * state, and attempting to continue issuing completions.
895 	 */
896 	mtx_lock(&ioat->cleanup_lock);
897 	ioat->resetting_cleanup = TRUE;
898 
899 	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
900 	if (1 <= g_ioat_debug_level)
901 		ioat_halted_debug(ioat, chanerr);
902 	ioat->stats.last_halt_chanerr = chanerr;
903 
904 	while (ioat_get_active(ioat) > 0) {
905 		desc = ioat_get_ring_entry(ioat, ioat->tail);
906 		dmadesc = &desc->bus_dmadesc;
907 		CTR5(KTR_IOAT, "channel=%u completing desc idx %u (%p) err cb %p(%p)",
908 		    ioat->chan_idx, ioat->tail, dmadesc, dmadesc->callback_fn,
909 		    dmadesc->callback_arg);
910 
911 		if (dmadesc->callback_fn != NULL)
912 			dmadesc->callback_fn(dmadesc->callback_arg,
913 			    chanerr_to_errno(chanerr));
914 
915 		ioat->tail++;
916 		ioat->stats.descriptors_processed++;
917 		ioat->stats.descriptors_error++;
918 	}
919 	CTR5(KTR_IOAT, "%s channel=%u head=%u tail=%u active=%u", __func__,
920 	    ioat->chan_idx, ioat->head, ioat->tail, ioat_get_active(ioat));
921 
922 	/* Clear error status */
923 	ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr);
924 
925 	mtx_unlock(&ioat->cleanup_lock);
926 
927 	ioat_log_message(0, "Resetting channel to recover from error\n");
928 	error = taskqueue_enqueue(taskqueue_thread, &ioat->reset_task);
929 	KASSERT(error == 0,
930 	    ("%s: taskqueue_enqueue failed: %d", __func__, error));
931 }
932 
933 static void
934 ioat_reset_hw_task(void *ctx, int pending __unused)
935 {
936 	struct ioat_softc *ioat;
937 	int error;
938 
939 	ioat = ctx;
940 	ioat_log_message(1, "%s: Resetting channel\n", __func__);
941 
942 	error = ioat_reset_hw(ioat);
943 	KASSERT(error == 0, ("%s: reset failed: %d", __func__, error));
944 	(void)error;
945 }
946 
947 /*
948  * User API functions
949  */
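/*
 * Typical consumer usage (illustrative sketch only; the callback and
 * addresses below are hypothetical):
 *
 *	bus_dmaengine_t eng = ioat_get_dmaengine(0, M_WAITOK);
 *	if (eng != NULL) {
 *		ioat_acquire(eng);
 *		(void)ioat_copy(eng, dst_busaddr, src_busaddr, len,
 *		    my_done_cb, my_arg, DMA_INT_EN);
 *		ioat_release(eng);
 *		...
 *		ioat_put_dmaengine(eng);
 *	}
 */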
950 unsigned
951 ioat_get_nchannels(void)
952 {
953 
954 	return (ioat_channel_index);
955 }
956 
957 bus_dmaengine_t
958 ioat_get_dmaengine(uint32_t index, int flags)
959 {
960 	struct ioat_softc *ioat;
961 
962 	KASSERT((flags & ~(M_NOWAIT | M_WAITOK)) == 0,
963 	    ("invalid flags: 0x%08x", flags));
964 	KASSERT((flags & (M_NOWAIT | M_WAITOK)) != (M_NOWAIT | M_WAITOK),
965 	    ("invalid wait | nowait"));
966 
967 	mtx_lock(&ioat_list_mtx);
968 	if (index >= ioat_channel_index ||
969 	    (ioat = ioat_channel[index]) == NULL) {
970 		mtx_unlock(&ioat_list_mtx);
971 		return (NULL);
972 	}
973 	mtx_lock(&ioat->submit_lock);
974 	mtx_unlock(&ioat_list_mtx);
975 
976 	if (ioat->destroying) {
977 		mtx_unlock(&ioat->submit_lock);
978 		return (NULL);
979 	}
980 
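	/*
	 * Take a reference first; if the channel is quiescing (e.g. during a
	 * reset), either bail out (M_NOWAIT) or sleep until it is usable.
	 */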
981 	ioat_get(ioat);
982 	if (ioat->quiescing) {
983 		if ((flags & M_NOWAIT) != 0) {
984 			ioat_put(ioat);
985 			mtx_unlock(&ioat->submit_lock);
986 			return (NULL);
987 		}
988 
989 		while (ioat->quiescing && !ioat->destroying)
990 			msleep(&ioat->quiescing, &ioat->submit_lock, 0, "getdma", 0);
991 
992 		if (ioat->destroying) {
993 			ioat_put(ioat);
994 			mtx_unlock(&ioat->submit_lock);
995 			return (NULL);
996 		}
997 	}
998 	mtx_unlock(&ioat->submit_lock);
999 	return (&ioat->dmaengine);
1000 }
1001 
1002 void
1003 ioat_put_dmaengine(bus_dmaengine_t dmaengine)
1004 {
1005 	struct ioat_softc *ioat;
1006 
1007 	ioat = to_ioat_softc(dmaengine);
1008 	mtx_lock(&ioat->submit_lock);
1009 	ioat_put(ioat);
1010 	mtx_unlock(&ioat->submit_lock);
1011 }
1012 
1013 int
1014 ioat_get_hwversion(bus_dmaengine_t dmaengine)
1015 {
1016 	struct ioat_softc *ioat;
1017 
1018 	ioat = to_ioat_softc(dmaengine);
1019 	return (ioat->version);
1020 }
1021 
1022 size_t
1023 ioat_get_max_io_size(bus_dmaengine_t dmaengine)
1024 {
1025 	struct ioat_softc *ioat;
1026 
1027 	ioat = to_ioat_softc(dmaengine);
1028 	return (ioat->max_xfer_size);
1029 }
1030 
1031 uint32_t
1032 ioat_get_capabilities(bus_dmaengine_t dmaengine)
1033 {
1034 	struct ioat_softc *ioat;
1035 
1036 	ioat = to_ioat_softc(dmaengine);
1037 	return (ioat->capabilities);
1038 }
1039 
1040 int
1041 ioat_set_interrupt_coalesce(bus_dmaengine_t dmaengine, uint16_t delay)
1042 {
1043 	struct ioat_softc *ioat;
1044 
1045 	ioat = to_ioat_softc(dmaengine);
1046 	if (!ioat->intrdelay_supported)
1047 		return (ENODEV);
1048 	if (delay > ioat->intrdelay_max)
1049 		return (ERANGE);
1050 
1051 	ioat_write_2(ioat, IOAT_INTRDELAY_OFFSET, delay);
1052 	ioat->cached_intrdelay =
1053 	    ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) & IOAT_INTRDELAY_US_MASK;
1054 	return (0);
1055 }
1056 
1057 uint16_t
1058 ioat_get_max_coalesce_period(bus_dmaengine_t dmaengine)
1059 {
1060 	struct ioat_softc *ioat;
1061 
1062 	ioat = to_ioat_softc(dmaengine);
1063 	return (ioat->intrdelay_max);
1064 }
1065 
1066 void
1067 ioat_acquire(bus_dmaengine_t dmaengine)
1068 {
1069 	struct ioat_softc *ioat;
1070 
1071 	ioat = to_ioat_softc(dmaengine);
1072 	mtx_lock(&ioat->submit_lock);
1073 	CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx);
1074 	ioat->acq_head = ioat->head;
1075 }
1076 
1077 int
1078 ioat_acquire_reserve(bus_dmaengine_t dmaengine, unsigned n, int mflags)
1079 {
1080 	struct ioat_softc *ioat;
1081 	int error;
1082 
1083 	ioat = to_ioat_softc(dmaengine);
1084 	ioat_acquire(dmaengine);
1085 
1086 	error = ioat_reserve_space(ioat, n, mflags);
1087 	if (error != 0)
1088 		ioat_release(dmaengine);
1089 	return (error);
1090 }
1091 
1092 void
1093 ioat_release(bus_dmaengine_t dmaengine)
1094 {
1095 	struct ioat_softc *ioat;
1096 
1097 	ioat = to_ioat_softc(dmaengine);
1098 	CTR3(KTR_IOAT, "%s channel=%u dispatch1 head=%u", __func__,
1099 	    ioat->chan_idx, ioat->head);
1100 	KFAIL_POINT_CODE(DEBUG_FP, ioat_release, /* do nothing */);
1101 	CTR3(KTR_IOAT, "%s channel=%u dispatch2 head=%u", __func__,
1102 	    ioat->chan_idx, ioat->head);
1103 
1104 	if (ioat->acq_head != ioat->head) {
1105 		ioat_write_2(ioat, IOAT_DMACOUNT_OFFSET,
1106 		    (uint16_t)ioat->head);
1107 
1108 		if (!callout_pending(&ioat->poll_timer)) {
1109 			callout_reset(&ioat->poll_timer, 1,
1110 			    ioat_poll_timer_callback, ioat);
1111 		}
1112 	}
1113 	mtx_unlock(&ioat->submit_lock);
1114 }
1115 
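/*
 * Common setup shared by all operation types: reserve one ring slot, fill in
 * the generic control fields, and load the source/destination physical
 * addresses (when non-zero) into the descriptor via busdma.
 */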
1116 static struct ioat_descriptor *
1117 ioat_op_generic(struct ioat_softc *ioat, uint8_t op,
1118     uint32_t size, uint64_t src, uint64_t dst,
1119     bus_dmaengine_callback_t callback_fn, void *callback_arg,
1120     uint32_t flags)
1121 {
1122 	struct ioat_generic_hw_descriptor *hw_desc;
1123 	struct ioat_descriptor *desc;
1124 	bus_dma_segment_t seg;
1125 	int mflags, nseg, error;
1126 
1127 	mtx_assert(&ioat->submit_lock, MA_OWNED);
1128 
1129 	KASSERT((flags & ~_DMA_GENERIC_FLAGS) == 0,
1130 	    ("Unrecognized flag(s): %#x", flags & ~_DMA_GENERIC_FLAGS));
1131 	if ((flags & DMA_NO_WAIT) != 0)
1132 		mflags = M_NOWAIT;
1133 	else
1134 		mflags = M_WAITOK;
1135 
1136 	if (size > ioat->max_xfer_size) {
1137 		ioat_log_message(0, "%s: max_xfer_size = %d, requested = %u\n",
1138 		    __func__, ioat->max_xfer_size, (unsigned)size);
1139 		return (NULL);
1140 	}
1141 
1142 	if (ioat_reserve_space(ioat, 1, mflags) != 0)
1143 		return (NULL);
1144 
1145 	desc = ioat_get_ring_entry(ioat, ioat->head);
1146 	hw_desc = &ioat_get_descriptor(ioat, ioat->head)->generic;
1147 
1148 	hw_desc->u.control_raw = 0;
1149 	hw_desc->u.control_generic.op = op;
1150 	hw_desc->u.control_generic.completion_update = 1;
1151 
1152 	if ((flags & DMA_INT_EN) != 0)
1153 		hw_desc->u.control_generic.int_enable = 1;
1154 	if ((flags & DMA_FENCE) != 0)
1155 		hw_desc->u.control_generic.fence = 1;
1156 
1157 	hw_desc->size = size;
1158 
1159 	if (src != 0) {
1160 		nseg = -1;
1161 		error = _bus_dmamap_load_phys(ioat->data_tag, desc->src_dmamap,
1162 		    src, size, 0, &seg, &nseg);
1163 		if (error != 0) {
1164 			ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1165 			    " failed %d\n", __func__, error);
1166 			return (NULL);
1167 		}
1168 		hw_desc->src_addr = seg.ds_addr;
1169 	}
1170 
1171 	if (dst != 0) {
1172 		nseg = -1;
1173 		error = _bus_dmamap_load_phys(ioat->data_tag, desc->dst_dmamap,
1174 		    dst, size, 0, &seg, &nseg);
1175 		if (error != 0) {
1176 			ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1177 			    " failed %d\n", __func__, error);
1178 			return (NULL);
1179 		}
1180 		hw_desc->dest_addr = seg.ds_addr;
1181 	}
1182 
1183 	desc->bus_dmadesc.callback_fn = callback_fn;
1184 	desc->bus_dmadesc.callback_arg = callback_arg;
1185 	return (desc);
1186 }
1187 
1188 struct bus_dmadesc *
1189 ioat_null(bus_dmaengine_t dmaengine, bus_dmaengine_callback_t callback_fn,
1190     void *callback_arg, uint32_t flags)
1191 {
1192 	struct ioat_dma_hw_descriptor *hw_desc;
1193 	struct ioat_descriptor *desc;
1194 	struct ioat_softc *ioat;
1195 
1196 	ioat = to_ioat_softc(dmaengine);
1197 	CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx);
1198 
1199 	desc = ioat_op_generic(ioat, IOAT_OP_COPY, 8, 0, 0, callback_fn,
1200 	    callback_arg, flags);
1201 	if (desc == NULL)
1202 		return (NULL);
1203 
1204 	hw_desc = &ioat_get_descriptor(ioat, desc->id)->dma;
1205 	hw_desc->u.control.null = 1;
1206 	ioat_submit_single(ioat);
1207 	return (&desc->bus_dmadesc);
1208 }
1209 
1210 struct bus_dmadesc *
1211 ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst,
1212     bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn,
1213     void *callback_arg, uint32_t flags)
1214 {
1215 	struct ioat_dma_hw_descriptor *hw_desc;
1216 	struct ioat_descriptor *desc;
1217 	struct ioat_softc *ioat;
1218 
1219 	ioat = to_ioat_softc(dmaengine);
1220 
1221 	if (((src | dst) & (0xffffull << 48)) != 0) {
1222 		ioat_log_message(0, "%s: High 16 bits of src/dst invalid\n",
1223 		    __func__);
1224 		return (NULL);
1225 	}
1226 
1227 	desc = ioat_op_generic(ioat, IOAT_OP_COPY, len, src, dst, callback_fn,
1228 	    callback_arg, flags);
1229 	if (desc == NULL)
1230 		return (NULL);
1231 
1232 	hw_desc = &ioat_get_descriptor(ioat, desc->id)->dma;
1233 	if (g_ioat_debug_level >= 3)
1234 		dump_descriptor(hw_desc);
1235 
1236 	ioat_submit_single(ioat);
1237 	CTR6(KTR_IOAT, "%s channel=%u desc=%p dest=%lx src=%lx len=%lx",
1238 	    __func__, ioat->chan_idx, &desc->bus_dmadesc, dst, src, len);
1239 	return (&desc->bus_dmadesc);
1240 }
1241 
1242 struct bus_dmadesc *
1243 ioat_copy_8k_aligned(bus_dmaengine_t dmaengine, bus_addr_t dst1,
1244     bus_addr_t dst2, bus_addr_t src1, bus_addr_t src2,
1245     bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags)
1246 {
1247 	struct ioat_dma_hw_descriptor *hw_desc;
1248 	struct ioat_descriptor *desc;
1249 	struct ioat_softc *ioat;
1250 	bus_size_t src1_len, dst1_len;
1251 	bus_dma_segment_t seg;
1252 	int nseg, error;
1253 
1254 	ioat = to_ioat_softc(dmaengine);
1255 	CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx);
1256 
1257 	if (((src1 | src2 | dst1 | dst2) & (0xffffull << 48)) != 0) {
1258 		ioat_log_message(0, "%s: High 16 bits of src/dst invalid\n",
1259 		    __func__);
1260 		return (NULL);
1261 	}
1262 	if (((src1 | src2 | dst1 | dst2) & PAGE_MASK) != 0) {
1263 		ioat_log_message(0, "%s: Addresses must be page-aligned\n",
1264 		    __func__);
1265 		return (NULL);
1266 	}
1267 
1268 	desc = ioat_op_generic(ioat, IOAT_OP_COPY, 2 * PAGE_SIZE, 0, 0,
1269 	    callback_fn, callback_arg, flags);
1270 	if (desc == NULL)
1271 		return (NULL);
1272 
1273 	hw_desc = &ioat_get_descriptor(ioat, desc->id)->dma;
1274 
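	/*
	 * If the two source pages are physically contiguous, load them as a
	 * single two-page segment; otherwise load each page separately and
	 * set the source page-break control bit.
	 */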
1275 	src1_len = (src2 != src1 + PAGE_SIZE) ? PAGE_SIZE : 2 * PAGE_SIZE;
1276 	nseg = -1;
1277 	error = _bus_dmamap_load_phys(ioat->data_tag,
1278 	    desc->src_dmamap, src1, src1_len, 0, &seg, &nseg);
1279 	if (error != 0) {
1280 		ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1281 		    " failed %d\n", __func__, error);
1282 		return (NULL);
1283 	}
1284 	hw_desc->src_addr = seg.ds_addr;
1285 	if (src1_len != 2 * PAGE_SIZE) {
1286 		hw_desc->u.control.src_page_break = 1;
1287 		nseg = -1;
1288 		error = _bus_dmamap_load_phys(ioat->data_tag,
1289 		    desc->src2_dmamap, src2, PAGE_SIZE, 0, &seg, &nseg);
1290 		if (error != 0) {
1291 			ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1292 			    " failed %d\n", __func__, error);
1293 			return (NULL);
1294 		}
1295 		hw_desc->next_src_addr = seg.ds_addr;
1296 	}
1297 
1298 	dst1_len = (dst2 != dst1 + PAGE_SIZE) ? PAGE_SIZE : 2 * PAGE_SIZE;
1299 	nseg = -1;
1300 	error = _bus_dmamap_load_phys(ioat->data_tag,
1301 	    desc->dst_dmamap, dst1, dst1_len, 0, &seg, &nseg);
1302 	if (error != 0) {
1303 		ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1304 		    " failed %d\n", __func__, error);
1305 		return (NULL);
1306 	}
1307 	hw_desc->dest_addr = seg.ds_addr;
1308 	if (dst1_len != 2 * PAGE_SIZE) {
1309 		hw_desc->u.control.dest_page_break = 1;
1310 		nseg = -1;
1311 		error = _bus_dmamap_load_phys(ioat->data_tag,
1312 		    desc->dst2_dmamap, dst2, PAGE_SIZE, 0, &seg, &nseg);
1313 		if (error != 0) {
1314 			ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1315 			    " failed %d\n", __func__, error);
1316 			return (NULL);
1317 		}
1318 		hw_desc->next_dest_addr = seg.ds_addr;
1319 	}
1320 
1321 	if (g_ioat_debug_level >= 3)
1322 		dump_descriptor(hw_desc);
1323 
1324 	ioat_submit_single(ioat);
1325 	return (&desc->bus_dmadesc);
1326 }
1327 
1328 struct bus_dmadesc *
1329 ioat_copy_crc(bus_dmaengine_t dmaengine, bus_addr_t dst, bus_addr_t src,
1330     bus_size_t len, uint32_t *initialseed, bus_addr_t crcptr,
1331     bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags)
1332 {
1333 	struct ioat_crc32_hw_descriptor *hw_desc;
1334 	struct ioat_descriptor *desc;
1335 	struct ioat_softc *ioat;
1336 	uint32_t teststore;
1337 	uint8_t op;
1338 	bus_dma_segment_t seg;
1339 	int nseg, error;
1340 
1341 	ioat = to_ioat_softc(dmaengine);
1342 	CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx);
1343 
1344 	if ((ioat->capabilities & IOAT_DMACAP_MOVECRC) == 0) {
1345 		ioat_log_message(0, "%s: Device lacks MOVECRC capability\n",
1346 		    __func__);
1347 		return (NULL);
1348 	}
1349 	if (((src | dst) & (0xffffffull << 40)) != 0) {
1350 		ioat_log_message(0, "%s: High 24 bits of src/dst invalid\n",
1351 		    __func__);
1352 		return (NULL);
1353 	}
1354 	teststore = (flags & _DMA_CRC_TESTSTORE);
1355 	if (teststore == _DMA_CRC_TESTSTORE) {
1356 		ioat_log_message(0, "%s: TEST and STORE invalid\n", __func__);
1357 		return (NULL);
1358 	}
1359 	if (teststore == 0 && (flags & DMA_CRC_INLINE) != 0) {
1360 		ioat_log_message(0, "%s: INLINE invalid without TEST or STORE\n",
1361 		    __func__);
1362 		return (NULL);
1363 	}
1364 
1365 	switch (teststore) {
1366 	case DMA_CRC_STORE:
1367 		op = IOAT_OP_MOVECRC_STORE;
1368 		break;
1369 	case DMA_CRC_TEST:
1370 		op = IOAT_OP_MOVECRC_TEST;
1371 		break;
1372 	default:
1373 		KASSERT(teststore == 0, ("bogus"));
1374 		op = IOAT_OP_MOVECRC;
1375 		break;
1376 	}
1377 
1378 	if ((flags & DMA_CRC_INLINE) == 0 &&
1379 	    (crcptr & (0xffffffull << 40)) != 0) {
1380 		ioat_log_message(0,
1381 		    "%s: High 24 bits of crcptr invalid\n", __func__);
1382 		return (NULL);
1383 	}
1384 
1385 	desc = ioat_op_generic(ioat, op, len, src, dst, callback_fn,
1386 	    callback_arg, flags & ~_DMA_CRC_FLAGS);
1387 	if (desc == NULL)
1388 		return (NULL);
1389 
1390 	hw_desc = &ioat_get_descriptor(ioat, desc->id)->crc32;
1391 
1392 	if ((flags & DMA_CRC_INLINE) == 0) {
1393 		nseg = -1;
1394 		error = _bus_dmamap_load_phys(ioat->data_crc_tag,
1395 		    desc->crc_dmamap, crcptr, sizeof(uint32_t), 0,
1396 		    &seg, &nseg);
1397 		if (error != 0) {
1398 			ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1399 			    " failed %d\n", __func__, error);
1400 			return (NULL);
1401 		}
1402 		hw_desc->crc_address = seg.ds_addr;
1403 	} else
1404 		hw_desc->u.control.crc_location = 1;
1405 
1406 	if (initialseed != NULL) {
1407 		hw_desc->u.control.use_seed = 1;
1408 		hw_desc->seed = *initialseed;
1409 	}
1410 
1411 	if (g_ioat_debug_level >= 3)
1412 		dump_descriptor(hw_desc);
1413 
1414 	ioat_submit_single(ioat);
1415 	return (&desc->bus_dmadesc);
1416 }
1417 
1418 struct bus_dmadesc *
1419 ioat_crc(bus_dmaengine_t dmaengine, bus_addr_t src, bus_size_t len,
1420     uint32_t *initialseed, bus_addr_t crcptr,
1421     bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags)
1422 {
1423 	struct ioat_crc32_hw_descriptor *hw_desc;
1424 	struct ioat_descriptor *desc;
1425 	struct ioat_softc *ioat;
1426 	uint32_t teststore;
1427 	uint8_t op;
1428 	bus_dma_segment_t seg;
1429 	int nseg, error;
1430 
1431 	ioat = to_ioat_softc(dmaengine);
1432 	CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx);
1433 
1434 	if ((ioat->capabilities & IOAT_DMACAP_CRC) == 0) {
1435 		ioat_log_message(0, "%s: Device lacks CRC capability\n",
1436 		    __func__);
1437 		return (NULL);
1438 	}
1439 	if ((src & (0xffffffull << 40)) != 0) {
1440 		ioat_log_message(0, "%s: High 24 bits of src invalid\n",
1441 		    __func__);
1442 		return (NULL);
1443 	}
1444 	teststore = (flags & _DMA_CRC_TESTSTORE);
1445 	if (teststore == _DMA_CRC_TESTSTORE) {
1446 		ioat_log_message(0, "%s: TEST and STORE invalid\n", __func__);
1447 		return (NULL);
1448 	}
1449 	if (teststore == 0 && (flags & DMA_CRC_INLINE) != 0) {
1450 		ioat_log_message(0, "%s: INLINE invalid without TEST or STORE\n",
1451 		    __func__);
1452 		return (NULL);
1453 	}
1454 
1455 	switch (teststore) {
1456 	case DMA_CRC_STORE:
1457 		op = IOAT_OP_CRC_STORE;
1458 		break;
1459 	case DMA_CRC_TEST:
1460 		op = IOAT_OP_CRC_TEST;
1461 		break;
1462 	default:
1463 		KASSERT(teststore == 0, ("bogus"));
1464 		op = IOAT_OP_CRC;
1465 		break;
1466 	}
1467 
1468 	if ((flags & DMA_CRC_INLINE) == 0 &&
1469 	    (crcptr & (0xffffffull << 40)) != 0) {
1470 		ioat_log_message(0,
1471 		    "%s: High 24 bits of crcptr invalid\n", __func__);
1472 		return (NULL);
1473 	}
1474 
1475 	desc = ioat_op_generic(ioat, op, len, src, 0, callback_fn,
1476 	    callback_arg, flags & ~_DMA_CRC_FLAGS);
1477 	if (desc == NULL)
1478 		return (NULL);
1479 
1480 	hw_desc = &ioat_get_descriptor(ioat, desc->id)->crc32;
1481 
1482 	if ((flags & DMA_CRC_INLINE) == 0) {
1483 		nseg = -1;
1484 		error = _bus_dmamap_load_phys(ioat->data_crc_tag,
1485 		    desc->crc_dmamap, crcptr, sizeof(uint32_t), 0,
1486 		    &seg, &nseg);
1487 		if (error != 0) {
1488 			ioat_log_message(0, "%s: _bus_dmamap_load_phys"
1489 			    " failed %d\n", __func__, error);
1490 			return (NULL);
1491 		}
1492 		hw_desc->crc_address = seg.ds_addr;
1493 	} else
1494 		hw_desc->u.control.crc_location = 1;
1495 
1496 	if (initialseed != NULL) {
1497 		hw_desc->u.control.use_seed = 1;
1498 		hw_desc->seed = *initialseed;
1499 	}
1500 
1501 	if (g_ioat_debug_level >= 3)
1502 		dump_descriptor(hw_desc);
1503 
1504 	ioat_submit_single(ioat);
1505 	return (&desc->bus_dmadesc);
1506 }
1507 
1508 struct bus_dmadesc *
1509 ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst, uint64_t fillpattern,
1510     bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg,
1511     uint32_t flags)
1512 {
1513 	struct ioat_fill_hw_descriptor *hw_desc;
1514 	struct ioat_descriptor *desc;
1515 	struct ioat_softc *ioat;
1516 
1517 	ioat = to_ioat_softc(dmaengine);
1518 	CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx);
1519 
1520 	if ((ioat->capabilities & IOAT_DMACAP_BFILL) == 0) {
1521 		ioat_log_message(0, "%s: Device lacks BFILL capability\n",
1522 		    __func__);
1523 		return (NULL);
1524 	}
1525 
1526 	if ((dst & (0xffffull << 48)) != 0) {
1527 		ioat_log_message(0, "%s: High 16 bits of dst invalid\n",
1528 		    __func__);
1529 		return (NULL);
1530 	}
1531 
1532 	desc = ioat_op_generic(ioat, IOAT_OP_FILL, len, 0, dst,
1533 	    callback_fn, callback_arg, flags);
1534 	if (desc == NULL)
1535 		return (NULL);
1536 
1537 	hw_desc = &ioat_get_descriptor(ioat, desc->id)->fill;
1538 	hw_desc->src_data = fillpattern;
1539 	if (g_ioat_debug_level >= 3)
1540 		dump_descriptor(hw_desc);
1541 
1542 	ioat_submit_single(ioat);
1543 	return (&desc->bus_dmadesc);
1544 }
1545 
1546 /*
1547  * Ring Management
1548  */
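/*
 * Number of descriptors submitted but not yet completed: the ring slots
 * between tail and head, modulo the power-of-two ring size.
 */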
1549 static inline uint32_t
1550 ioat_get_active(struct ioat_softc *ioat)
1551 {
1552 
1553 	return ((ioat->head - ioat->tail) & ((1 << ioat->ring_size_order) - 1));
1554 }
1555 
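/*
 * Free ring slots.  One slot is always left unused so that a completely full
 * ring can be distinguished from an empty one.
 */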
1556 static inline uint32_t
1557 ioat_get_ring_space(struct ioat_softc *ioat)
1558 {
1559 
1560 	return ((1 << ioat->ring_size_order) - ioat_get_active(ioat) - 1);
1561 }
1562 
1563 /*
1564  * Reserves space in this IOAT descriptor ring by ensuring enough slots remain
1565  * for 'num_descs'.
1566  *
1567  * If mflags contains M_WAITOK, blocks until enough space is available.
1568  *
1569  * Returns zero on success, or an errno on error.  If num_descs is beyond the
1570  * maximum ring size, returns EINVAL; if allocation would block and mflags
1571  * contains M_NOWAIT, returns EAGAIN.
1572  *
1573  * Must be called with the submit_lock held; returns with the lock held.  The
1574  * lock may be dropped to allocate the ring.
1575  *
1576  * (The submit_lock is needed to add any entries to the ring, so callers are
1577  * assured enough room is available.)
1578  */
1579 static int
1580 ioat_reserve_space(struct ioat_softc *ioat, uint32_t num_descs, int mflags)
1581 {
1582 	boolean_t dug;
1583 	int error;
1584 
1585 	mtx_assert(&ioat->submit_lock, MA_OWNED);
1586 	error = 0;
1587 	dug = FALSE;
1588 
1589 	if (num_descs < 1 || num_descs >= (1 << ioat->ring_size_order)) {
1590 		error = EINVAL;
1591 		goto out;
1592 	}
1593 
1594 	for (;;) {
1595 		if (ioat->quiescing) {
1596 			error = ENXIO;
1597 			goto out;
1598 		}
1599 
1600 		if (ioat_get_ring_space(ioat) >= num_descs)
1601 			goto out;
1602 
1603 		CTR3(KTR_IOAT, "%s channel=%u starved (%u)", __func__,
1604 		    ioat->chan_idx, num_descs);
1605 
1606 		if (!dug && !ioat->is_submitter_processing) {
1607 			ioat->is_submitter_processing = TRUE;
1608 			mtx_unlock(&ioat->submit_lock);
1609 
1610 			CTR2(KTR_IOAT, "%s channel=%u attempting to process events",
1611 			    __func__, ioat->chan_idx);
1612 			ioat_process_events(ioat, FALSE);
1613 
1614 			mtx_lock(&ioat->submit_lock);
1615 			dug = TRUE;
1616 			KASSERT(ioat->is_submitter_processing == TRUE,
1617 			    ("is_submitter_processing"));
1618 			ioat->is_submitter_processing = FALSE;
1619 			wakeup(&ioat->tail);
1620 			continue;
1621 		}
1622 
1623 		if ((mflags & M_WAITOK) == 0) {
1624 			error = EAGAIN;
1625 			break;
1626 		}
1627 		CTR2(KTR_IOAT, "%s channel=%u blocking on completions",
1628 		    __func__, ioat->chan_idx);
1629 		msleep(&ioat->tail, &ioat->submit_lock, 0,
1630 		    "ioat_full", 0);
1631 		continue;
1632 	}
1633 
1634 out:
1635 	mtx_assert(&ioat->submit_lock, MA_OWNED);
1636 	KASSERT(!ioat->quiescing || error == ENXIO,
1637 	    ("reserved during quiesce"));
1638 	return (error);
1639 }
1640 
1641 static void
1642 ioat_free_ring(struct ioat_softc *ioat, uint32_t size,
1643     struct ioat_descriptor *ring)
1644 {
1645 
1646 	free(ring, M_IOAT);
1647 }
1648 
1649 static struct ioat_descriptor *
1650 ioat_get_ring_entry(struct ioat_softc *ioat, uint32_t index)
1651 {
1652 
1653 	return (&ioat->ring[index % (1 << ioat->ring_size_order)]);
1654 }
1655 
1656 static union ioat_hw_descriptor *
1657 ioat_get_descriptor(struct ioat_softc *ioat, uint32_t index)
1658 {
1659 
1660 	return (&ioat->hw_desc_ring[index % (1 << ioat->ring_size_order)]);
1661 }
1662 
1663 static void
1664 ioat_halted_debug(struct ioat_softc *ioat, uint32_t chanerr)
1665 {
1666 	union ioat_hw_descriptor *desc;
1667 
1668 	ioat_log_message(0, "Channel halted (%b)\n", (int)chanerr,
1669 	    IOAT_CHANERR_STR);
1670 	if (chanerr == 0)
1671 		return;
1672 
1673 	mtx_assert(&ioat->cleanup_lock, MA_OWNED);
1674 
1675 	desc = ioat_get_descriptor(ioat, ioat->tail + 0);
1676 	dump_descriptor(desc);
1677 
1678 	desc = ioat_get_descriptor(ioat, ioat->tail + 1);
1679 	dump_descriptor(desc);
1680 }
1681 
1682 static void
1683 ioat_poll_timer_callback(void *arg)
1684 {
1685 	struct ioat_softc *ioat;
1686 
1687 	ioat = arg;
1688 	ioat_log_message(3, "%s\n", __func__);
1689 
1690 	ioat_process_events(ioat, FALSE);
1691 
1692 	mtx_lock(&ioat->submit_lock);
1693 	if (ioat_get_active(ioat) > 0)
1694 		callout_schedule(&ioat->poll_timer, 1);
1695 	mtx_unlock(&ioat->submit_lock);
1696 }
1697 
1698 /*
1699  * Support Functions
1700  */
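/*
 * Account for one newly filled descriptor by advancing the software head.
 * The hardware is not notified here; ioat_release() later writes the updated
 * head to the DMACOUNT register to start processing.
 */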
1701 static void
1702 ioat_submit_single(struct ioat_softc *ioat)
1703 {
1704 
1705 	mtx_assert(&ioat->submit_lock, MA_OWNED);
1706 
1707 	ioat->head++;
1708 	CTR4(KTR_IOAT, "%s channel=%u head=%u tail=%u", __func__,
1709 	    ioat->chan_idx, ioat->head, ioat->tail);
1710 
1711 	ioat->stats.descriptors_submitted++;
1712 }
1713 
1714 static int
1715 ioat_reset_hw(struct ioat_softc *ioat)
1716 {
1717 	uint64_t status;
1718 	uint32_t chanerr;
1719 	unsigned timeout;
1720 	int error;
1721 
1722 	CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx);
1723 
1724 	mtx_lock(&ioat->submit_lock);
1725 	while (ioat->resetting && !ioat->destroying)
1726 		msleep(&ioat->resetting, &ioat->submit_lock, 0, "IRH_drain", 0);
1727 	if (ioat->destroying) {
1728 		mtx_unlock(&ioat->submit_lock);
1729 		return (ENXIO);
1730 	}
1731 	ioat->resetting = TRUE;
1732 	ioat->quiescing = TRUE;
1733 	mtx_unlock(&ioat->submit_lock);
1734 	mtx_lock(&ioat->cleanup_lock);
1735 	while (ioat_get_active(ioat) > 0)
1736 		msleep(&ioat->tail, &ioat->cleanup_lock, 0, "ioat_drain", 1);
1737 
1738 	/*
1739 	 * Suspend ioat_process_events while the hardware and softc are in an
1740 	 * indeterminate state.
1741 	 */
1742 	ioat->resetting_cleanup = TRUE;
1743 	mtx_unlock(&ioat->cleanup_lock);
1744 
1745 	CTR2(KTR_IOAT, "%s channel=%u quiesced and drained", __func__,
1746 	    ioat->chan_idx);
1747 
1748 	status = ioat_get_chansts(ioat);
1749 	if (is_ioat_active(status) || is_ioat_idle(status))
1750 		ioat_suspend(ioat);
1751 
1752 	/* Wait at most 20 ms */
1753 	for (timeout = 0; (is_ioat_active(status) || is_ioat_idle(status)) &&
1754 	    timeout < 20; timeout++) {
1755 		DELAY(1000);
1756 		status = ioat_get_chansts(ioat);
1757 	}
1758 	if (timeout == 20) {
1759 		error = ETIMEDOUT;
1760 		goto out;
1761 	}
1762 
1763 	KASSERT(ioat_get_active(ioat) == 0, ("active after quiesce"));
1764 
1765 	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
1766 	ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr);
1767 
1768 	CTR2(KTR_IOAT, "%s channel=%u hardware suspended", __func__,
1769 	    ioat->chan_idx);
1770 
1771 	/*
1772 	 * IOAT v3 workaround - write 3E07h to CHANERRMSK_INT to mask out errors
1773 	 * that can cause stability issues for IOAT v3.
1774 	 */
1775 	pci_write_config(ioat->device, IOAT_CFG_CHANERRMASK_INT_OFFSET, 0x3e07,
1776 	    4);
1777 	chanerr = pci_read_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, 4);
1778 	pci_write_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, chanerr, 4);
1779 
1780 	/*
1781 	 * BDXDE and BWD models reset MSI-X registers on device reset.
1782 	 * Save/restore their contents manually.
1783 	 */
1784 	if (ioat_model_resets_msix(ioat)) {
1785 		ioat_log_message(1, "device resets MSI-X registers; saving\n");
1786 		pci_save_state(ioat->device);
1787 	}
1788 
1789 	ioat_reset(ioat);
1790 	CTR2(KTR_IOAT, "%s channel=%u hardware reset", __func__,
1791 	    ioat->chan_idx);
1792 
1793 	/* Wait at most 20 ms */
1794 	for (timeout = 0; ioat_reset_pending(ioat) && timeout < 20; timeout++)
1795 		DELAY(1000);
1796 	if (timeout == 20) {
1797 		error = ETIMEDOUT;
1798 		goto out;
1799 	}
1800 
1801 	if (ioat_model_resets_msix(ioat)) {
1802 		ioat_log_message(1, "device resets MSI-X registers; restoring\n");
1803 		pci_restore_state(ioat->device);
1804 	}
1805 
1806 	/* Reset attempts to return the hardware to "halted." */
1807 	status = ioat_get_chansts(ioat);
1808 	if (is_ioat_active(status) || is_ioat_idle(status)) {
1809 		/* So this really shouldn't happen... */
1810 		ioat_log_message(0, "Device is active after a reset?\n");
1811 		ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
1812 		error = 0;
1813 		goto out;
1814 	}
1815 
1816 	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
1817 	if (chanerr != 0) {
1818 		mtx_lock(&ioat->cleanup_lock);
1819 		ioat_halted_debug(ioat, chanerr);
1820 		mtx_unlock(&ioat->cleanup_lock);
1821 		error = EIO;
1822 		goto out;
1823 	}
1824 
1825 	/*
1826 	 * Bring device back online after reset.  Writing CHAINADDR brings the
1827 	 * device back to active.
1828 	 *
1829 	 * The internal ring counter resets to zero, so we have to start over
1830 	 * at zero as well.
1831 	 */
1832 	ioat->tail = ioat->head = 0;
1833 	ioat->last_seen = 0;
1834 	*ioat->comp_update = 0;
1835 
1836 	ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
1837 	ioat_write_chancmp(ioat, ioat->comp_update_bus_addr);
1838 	ioat_write_chainaddr(ioat, RING_PHYS_ADDR(ioat, 0));
1839 	error = 0;
1840 	CTR2(KTR_IOAT, "%s channel=%u configured channel", __func__,
1841 	    ioat->chan_idx);
1842 
1843 out:
1844 	/* Enqueues a null operation and ensures it completes. */
1845 	if (error == 0) {
1846 		error = ioat_start_channel(ioat);
1847 		CTR2(KTR_IOAT, "%s channel=%u started channel", __func__,
1848 		    ioat->chan_idx);
1849 	}
1850 
1851 	/*
1852 	 * Resume completions now that ring state is consistent.
1853 	 */
1854 	mtx_lock(&ioat->cleanup_lock);
1855 	ioat->resetting_cleanup = FALSE;
1856 	mtx_unlock(&ioat->cleanup_lock);
1857 
1858 	/* Unblock submission of new work */
1859 	mtx_lock(&ioat->submit_lock);
1860 	ioat->quiescing = FALSE;
1861 	wakeup(&ioat->quiescing);
1862 
1863 	ioat->resetting = FALSE;
1864 	wakeup(&ioat->resetting);
1865 
1866 	CTR2(KTR_IOAT, "%s channel=%u reset done", __func__, ioat->chan_idx);
1867 	mtx_unlock(&ioat->submit_lock);
1868 
1869 	return (error);
1870 }
1871 
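/*
 * Sysctl handler backing the read-only "state.chansts" node (typically
 * dev.ioat.<unit>.state.chansts): reports the current channel status as a
 * string (ACTIVE/IDLE/SUSPENDED/HALTED/ARMED).
 */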
1872 static int
1873 sysctl_handle_chansts(SYSCTL_HANDLER_ARGS)
1874 {
1875 	struct ioat_softc *ioat;
1876 	struct sbuf sb;
1877 	uint64_t status;
1878 	int error;
1879 
1880 	ioat = arg1;
1881 
1882 	status = ioat_get_chansts(ioat) & IOAT_CHANSTS_STATUS;
1883 
1884 	sbuf_new_for_sysctl(&sb, NULL, 256, req);
1885 	switch (status) {
1886 	case IOAT_CHANSTS_ACTIVE:
1887 		sbuf_printf(&sb, "ACTIVE");
1888 		break;
1889 	case IOAT_CHANSTS_IDLE:
1890 		sbuf_printf(&sb, "IDLE");
1891 		break;
1892 	case IOAT_CHANSTS_SUSPENDED:
1893 		sbuf_printf(&sb, "SUSPENDED");
1894 		break;
1895 	case IOAT_CHANSTS_HALTED:
1896 		sbuf_printf(&sb, "HALTED");
1897 		break;
1898 	case IOAT_CHANSTS_ARMED:
1899 		sbuf_printf(&sb, "ARMED");
1900 		break;
1901 	default:
1902 		sbuf_printf(&sb, "UNKNOWN");
1903 		break;
1904 	}
1905 	error = sbuf_finish(&sb);
1906 	sbuf_delete(&sb);
1907 
1908 	if (error != 0 || req->newptr == NULL)
1909 		return (error);
1910 	return (EINVAL);
1911 }
1912 
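/*
 * Sysctl handler for the "stats.desc_per_interrupt" node: reports the average
 * number of descriptors completed per interrupt as a fixed-point string with
 * one fractional digit.  For example, 25 descriptors over 10 interrupts gives
 * rate = 25 * 10 / 10 = 25, printed as "2.5".  "NaN" is reported before the
 * first interrupt has been taken.
 */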
1913 static int
1914 sysctl_handle_dpi(SYSCTL_HANDLER_ARGS)
1915 {
1916 	struct ioat_softc *ioat;
1917 	struct sbuf sb;
1918 #define	PRECISION	"1"
1919 	const uintmax_t factor = 10;
1920 	uintmax_t rate;
1921 	int error;
1922 
1923 	ioat = arg1;
1924 	sbuf_new_for_sysctl(&sb, NULL, 16, req);
1925 
1926 	if (ioat->stats.interrupts == 0) {
1927 		sbuf_printf(&sb, "NaN");
1928 		goto out;
1929 	}
1930 	rate = ioat->stats.descriptors_processed * factor /
1931 	    ioat->stats.interrupts;
1932 	sbuf_printf(&sb, "%ju.%." PRECISION "ju", rate / factor,
1933 	    rate % factor);
1934 #undef	PRECISION
1935 out:
1936 	error = sbuf_finish(&sb);
1937 	sbuf_delete(&sb);
1938 	if (error != 0 || req->newptr == NULL)
1939 		return (error);
1940 	return (EINVAL);
1941 }
1942 
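/*
 * Sysctl handler for the "hammer.force_hw_reset" node.  Reads always return
 * 0; writing any non-zero value triggers a full hardware reset of the
 * channel, e.g. (assuming the first channel attaches as ioat0):
 *
 *	# sysctl dev.ioat.0.hammer.force_hw_reset=1
 */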
1943 static int
1944 sysctl_handle_reset(SYSCTL_HANDLER_ARGS)
1945 {
1946 	struct ioat_softc *ioat;
1947 	int error, arg;
1948 
1949 	ioat = arg1;
1950 
1951 	arg = 0;
1952 	error = SYSCTL_OUT(req, &arg, sizeof(arg));
1953 	if (error != 0 || req->newptr == NULL)
1954 		return (error);
1955 
1956 	error = SYSCTL_IN(req, &arg, sizeof(arg));
1957 	if (error != 0)
1958 		return (error);
1959 
1960 	if (arg != 0)
1961 		error = ioat_reset_hw(ioat);
1962 
1963 	return (error);
1964 }
1965 
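/*
 * Print a 64-byte hardware descriptor as two rows of eight 32-bit words, for
 * use in halt/error debugging output.
 */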
1966 static void
1967 dump_descriptor(void *hw_desc)
1968 {
1969 	int i, j;
1970 
1971 	for (i = 0; i < 2; i++) {
1972 		for (j = 0; j < 8; j++)
1973 			printf("%08x ", ((uint32_t *)hw_desc)[i * 8 + j]);
1974 		printf("\n");
1975 	}
1976 }
1977 
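/*
 * Create the per-channel sysctl tree under the device's node: top-level
 * read-only capability values plus the "state" (ring and hardware state),
 * "hammer" (test knobs such as force_hw_reset), and "stats" (counters)
 * subtrees.
 */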
1978 static void
1979 ioat_setup_sysctl(device_t device)
1980 {
1981 	struct sysctl_oid_list *par, *statpar, *state, *hammer;
1982 	struct sysctl_ctx_list *ctx;
1983 	struct sysctl_oid *tree, *tmp;
1984 	struct ioat_softc *ioat;
1985 
1986 	ioat = DEVICE2SOFTC(device);
1987 	ctx = device_get_sysctl_ctx(device);
1988 	tree = device_get_sysctl_tree(device);
1989 	par = SYSCTL_CHILDREN(tree);
1990 
1991 	SYSCTL_ADD_INT(ctx, par, OID_AUTO, "version", CTLFLAG_RD,
1992 	    &ioat->version, 0, "HW version (0xMM form)");
1993 	SYSCTL_ADD_UINT(ctx, par, OID_AUTO, "max_xfer_size", CTLFLAG_RD,
1994 	    &ioat->max_xfer_size, 0, "HW maximum transfer size");
1995 	SYSCTL_ADD_INT(ctx, par, OID_AUTO, "intrdelay_supported", CTLFLAG_RD,
1996 	    &ioat->intrdelay_supported, 0, "Is INTRDELAY supported");
1997 	SYSCTL_ADD_U16(ctx, par, OID_AUTO, "intrdelay_max", CTLFLAG_RD,
1998 	    &ioat->intrdelay_max, 0,
1999 	    "Maximum configurable INTRDELAY on this channel (microseconds)");
2000 
2001 	tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "state", CTLFLAG_RD, NULL,
2002 	    "IOAT channel internal state");
2003 	state = SYSCTL_CHILDREN(tmp);
2004 
2005 	SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "ring_size_order", CTLFLAG_RD,
2006 	    &ioat->ring_size_order, 0, "SW descriptor ring size order");
2007 	SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "head", CTLFLAG_RD, &ioat->head,
2008 	    0, "SW descriptor head pointer index");
2009 	SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "tail", CTLFLAG_RD, &ioat->tail,
2010 	    0, "SW descriptor tail pointer index");
2011 
2012 	SYSCTL_ADD_UQUAD(ctx, state, OID_AUTO, "last_completion", CTLFLAG_RD,
2013 	    ioat->comp_update, "HW addr of last completion");
2014 
2015 	SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_submitter_processing",
2016 	    CTLFLAG_RD, &ioat->is_submitter_processing, 0,
2017 	    "submitter processing");
2018 
2019 	SYSCTL_ADD_PROC(ctx, state, OID_AUTO, "chansts",
2020 	    CTLTYPE_STRING | CTLFLAG_RD, ioat, 0, sysctl_handle_chansts, "A",
2021 	    "String of the channel status");
2022 
2023 	SYSCTL_ADD_U16(ctx, state, OID_AUTO, "intrdelay", CTLFLAG_RD,
2024 	    &ioat->cached_intrdelay, 0,
2025 	    "Current INTRDELAY on this channel (cached, microseconds)");
2026 
2027 	tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "hammer", CTLFLAG_RD, NULL,
2028 	    "Big hammers (mostly for testing)");
2029 	hammer = SYSCTL_CHILDREN(tmp);
2030 
2031 	SYSCTL_ADD_PROC(ctx, hammer, OID_AUTO, "force_hw_reset",
2032 	    CTLTYPE_INT | CTLFLAG_RW, ioat, 0, sysctl_handle_reset, "I",
2033 	    "Set to non-zero to reset the hardware");
2034 
2035 	tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "stats", CTLFLAG_RD, NULL,
2036 	    "IOAT channel statistics");
2037 	statpar = SYSCTL_CHILDREN(tmp);
2038 
2039 	SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "interrupts", CTLFLAG_RW,
2040 	    &ioat->stats.interrupts,
2041 	    "Number of interrupts processed on this channel");
2042 	SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "descriptors", CTLFLAG_RW,
2043 	    &ioat->stats.descriptors_processed,
2044 	    "Number of descriptors processed on this channel");
2045 	SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "submitted", CTLFLAG_RW,
2046 	    &ioat->stats.descriptors_submitted,
2047 	    "Number of descriptors submitted to this channel");
2048 	SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "errored", CTLFLAG_RW,
2049 	    &ioat->stats.descriptors_error,
2050 	    "Number of descriptors failed by channel errors");
2051 	SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "halts", CTLFLAG_RW,
2052 	    &ioat->stats.channel_halts, 0,
2053 	    "Number of times the channel has halted");
2054 	SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "last_halt_chanerr", CTLFLAG_RW,
2055 	    &ioat->stats.last_halt_chanerr, 0,
2056 	    "The raw CHANERR when the channel was last halted");
2057 
2058 	SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "desc_per_interrupt",
2059 	    CTLTYPE_STRING | CTLFLAG_RD, ioat, 0, sysctl_handle_dpi, "A",
2060 	    "Descriptors per interrupt");
2061 }
2062 
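/*
 * Take a reference on the channel; callers must hold the submit lock.  The
 * reference is dropped with ioat_put(), and teardown waits for the count to
 * reach zero in ioat_drain_locked().
 */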
2063 static void
2064 ioat_get(struct ioat_softc *ioat)
2065 {
2066 
2067 	mtx_assert(&ioat->submit_lock, MA_OWNED);
2068 	KASSERT(ioat->refcnt < UINT32_MAX, ("refcnt overflow"));
2069 
2070 	ioat->refcnt++;
2071 }
2072 
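/*
 * Drop a reference taken with ioat_get(); wakes any thread sleeping in
 * ioat_drain_locked() once the count reaches zero.
 */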
2073 static void
2074 ioat_put(struct ioat_softc *ioat)
2075 {
2076 
2077 	mtx_assert(&ioat->submit_lock, MA_OWNED);
2078 	KASSERT(ioat->refcnt >= 1, ("refcnt error"));
2079 
2080 	if (--ioat->refcnt == 0)
2081 		wakeup(&ioat->refcnt);
2082 }
2083 
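/*
 * Sleep (with the submit lock held) until all outstanding references have
 * been released.
 */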
2084 static void
2085 ioat_drain_locked(struct ioat_softc *ioat)
2086 {
2087 
2088 	mtx_assert(&ioat->submit_lock, MA_OWNED);
2089 
2090 	while (ioat->refcnt > 0)
2091 		msleep(&ioat->refcnt, &ioat->submit_lock, 0, "ioat_drain", 0);
2092 }
2093 
2094 #ifdef DDB
2095 #define	_db_show_lock(lo)	LOCK_CLASS(lo)->lc_ddb_show(lo)
2096 #define	db_show_lock(lk)	_db_show_lock(&(lk)->lock_object)
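/*
 * DDB command to dump a channel's software and hardware state, e.g.:
 *
 *	db> show ioat 0
 */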
2097 DB_SHOW_COMMAND(ioat, db_show_ioat)
2098 {
2099 	struct ioat_softc *sc;
2100 	unsigned idx;
2101 
2102 	if (!have_addr)
2103 		goto usage;
2104 	idx = (unsigned)addr;
2105 	if (idx >= ioat_channel_index)
2106 		goto usage;
2107 
2108 	sc = ioat_channel[idx];
2109 	db_printf("ioat softc at %p\n", sc);
2110 	if (sc == NULL)
2111 		return;
2112 
2113 	db_printf(" version: %d\n", sc->version);
2114 	db_printf(" chan_idx: %u\n", sc->chan_idx);
2115 	db_printf(" submit_lock: ");
2116 	db_show_lock(&sc->submit_lock);
2117 
2118 	db_printf(" capabilities: %b\n", (int)sc->capabilities,
2119 	    IOAT_DMACAP_STR);
2120 	db_printf(" cached_intrdelay: %u\n", sc->cached_intrdelay);
2121 	db_printf(" *comp_update: 0x%jx\n", (uintmax_t)*sc->comp_update);
2122 
2123 	db_printf(" poll_timer:\n");
2124 	db_printf("  c_time: %ju\n", (uintmax_t)sc->poll_timer.c_time);
2125 	db_printf("  c_arg: %p\n", sc->poll_timer.c_arg);
2126 	db_printf("  c_func: %p\n", sc->poll_timer.c_func);
2127 	db_printf("  c_lock: %p\n", sc->poll_timer.c_lock);
2128 	db_printf("  c_flags: 0x%x\n", (unsigned)sc->poll_timer.c_flags);
2129 
2130 	db_printf(" quiescing: %d\n", (int)sc->quiescing);
2131 	db_printf(" destroying: %d\n", (int)sc->destroying);
2132 	db_printf(" is_submitter_processing: %d\n",
2133 	    (int)sc->is_submitter_processing);
2134 	db_printf(" intrdelay_supported: %d\n", (int)sc->intrdelay_supported);
2135 	db_printf(" resetting: %d\n", (int)sc->resetting);
2136 
2137 	db_printf(" head: %u\n", sc->head);
2138 	db_printf(" tail: %u\n", sc->tail);
2139 	db_printf(" ring_size_order: %u\n", sc->ring_size_order);
2140 	db_printf(" last_seen: 0x%lx\n", sc->last_seen);
2141 	db_printf(" ring: %p\n", sc->ring);
2142 	db_printf(" descriptors: %p\n", sc->hw_desc_ring);
2143 	db_printf(" descriptors (phys): 0x%jx\n",
2144 	    (uintmax_t)sc->hw_desc_bus_addr);
2145 
2146 	db_printf("  ring[%u] (tail):\n", sc->tail %
2147 	    (1 << sc->ring_size_order));
2148 	db_printf("   id: %u\n", ioat_get_ring_entry(sc, sc->tail)->id);
2149 	db_printf("   addr: 0x%lx\n",
2150 	    RING_PHYS_ADDR(sc, sc->tail));
2151 	db_printf("   next: 0x%lx\n",
2152 	     ioat_get_descriptor(sc, sc->tail)->generic.next);
2153 
2154 	db_printf("  ring[%u] (head - 1):\n", (sc->head - 1) %
2155 	    (1 << sc->ring_size_order));
2156 	db_printf("   id: %u\n", ioat_get_ring_entry(sc, sc->head - 1)->id);
2157 	db_printf("   addr: 0x%lx\n",
2158 	    RING_PHYS_ADDR(sc, sc->head - 1));
2159 	db_printf("   next: 0x%lx\n",
2160 	     ioat_get_descriptor(sc, sc->head - 1)->generic.next);
2161 
2162 	db_printf("  ring[%u] (head):\n", (sc->head) %
2163 	    (1 << sc->ring_size_order));
2164 	db_printf("   id: %u\n", ioat_get_ring_entry(sc, sc->head)->id);
2165 	db_printf("   addr: 0x%lx\n",
2166 	    RING_PHYS_ADDR(sc, sc->head));
2167 	db_printf("   next: 0x%lx\n",
2168 	     ioat_get_descriptor(sc, sc->head)->generic.next);
2169 
2170 	for (idx = 0; idx < (1 << sc->ring_size_order); idx++)
2171 		if ((*sc->comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK)
2172 		    == RING_PHYS_ADDR(sc, idx))
2173 			db_printf("  ring[%u] == hardware tail\n", idx);
2174 
2175 	db_printf(" cleanup_lock: ");
2176 	db_show_lock(&sc->cleanup_lock);
2177 
2178 	db_printf(" refcnt: %u\n", sc->refcnt);
2179 	db_printf(" stats:\n");
2180 	db_printf("  interrupts: %lu\n", sc->stats.interrupts);
2181 	db_printf("  descriptors_processed: %lu\n", sc->stats.descriptors_processed);
2182 	db_printf("  descriptors_error: %lu\n", sc->stats.descriptors_error);
2183 	db_printf("  descriptors_submitted: %lu\n", sc->stats.descriptors_submitted);
2184 
2185 	db_printf("  channel_halts: %u\n", sc->stats.channel_halts);
2186 	db_printf("  last_halt_chanerr: %u\n", sc->stats.last_halt_chanerr);
2187 
2188 	if (db_pager_quit)
2189 		return;
2190 
2191 	db_printf(" hw status:\n");
2192 	db_printf("  status: 0x%lx\n", ioat_get_chansts(sc));
2193 	db_printf("  chanctrl: 0x%x\n",
2194 	    (unsigned)ioat_read_2(sc, IOAT_CHANCTRL_OFFSET));
2195 	db_printf("  chancmd: 0x%x\n",
2196 	    (unsigned)ioat_read_1(sc, IOAT_CHANCMD_OFFSET));
2197 	db_printf("  dmacount: 0x%x\n",
2198 	    (unsigned)ioat_read_2(sc, IOAT_DMACOUNT_OFFSET));
2199 	db_printf("  chainaddr: 0x%lx\n",
2200 	    ioat_read_double_4(sc, IOAT_CHAINADDR_OFFSET_LOW));
2201 	db_printf("  chancmp: 0x%lx\n",
2202 	    ioat_read_double_4(sc, IOAT_CHANCMP_OFFSET_LOW));
2203 	db_printf("  chanerr: %b\n",
2204 	    (int)ioat_read_4(sc, IOAT_CHANERR_OFFSET), IOAT_CHANERR_STR);
2205 	return;
2206 usage:
2207 	db_printf("usage: show ioat <0-%u>\n", ioat_channel_index);
2208 	return;
2209 }
2210 #endif /* DDB */
2211