xref: /freebsd/sys/dev/ioat/ioat_internal.h (revision 69718b786d3943ea9a99eeeb5f5f6162f11c78b7)
1 /*-
2  * Copyright (C) 2012 Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 __FBSDID("$FreeBSD$");
28 
29 #ifndef __IOAT_INTERNAL_H__
30 #define __IOAT_INTERNAL_H__
31 
32 #include <sys/_task.h>
33 
/*
 * DEVICE2SOFTC(dev): return device_get_softc(dev) cast to
 * 'struct ioat_softc *'.
 */
34 #define	DEVICE2SOFTC(dev)	((struct ioat_softc *) device_get_softc(dev))
/*
 * Alias for KTR_SPARE3.
 * NOTE(review): presumably the ktr(9) class this driver traces under --
 * confirm against the CTR*() call sites.
 */
35 #define	KTR_IOAT		KTR_SPARE3
36 
/*
 * Named accessors for individual device registers.  Each one expands to the
 * ioat_read_N()/ioat_write_N() macro of the matching access width (defined
 * further down in this header) applied to a fixed register offset.  The
 * IOAT_*_OFFSET and mask constants are defined outside this header.
 */

/* 1-byte read of the register at IOAT_CHANCNT_OFFSET. */
37 #define	ioat_read_chancnt(ioat) \
38 	ioat_read_1((ioat), IOAT_CHANCNT_OFFSET)
39 
/* 1-byte read at IOAT_XFERCAP_OFFSET, masked with IOAT_XFERCAP_VALID_MASK. */
40 #define	ioat_read_xfercap(ioat) \
41 	(ioat_read_1((ioat), IOAT_XFERCAP_OFFSET) & IOAT_XFERCAP_VALID_MASK)
42 
/* 1-byte write of 'value' at IOAT_INTRCTRL_OFFSET. */
43 #define	ioat_write_intrctrl(ioat, value) \
44 	ioat_write_1((ioat), IOAT_INTRCTRL_OFFSET, (value))
45 
/* 1-byte read at IOAT_CBVER_OFFSET; the result is masked to its low 8 bits. */
46 #define	ioat_read_cbver(ioat) \
47 	(ioat_read_1((ioat), IOAT_CBVER_OFFSET) & 0xFF)
48 
/* 4-byte read at IOAT_DMACAPABILITY_OFFSET. */
49 #define	ioat_read_dmacapability(ioat) \
50 	ioat_read_4((ioat), IOAT_DMACAPABILITY_OFFSET)
51 
/* 2-byte write of 'value' at IOAT_CHANCTRL_OFFSET. */
52 #define	ioat_write_chanctrl(ioat, value) \
53 	ioat_write_2((ioat), IOAT_CHANCTRL_OFFSET, (value))
54 
55 static __inline uint64_t
56 ioat_bus_space_read_8_lower_first(bus_space_tag_t tag,
57     bus_space_handle_t handle, bus_size_t offset)
58 {
59 	return (bus_space_read_4(tag, handle, offset) |
60 	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
61 }
62 
63 static __inline void
64 ioat_bus_space_write_8_lower_first(bus_space_tag_t tag,
65     bus_space_handle_t handle, bus_size_t offset, uint64_t val)
66 {
67 	bus_space_write_4(tag, handle, offset, val);
68 	bus_space_write_4(tag, handle, offset + 4, val >> 32);
69 }
70 
/*
 * 64-bit register access primitives.
 *
 * When __i386__ is defined these are aliases for the split 32-bit helpers
 * above (low dword accessed first); otherwise they forward to the native
 * bus_space_read_8()/bus_space_write_8().
 * NOTE(review): presumably i386 is special-cased because it has no 8-byte
 * bus_space accessors -- confirm.
 */
71 #ifdef __i386__
72 #define ioat_bus_space_read_8 ioat_bus_space_read_8_lower_first
73 #define ioat_bus_space_write_8 ioat_bus_space_write_8_lower_first
74 #else
75 #define ioat_bus_space_read_8(tag, handle, offset) \
76 	bus_space_read_8((tag), (handle), (offset))
77 #define ioat_bus_space_write_8(tag, handle, offset, val) \
78 	bus_space_write_8((tag), (handle), (offset), (val))
79 #endif
80 
/*
 * Width-specific register accessors.  'ioat' must be a pointer to an object
 * with pci_bus_tag and pci_bus_handle members (struct ioat_softc); those are
 * passed as the bus_space tag/handle, and 'offset' is passed through as the
 * register offset.  Note that 'ioat' is evaluated twice in each expansion.
 */

/* 1-, 2- and 4-byte reads via bus_space_read_N(). */
81 #define ioat_read_1(ioat, offset) \
82 	bus_space_read_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
83 	    (offset))
84 
85 #define ioat_read_2(ioat, offset) \
86 	bus_space_read_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
87 	    (offset))
88 
89 #define ioat_read_4(ioat, offset) \
90 	bus_space_read_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
91 	    (offset))
92 
/* 8-byte read using whichever ioat_bus_space_read_8 was selected above. */
93 #define ioat_read_8(ioat, offset) \
94 	ioat_bus_space_read_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
95 	    (offset))
96 
/* 8-byte read always performed as two 4-byte reads (see the helper). */
97 #define ioat_read_double_4(ioat, offset) \
98 	ioat_bus_space_read_8_lower_first((ioat)->pci_bus_tag, \
99 	    (ioat)->pci_bus_handle, (offset))
100 
/* 1-, 2- and 4-byte writes via bus_space_write_N(). */
101 #define ioat_write_1(ioat, offset, value) \
102 	bus_space_write_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
103 	    (offset), (value))
104 
105 #define ioat_write_2(ioat, offset, value) \
106 	bus_space_write_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
107 	    (offset), (value))
108 
109 #define ioat_write_4(ioat, offset, value) \
110 	bus_space_write_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
111 	    (offset), (value))
112 
/* 8-byte write using whichever ioat_bus_space_write_8 was selected above. */
113 #define ioat_write_8(ioat, offset, value) \
114 	ioat_bus_space_write_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
115 	    (offset), (value))
116 
/* 8-byte write always performed as two 4-byte writes, low dword first. */
117 #define ioat_write_double_4(ioat, offset, value) \
118 	ioat_bus_space_write_8_lower_first((ioat)->pci_bus_tag, \
119 	    (ioat)->pci_bus_handle, (offset), (value))
120 
/*
 * Declarations only; the matching definitions are not in this header.
 * NOTE(review): presumably M_IOAT is the driver's malloc(9) type and
 * _hw_ioat its sysctl node -- confirm against the defining .c file.
 */
121 MALLOC_DECLARE(M_IOAT);
122 
123 SYSCTL_DECL(_hw_ioat);
124 
/* Debug level variable, defined elsewhere. */
125 extern int g_ioat_debug_level;
126 
/*
 * Bit-field view of a descriptor's 32-bit control word, as embedded in the
 * 'u' union of each hardware descriptor below.  The widths add up to exactly
 * 32 bits (eleven 1-bit flags + 13 reserved + 8-bit op).
 * NOTE(review): the mapping of these fields onto physical bit positions
 * depends on the compiler's bit-field allocation order.
 */
127 struct generic_dma_control {
128 	uint32_t int_enable:1;
129 	uint32_t src_snoop_disable:1;
130 	uint32_t dest_snoop_disable:1;
131 	uint32_t completion_update:1;
132 	uint32_t fence:1;
133 	uint32_t reserved1:1;
134 	uint32_t src_page_break:1;
135 	uint32_t dest_page_break:1;
136 	uint32_t bundle:1;
137 	uint32_t dest_dca:1;
138 	uint32_t hint:1;
139 	uint32_t reserved2:13;
140 	uint32_t op:8;		/* operation code; see the IOAT_OP_* values */
141 };
142 
/*
 * Operation-independent view of a hardware descriptor: size, control word,
 * source/destination addresses and the 'next' link, followed by reserved
 * space.  The declared members total 64 bytes, the same as every other
 * *_hw_descriptor layout in this file.
 */
143 struct ioat_generic_hw_descriptor {
144 	uint32_t size;
145 	union {
146 		uint32_t control_raw;	/* whole control word at once */
147 		struct generic_dma_control control_generic;
148 	} u;
149 	uint64_t src_addr;
150 	uint64_t dest_addr;
151 	uint64_t next;
152 	uint64_t reserved[4];
153 };
154 
/*
 * Hardware descriptor layout for the copy operation (IOAT_OP_COPY).
 * The first four members line up with ioat_generic_hw_descriptor; the
 * trailing 32 bytes carry next_src_addr/next_dest_addr and two user words.
 * Declared members total 64 bytes.
 */
155 struct ioat_dma_hw_descriptor {
156 	uint32_t size;
157 	union {
158 		uint32_t control_raw;
159 		struct generic_dma_control control_generic;
		/*
		 * Copy-specific view of the control word.  Same field widths
		 * as generic_dma_control, except the bit called 'reserved1'
		 * there is named 'null' here.
		 */
160 		struct {
161 			uint32_t int_enable:1;
162 			uint32_t src_snoop_disable:1;
163 			uint32_t dest_snoop_disable:1;
164 			uint32_t completion_update:1;
165 			uint32_t fence:1;
166 			uint32_t null:1;
167 			uint32_t src_page_break:1;
168 			uint32_t dest_page_break:1;
169 			uint32_t bundle:1;
170 			uint32_t dest_dca:1;
171 			uint32_t hint:1;
172 			uint32_t reserved:13;
173 			#define IOAT_OP_COPY 0x00
174 			uint32_t op:8;
175 		} control;
176 	} u;
177 	uint64_t src_addr;
178 	uint64_t dest_addr;
179 	uint64_t next;
180 	uint64_t next_src_addr;
181 	uint64_t next_dest_addr;
182 	uint64_t user1;
183 	uint64_t user2;
184 };
185 
/*
 * Hardware descriptor layout for the fill operation (IOAT_OP_FILL).
 * In place of a source address it carries 64 bits of immediate data
 * (src_data).  Declared members total 64 bytes.
 */
186 struct ioat_fill_hw_descriptor {
187 	uint32_t size;
188 	union {
189 		uint32_t control_raw;
190 		struct generic_dma_control control_generic;
		/*
		 * Fill-specific view of the control word; the source-related
		 * and DCA/hint bits of the generic layout are reserved here.
		 * Widths sum to 32 bits.
		 */
191 		struct {
192 			uint32_t int_enable:1;
193 			uint32_t reserved:1;
194 			uint32_t dest_snoop_disable:1;
195 			uint32_t completion_update:1;
196 			uint32_t fence:1;
197 			uint32_t reserved2:2;
198 			uint32_t dest_page_break:1;
199 			uint32_t bundle:1;
200 			uint32_t reserved3:15;
201 			#define IOAT_OP_FILL 0x01
202 			uint32_t op:8;
203 		} control;
204 	} u;
205 	uint64_t src_data;
206 	uint64_t dest_addr;
207 	uint64_t next;
208 	uint64_t reserved;
209 	uint64_t next_dest_addr;
210 	uint64_t user1;
211 	uint64_t user2;
212 };
213 
/*
 * Hardware descriptor layout for the CRC operations (IOAT_OP_MOVECRC* and
 * IOAT_OP_CRC*).  Besides the usual addresses it carries a 32-bit seed and
 * a crc_address.  Declared members total 64 bytes.
 */
214 struct ioat_crc32_hw_descriptor {
215 	uint32_t size;
216 	union {
217 		uint32_t control_raw;
218 		struct generic_dma_control control_generic;
		/* CRC-specific view of the control word; widths sum to 32. */
219 		struct {
220 			uint32_t int_enable:1;
221 			uint32_t src_snoop_disable:1;
222 			uint32_t dest_snoop_disable:1;
223 			uint32_t completion_update:1;
224 			uint32_t fence:1;
225 			uint32_t reserved1:3;
226 			uint32_t bundle:1;
227 			uint32_t dest_dca:1;
228 			uint32_t hint:1;
229 			uint32_t use_seed:1;
230 			/*
231 			 * crc_location:
232 			 * For IOAT_OP_MOVECRC_TEST and IOAT_OP_CRC_TEST:
233 			 * 0: comparison value is pointed to by CRC Address
234 			 *    field.
235 			 * 1: comparison value follows data in wire format
236 			 *    ("inverted reflected bit order") in the 4 bytes
237 			 *    following the source data.
238 			 *
239 			 * For IOAT_OP_CRC_STORE:
240 			 * 0: Result will be stored at location pointed to by
241 			 *    CRC Address field (in wire format).
242 			 * 1: Result will be stored directly following the
243 			 *    source data.
244 			 *
245 			 * For IOAT_OP_MOVECRC_STORE:
246 			 * 0: Result will be stored at location pointed to by
247 			 *    CRC Address field (in wire format).
248 			 * 1: Result will be stored directly following the
249 			 *    *destination* data.
250 			 */
251 			uint32_t crc_location:1;
252 			uint32_t reserved2:11;
253 			/*
254 			 * MOVECRC - Move data in the same way as standard copy
255 			 * operation, but also compute CRC32.
256 			 *
257 			 * CRC - Only compute CRC on source data.
258 			 *
259 			 * There is a CRC accumulator register in the hardware.
260 			 * If 'initial' is set, it is initialized to the value
261 			 * in 'seed.'
			 * NOTE(review): no member named 'initial' exists in
			 * this struct; presumably the 'use_seed' bit above is
			 * meant -- confirm against the hardware spec.
262 			 *
263 			 * In all modes, these operators accumulate size bytes
264 			 * at src_addr into the running CRC32C.
265 			 *
266 			 * Store mode emits the accumulated CRC, in wire
267 			 * format, as specified by the crc_location bit above.
268 			 *
269 			 * Test mode compares the accumulated CRC against the
270 			 * reference CRC, as described in crc_location above.
271 			 * On failure, halts the DMA engine with a CRC error
272 			 * status.
273 			 */
274 			#define	IOAT_OP_MOVECRC		0x41
275 			#define	IOAT_OP_MOVECRC_TEST	0x42
276 			#define	IOAT_OP_MOVECRC_STORE	0x43
277 			#define	IOAT_OP_CRC		0x81
278 			#define	IOAT_OP_CRC_TEST	0x82
279 			#define	IOAT_OP_CRC_STORE	0x83
280 			uint32_t op:8;
281 		} control;
282 	} u;
283 	uint64_t src_addr;
284 	uint64_t dest_addr;
285 	uint64_t next;
286 	uint64_t next_src_addr;
287 	uint64_t next_dest_addr;
288 	uint32_t seed;
289 	uint32_t reserved;
290 	uint64_t crc_address;
291 };
292 
/*
 * Hardware descriptor layout for the XOR operations (IOAT_OP_XOR and
 * IOAT_OP_XOR_VAL).  Holds source addresses 1 through 5; sources 6-8 live
 * in struct ioat_xor_ext_hw_descriptor.  Declared members total 64 bytes.
 */
293 struct ioat_xor_hw_descriptor {
294 	uint32_t size;
295 	union {
296 		uint32_t control_raw;
297 		struct generic_dma_control control_generic;
		/*
		 * XOR-specific view of the control word: a 3-bit src_count
		 * sits where the generic layout has reserved1 and the two
		 * page_break bits.  Widths sum to 32.
		 */
298 		struct {
299 			uint32_t int_enable:1;
300 			uint32_t src_snoop_disable:1;
301 			uint32_t dest_snoop_disable:1;
302 			uint32_t completion_update:1;
303 			uint32_t fence:1;
304 			uint32_t src_count:3;
305 			uint32_t bundle:1;
306 			uint32_t dest_dca:1;
307 			uint32_t hint:1;
308 			uint32_t reserved:13;
309 			#define IOAT_OP_XOR 0x87
310 			#define IOAT_OP_XOR_VAL 0x88
311 			uint32_t op:8;
312 		} control;
313 	} u;
314 	uint64_t src_addr;
315 	uint64_t dest_addr;
316 	uint64_t next;
317 	uint64_t src_addr2;
318 	uint64_t src_addr3;
319 	uint64_t src_addr4;
320 	uint64_t src_addr5;
321 };
322 
/*
 * Extension layout carrying XOR source addresses 6-8, continuing the
 * numbering of struct ioat_xor_hw_descriptor.  It has no size/control word;
 * 'next' sits at byte offset 24, the same offset as in the other layouts.
 * Declared members total 64 bytes.
 */
323 struct ioat_xor_ext_hw_descriptor {
324 	uint64_t src_addr6;
325 	uint64_t src_addr7;
326 	uint64_t src_addr8;
327 	uint64_t next;
328 	uint64_t reserved[4];
329 };
330 
/*
 * Hardware descriptor layout for the PQ operations (IOAT_OP_PQ and
 * IOAT_OP_PQ_VAL).  Carries source addresses 1-3, separate p_addr and
 * q_addr members, and eight one-byte coefficients.  Sources 4-8 live in
 * struct ioat_pq_ext_hw_descriptor.  Declared members total 64 bytes.
 */
331 struct ioat_pq_hw_descriptor {
332 	uint32_t size;
333 	union {
334 		uint32_t control_raw;
335 		struct generic_dma_control control_generic;
		/*
		 * PQ-specific view of the control word: adds src_count and
		 * the p_disable/q_disable bits.  Widths sum to 32.
		 */
336 		struct {
337 			uint32_t int_enable:1;
338 			uint32_t src_snoop_disable:1;
339 			uint32_t dest_snoop_disable:1;
340 			uint32_t completion_update:1;
341 			uint32_t fence:1;
342 			uint32_t src_count:3;
343 			uint32_t bundle:1;
344 			uint32_t dest_dca:1;
345 			uint32_t hint:1;
346 			uint32_t p_disable:1;
347 			uint32_t q_disable:1;
348 			uint32_t reserved:11;
349 			#define IOAT_OP_PQ 0x89
350 			#define IOAT_OP_PQ_VAL 0x8a
351 			uint32_t op:8;
352 		} control;
353 	} u;
354 	uint64_t src_addr;
355 	uint64_t p_addr;
356 	uint64_t next;
357 	uint64_t src_addr2;
358 	uint64_t src_addr3;
359 	uint8_t  coef[8];
360 	uint64_t q_addr;
361 };
362 
/*
 * Extension layout carrying PQ source addresses 4-8, continuing the
 * numbering of struct ioat_pq_hw_descriptor.  'next' sits at byte offset 24,
 * between src_addr6 and src_addr7.  Declared members total 64 bytes.
 */
363 struct ioat_pq_ext_hw_descriptor {
364 	uint64_t src_addr4;
365 	uint64_t src_addr5;
366 	uint64_t src_addr6;
367 	uint64_t next;
368 	uint64_t src_addr7;
369 	uint64_t src_addr8;
370 	uint64_t reserved[2];
371 };
372 
/*
 * Hardware descriptor layout for the PQ update operation (IOAT_OP_PQ_UP).
 * Unlike ioat_pq_hw_descriptor, the coefficient is a single 8-bit field
 * inside the control word, and the descriptor carries p_src/q_src members
 * in addition to p_addr/q_addr.  Declared members total 64 bytes.
 *
 * Note the source-count field is spelled 'src_cnt' here but 'src_count' in
 * the XOR and PQ layouts.
 */
373 struct ioat_pq_update_hw_descriptor {
374 	uint32_t size;
375 	union {
376 		uint32_t control_raw;
377 		struct generic_dma_control control_generic;
		/* PQ-update view of the control word; widths sum to 32. */
378 		struct {
379 			uint32_t int_enable:1;
380 			uint32_t src_snoop_disable:1;
381 			uint32_t dest_snoop_disable:1;
382 			uint32_t completion_update:1;
383 			uint32_t fence:1;
384 			uint32_t src_cnt:3;
385 			uint32_t bundle:1;
386 			uint32_t dest_dca:1;
387 			uint32_t hint:1;
388 			uint32_t p_disable:1;
389 			uint32_t q_disable:1;
390 			uint32_t reserved:3;
391 			uint32_t coef:8;
392 			#define IOAT_OP_PQ_UP 0x8b
393 			uint32_t op:8;
394 		} control;
395 	} u;
396 	uint64_t src_addr;
397 	uint64_t p_addr;
398 	uint64_t next;
399 	uint64_t src_addr2;
400 	uint64_t p_src;
401 	uint64_t q_src;
402 	uint64_t q_addr;
403 };
404 
/*
 * Untyped view of a descriptor as eight 64-bit words (64 bytes), matching
 * the size of the typed layouts above.
 */
405 struct ioat_raw_hw_descriptor {
406 	uint64_t field[8];
407 };
408 
/*
 * Callback function pointer plus its opaque argument.
 * NOTE(review): presumably callback_fn is invoked with callback_arg when
 * the associated operation completes -- confirm against the code that
 * consumes this struct.
 */
409 struct bus_dmadesc {
410 	bus_dmaengine_callback_t callback_fn;
411 	void			 *callback_arg;
412 };
413 
/*
 * Software-side descriptor: a bus_dmadesc (callback + argument) and a
 * 32-bit id.  struct ioat_softc's 'ring' member points at objects of this
 * type.
 * NOTE(review): 'id' is presumably the slot's ring index -- confirm.
 */
414 struct ioat_descriptor {
415 	struct bus_dmadesc	bus_dmadesc;
416 	uint32_t		id;
417 };
418 
/*
 * Additional operation codes with no descriptor layout of their own in
 * this file (the other IOAT_OP_* values are defined next to the 'op' field
 * of the layout they belong to).
 */
419 /* Unused by this driver at this time. */
420 #define	IOAT_OP_MARKER		0x84
421 
422 /*
423  * Deprecated OPs -- v3 DMA generates an abort if given these.  And this driver
424  * doesn't support anything older than v3.
425  */
426 #define	IOAT_OP_OLD_XOR		0x85
427 #define	IOAT_OP_OLD_XOR_VAL	0x86
428 
/*
 * Categories of references.  IOAT_NUM_REF_KINDS is not a category itself:
 * it is the count of the preceding enumerators and sizes the per-kind
 * 'refkinds' array that struct ioat_softc has under INVARIANTS.
 * NOTE(review): presumably each kind labels who holds a count in
 * ioat_softc.refcnt -- confirm against the get/put helpers.
 */
429 enum ioat_ref_kind {
430 	IOAT_DMAENGINE_REF = 0,
431 	IOAT_ACTIVE_DESCR_REF,
432 	IOAT_NUM_REF_KINDS
433 };
434 
435 /* One of these per allocated PCI device. */
436 struct ioat_softc {
437 	bus_dmaengine_t		dmaengine;
438 #define	to_ioat_softc(_dmaeng)						\
439 ({									\
440 	bus_dmaengine_t *_p = (_dmaeng);				\
441 	(struct ioat_softc *)((char *)_p -				\
442 	    offsetof(struct ioat_softc, dmaengine));			\
443 })
444 
445 	int			version;
446 	unsigned		chan_idx;
447 
448 	struct mtx		submit_lock;
449 	device_t		device;
450 	bus_space_tag_t		pci_bus_tag;
451 	bus_space_handle_t	pci_bus_handle;
452 	int			pci_resource_id;
453 	struct resource		*pci_resource;
454 	uint32_t		max_xfer_size;
455 	uint32_t		capabilities;
456 	uint16_t		intrdelay_max;
457 	uint16_t		cached_intrdelay;
458 
459 	struct resource		*res;
460 	int			rid;
461 	void			*tag;
462 
463 	bus_dma_tag_t		hw_desc_tag;
464 	bus_dmamap_t		hw_desc_map;
465 
466 	bus_dma_tag_t		comp_update_tag;
467 	bus_dmamap_t		comp_update_map;
468 	uint64_t		*comp_update;
469 	bus_addr_t		comp_update_bus_addr;
470 
471 	struct callout		poll_timer;
472 	struct callout		shrink_timer;
473 	struct task		reset_task;
474 
475 	boolean_t		quiescing;
476 	boolean_t		destroying;
477 	boolean_t		is_submitter_processing;
478 	boolean_t		is_completion_pending;	/* submit_lock */
479 	boolean_t		is_reset_pending;
480 	boolean_t		is_channel_running;
481 	boolean_t		intrdelay_supported;
482 	boolean_t		resetting;		/* submit_lock */
483 	boolean_t		resetting_cleanup;	/* cleanup_lock */
484 
485 	uint32_t		head;
486 	uint32_t		acq_head;
487 	uint32_t		tail;
488 	uint32_t		hw_head;
489 	uint32_t		ring_size_order;
490 	bus_addr_t		last_seen;
491 
492 	struct ioat_descriptor	*ring;
493 
494 	union ioat_hw_descriptor {
495 		struct ioat_generic_hw_descriptor	generic;
496 		struct ioat_dma_hw_descriptor		dma;
497 		struct ioat_fill_hw_descriptor		fill;
498 		struct ioat_crc32_hw_descriptor		crc32;
499 		struct ioat_xor_hw_descriptor		xor;
500 		struct ioat_xor_ext_hw_descriptor	xor_ext;
501 		struct ioat_pq_hw_descriptor		pq;
502 		struct ioat_pq_ext_hw_descriptor	pq_ext;
503 		struct ioat_raw_hw_descriptor		raw;
504 	} *hw_desc_ring;
505 	bus_addr_t		hw_desc_bus_addr;
506 #define	RING_PHYS_ADDR(sc, i)	(sc)->hw_desc_bus_addr + \
507     (((i) % (1 << (sc)->ring_size_order)) * sizeof(struct ioat_dma_hw_descriptor))
508 
509 	struct mtx		cleanup_lock;
510 	volatile uint32_t	refcnt;
511 #ifdef INVARIANTS
512 	volatile uint32_t	refkinds[IOAT_NUM_REF_KINDS];
513 #endif
514 
515 	struct {
516 		uint64_t	interrupts;
517 		uint64_t	descriptors_processed;
518 		uint64_t	descriptors_error;
519 		uint64_t	descriptors_submitted;
520 
521 		uint32_t	channel_halts;
522 		uint32_t	last_halt_chanerr;
523 	} stats;
524 };
525 
/*
 * Prototypes only; the implementations are not in this header.
 * NOTE(review): presumably these set up and tear down the driver's test
 * facility -- confirm against the defining source file.
 */
526 void ioat_test_attach(void);
527 void ioat_test_detach(void);
528 
529 /*
530  * XXX DO NOT USE this routine for obtaining the current completed descriptor.
531  *
532  * The double_4 read on ioat<3.3 appears to result in torn reads.  And v3.2
533  * hardware is still commonplace (Broadwell Xeon has it).  Instead, use the
534  * device-pushed *comp_update.
535  *
536  * It is safe to use ioat_get_chansts() for the low status bits.
537  */
538 static inline uint64_t
539 ioat_get_chansts(struct ioat_softc *ioat)
540 {
541 	uint64_t status;
542 
543 	if (ioat->version >= IOAT_VER_3_3)
544 		status = ioat_read_8(ioat, IOAT_CHANSTS_OFFSET);
545 	else
546 		/* Must read lower 4 bytes before upper 4 bytes. */
547 		status = ioat_read_double_4(ioat, IOAT_CHANSTS_OFFSET);
548 	return (status);
549 }
550 
551 static inline void
552 ioat_write_chancmp(struct ioat_softc *ioat, uint64_t addr)
553 {
554 
555 	if (ioat->version >= IOAT_VER_3_3)
556 		ioat_write_8(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
557 	else
558 		ioat_write_double_4(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
559 }
560 
561 static inline void
562 ioat_write_chainaddr(struct ioat_softc *ioat, uint64_t addr)
563 {
564 
565 	if (ioat->version >= IOAT_VER_3_3)
566 		ioat_write_8(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
567 	else
568 		ioat_write_double_4(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
569 }
570 
571 static inline boolean_t
572 is_ioat_active(uint64_t status)
573 {
574 	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
575 }
576 
577 static inline boolean_t
578 is_ioat_idle(uint64_t status)
579 {
580 	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_IDLE);
581 }
582 
583 static inline boolean_t
584 is_ioat_halted(uint64_t status)
585 {
586 	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
587 }
588 
589 static inline boolean_t
590 is_ioat_suspended(uint64_t status)
591 {
592 	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
593 }
594 
595 static inline void
596 ioat_suspend(struct ioat_softc *ioat)
597 {
598 	ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_SUSPEND);
599 }
600 
601 static inline void
602 ioat_reset(struct ioat_softc *ioat)
603 {
604 	ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET);
605 }
606 
607 static inline boolean_t
608 ioat_reset_pending(struct ioat_softc *ioat)
609 {
610 	uint8_t cmd;
611 
612 	cmd = ioat_read_1(ioat, IOAT_CHANCMD_OFFSET);
613 	return ((cmd & IOAT_CHANCMD_RESET) != 0);
614 }
615 
616 #endif /* __IOAT_INTERNAL_H__ */
617