xref: /freebsd/sys/dev/ioat/ioat_internal.h (revision 5dae51da3da0cc94d17bd67b308fad304ebec7e0)
1 /*-
2  * Copyright (C) 2012 Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 __FBSDID("$FreeBSD$");
28 
29 #ifndef __IOAT_INTERNAL_H__
30 #define __IOAT_INTERNAL_H__
31 
#include <sys/_task.h>		/* struct task (ioat_softc.reset_task) */

/* Map a newbus device_t to the driver's per-channel softc. */
#define	DEVICE2SOFTC(dev)	((struct ioat_softc *) device_get_softc(dev))
/* KTR trace class used by this driver. */
#define	KTR_IOAT		KTR_SPARE3

/*
 * Typed accessors for individual device registers.  Each expands to a
 * 1-, 2-, or 4-byte bus_space access at the register's fixed offset.
 */
#define	ioat_read_chancnt(ioat) \
	ioat_read_1((ioat), IOAT_CHANCNT_OFFSET)

#define	ioat_read_xfercap(ioat) \
	(ioat_read_1((ioat), IOAT_XFERCAP_OFFSET) & IOAT_XFERCAP_VALID_MASK)

#define	ioat_write_intrctrl(ioat, value) \
	ioat_write_1((ioat), IOAT_INTRCTRL_OFFSET, (value))

/* 8-bit hardware version register (compared against IOAT_VER_* below). */
#define	ioat_read_cbver(ioat) \
	(ioat_read_1((ioat), IOAT_CBVER_OFFSET) & 0xFF)

#define	ioat_read_dmacapability(ioat) \
	ioat_read_4((ioat), IOAT_DMACAPABILITY_OFFSET)

#define	ioat_write_chanctrl(ioat, value) \
	ioat_write_2((ioat), IOAT_CHANCTRL_OFFSET, (value))
54 
55 static __inline uint64_t
56 ioat_bus_space_read_8_lower_first(bus_space_tag_t tag,
57     bus_space_handle_t handle, bus_size_t offset)
58 {
59 	return (bus_space_read_4(tag, handle, offset) |
60 	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
61 }
62 
63 static __inline void
64 ioat_bus_space_write_8_lower_first(bus_space_tag_t tag,
65     bus_space_handle_t handle, bus_size_t offset, uint64_t val)
66 {
67 	bus_space_write_4(tag, handle, offset, val);
68 	bus_space_write_4(tag, handle, offset + 4, val >> 32);
69 }
70 
/*
 * On i386, 64-bit register access is performed as two 4-byte operations
 * (lower dword first); on other architectures the native 8-byte bus_space
 * methods are used directly.
 */
#ifdef __i386__
#define ioat_bus_space_read_8 ioat_bus_space_read_8_lower_first
#define ioat_bus_space_write_8 ioat_bus_space_write_8_lower_first
#else
#define ioat_bus_space_read_8(tag, handle, offset) \
	bus_space_read_8((tag), (handle), (offset))
#define ioat_bus_space_write_8(tag, handle, offset, val) \
	bus_space_write_8((tag), (handle), (offset), (val))
#endif
80 
/*
 * Width-specific register accessors.  All device register I/O goes through
 * the softc's pci_bus_tag / pci_bus_handle pair.
 */
#define ioat_read_1(ioat, offset) \
	bus_space_read_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

#define ioat_read_2(ioat, offset) \
	bus_space_read_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

#define ioat_read_4(ioat, offset) \
	bus_space_read_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

#define ioat_read_8(ioat, offset) \
	ioat_bus_space_read_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

/*
 * 64-bit read performed as two 32-bit accesses (lower 4 bytes first) on
 * every architecture, not just i386; required by pre-3.3 hardware.
 */
#define ioat_read_double_4(ioat, offset) \
	ioat_bus_space_read_8_lower_first((ioat)->pci_bus_tag, \
	    (ioat)->pci_bus_handle, (offset))

#define ioat_write_1(ioat, offset, value) \
	bus_space_write_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

#define ioat_write_2(ioat, offset, value) \
	bus_space_write_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

#define ioat_write_4(ioat, offset, value) \
	bus_space_write_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

#define ioat_write_8(ioat, offset, value) \
	ioat_bus_space_write_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

/* Write counterpart of ioat_read_double_4: two 4-byte writes, low first. */
#define ioat_write_double_4(ioat, offset, value) \
	ioat_bus_space_write_8_lower_first((ioat)->pci_bus_tag, \
	    (ioat)->pci_bus_handle, (offset), (value))
120 
/* malloc(9) type for this driver's allocations. */
MALLOC_DECLARE(M_IOAT);

/* Root of the hw.ioat sysctl tree. */
SYSCTL_DECL(_hw_ioat);

/* Driver-wide debug verbosity; defined outside this header. */
extern int g_ioat_debug_level;
126 
/*
 * Layout of the 32-bit control word common to the hardware descriptor
 * types below; operation-specific descriptors overlay it via a union.
 * Bit positions are hardware-defined and must not be reordered.
 */
struct generic_dma_control {
	uint32_t int_enable:1;
	uint32_t src_snoop_disable:1;
	uint32_t dest_snoop_disable:1;
	uint32_t completion_update:1;
	uint32_t fence:1;
	uint32_t reserved1:1;
	uint32_t src_page_break:1;
	uint32_t dest_page_break:1;
	uint32_t bundle:1;
	uint32_t dest_dca:1;
	uint32_t hint:1;
	uint32_t reserved2:13;
	uint32_t op:8;	/* operation code (IOAT_OP_*) */
};
142 
/* Generic view of a 64-byte hardware descriptor. */
struct ioat_generic_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;		/* bus address of the next descriptor */
	uint64_t reserved[4];
};
154 
/* Hardware descriptor for plain copy operations (IOAT_OP_COPY); 64 bytes. */
struct ioat_dma_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t null:1;
			uint32_t src_page_break:1;
			uint32_t dest_page_break:1;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t reserved:13;
			#define IOAT_OP_COPY 0x00
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;		/* bus address of the next descriptor */
	uint64_t next_src_addr;
	uint64_t next_dest_addr;
	uint64_t user1;
	uint64_t user2;
};
185 
/* Hardware descriptor for fill operations (IOAT_OP_FILL); 64 bytes. */
struct ioat_fill_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t reserved:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t reserved2:2;
			uint32_t dest_page_break:1;
			uint32_t bundle:1;
			uint32_t reserved3:15;
			#define IOAT_OP_FILL 0x01
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_data;	/* fill pattern (data, not an address) */
	uint64_t dest_addr;
	uint64_t next;
	uint64_t reserved;
	uint64_t next_dest_addr;
	uint64_t user1;
	uint64_t user2;
};
213 
/*
 * Hardware descriptor for the CRC32 operation family
 * (IOAT_OP_CRC*, IOAT_OP_MOVECRC*); 64 bytes.
 */
struct ioat_crc32_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t reserved1:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t use_seed:1;
			/*
			 * crc_location:
			 * For IOAT_OP_MOVECRC_TEST and IOAT_OP_CRC_TEST:
			 * 0: comparison value is pointed to by CRC Address
			 *    field.
			 * 1: comparison value follows data in wire format
			 *    ("inverted reflected bit order") in the 4 bytes
			 *    following the source data.
			 *
			 * For IOAT_OP_CRC_STORE:
			 * 0: Result will be stored at location pointed to by
			 *    CRC Address field (in wire format).
			 * 1: Result will be stored directly following the
			 *    source data.
			 *
			 * For IOAT_OP_MOVECRC_STORE:
			 * 0: Result will be stored at location pointed to by
			 *    CRC Address field (in wire format).
			 * 1: Result will be stored directly following the
			 *    *destination* data.
			 */
			uint32_t crc_location:1;
			uint32_t reserved2:11;
			/*
			 * MOVECRC - Move data in the same way as standard copy
			 * operation, but also compute CRC32.
			 *
			 * CRC - Only compute CRC on source data.
			 *
			 * There is a CRC accumulator register in the hardware.
			 * If 'initial' is set, it is initialized to the value
			 * in 'seed.'
			 *
			 * In all modes, these operators accumulate size bytes
			 * at src_addr into the running CRC32C.
			 *
			 * Store mode emits the accumulated CRC, in wire
			 * format, as specified by the crc_location bit above.
			 *
			 * Test mode compares the accumulated CRC against the
			 * reference CRC, as described in crc_location above.
			 * On failure, halts the DMA engine with a CRC error
			 * status.
			 */
			#define	IOAT_OP_MOVECRC		0x41
			#define	IOAT_OP_MOVECRC_TEST	0x42
			#define	IOAT_OP_MOVECRC_STORE	0x43
			#define	IOAT_OP_CRC		0x81
			#define	IOAT_OP_CRC_TEST	0x82
			#define	IOAT_OP_CRC_STORE	0x83
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;
	uint64_t next_src_addr;
	uint64_t next_dest_addr;
	uint32_t seed;		/* initial accumulator value (use_seed) */
	uint32_t reserved;
	uint64_t crc_address;	/* see crc_location comment above */
};
292 
/*
 * Hardware descriptor for XOR / XOR-validate operations (IOAT_OP_XOR,
 * IOAT_OP_XOR_VAL).  Holds up to 5 source addresses; sources 6-8 continue
 * in a following ioat_xor_ext_hw_descriptor.
 */
struct ioat_xor_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t src_count:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t reserved:13;
			#define IOAT_OP_XOR 0x87
			#define IOAT_OP_XOR_VAL 0x88
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;
	uint64_t src_addr2;
	uint64_t src_addr3;
	uint64_t src_addr4;
	uint64_t src_addr5;
};
322 
/* Extension descriptor carrying XOR source addresses 6-8; 64 bytes. */
struct ioat_xor_ext_hw_descriptor {
	uint64_t src_addr6;
	uint64_t src_addr7;
	uint64_t src_addr8;
	uint64_t next;
	uint64_t reserved[4];
};
330 
/*
 * Hardware descriptor for P+Q operations (IOAT_OP_PQ, IOAT_OP_PQ_VAL).
 * Holds up to 3 source addresses here; sources 4-8 continue in a following
 * ioat_pq_ext_hw_descriptor.  coef[] carries one coefficient per source.
 */
struct ioat_pq_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t src_count:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t p_disable:1;
			uint32_t q_disable:1;
			uint32_t reserved:11;
			#define IOAT_OP_PQ 0x89
			#define IOAT_OP_PQ_VAL 0x8a
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t p_addr;
	uint64_t next;
	uint64_t src_addr2;
	uint64_t src_addr3;
	uint8_t  coef[8];
	uint64_t q_addr;
};
362 
/* Extension descriptor carrying P+Q source addresses 4-8; 64 bytes. */
struct ioat_pq_ext_hw_descriptor {
	uint64_t src_addr4;
	uint64_t src_addr5;
	uint64_t src_addr6;
	uint64_t next;
	uint64_t src_addr7;
	uint64_t src_addr8;
	uint64_t reserved[2];
};
372 
/* Hardware descriptor for P+Q update operations (IOAT_OP_PQ_UP); 64 bytes. */
struct ioat_pq_update_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t src_cnt:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t p_disable:1;
			uint32_t q_disable:1;
			uint32_t reserved:3;
			uint32_t coef:8;	/* coefficient carried in-line */
			#define IOAT_OP_PQ_UP 0x8b
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t p_addr;
	uint64_t next;
	uint64_t src_addr2;
	uint64_t p_src;
	uint64_t q_src;
	uint64_t q_addr;
};
404 
/* Untyped view of a 64-byte hardware descriptor. */
struct ioat_raw_hw_descriptor {
	uint64_t field[8];
};
408 
/* Per-operation completion callback, embedded in each ioat_descriptor. */
struct bus_dmadesc {
	bus_dmaengine_callback_t callback_fn;
	void			 *callback_arg;
};
413 
/*
 * Driver-side ring entry: completion callback plus a typed pointer to the
 * hardware descriptor and its bus address.
 */
struct ioat_descriptor {
	struct bus_dmadesc	bus_dmadesc;
	/* All union members alias the same 64-byte hardware descriptor. */
	union {
		struct ioat_generic_hw_descriptor	*generic;
		struct ioat_dma_hw_descriptor		*dma;
		struct ioat_fill_hw_descriptor		*fill;
		struct ioat_crc32_hw_descriptor		*crc32;
		struct ioat_xor_hw_descriptor		*xor;
		struct ioat_xor_ext_hw_descriptor	*xor_ext;
		struct ioat_pq_hw_descriptor		*pq;
		struct ioat_pq_ext_hw_descriptor	*pq_ext;
		struct ioat_raw_hw_descriptor		*raw;
	} u;
	uint32_t		id;	/* position in the ring */
	bus_addr_t		hw_desc_bus_addr;
};
430 
/* Unused by this driver at this time. */
#define	IOAT_OP_MARKER		0x84

/*
 * Deprecated OPs -- v3 DMA generates an abort if given these.  And this driver
 * doesn't support anything older than v3.
 */
#define	IOAT_OP_OLD_XOR		0x85
#define	IOAT_OP_OLD_XOR_VAL	0x86

/*
 * Kinds of references held on the softc; used to index the per-kind
 * refkinds[] debug counters kept under INVARIANTS in struct ioat_softc.
 */
enum ioat_ref_kind {
	IOAT_DMAENGINE_REF = 0,
	IOAT_ACTIVE_DESCR_REF,
	IOAT_NUM_REF_KINDS
};
446 
/* One of these per allocated PCI device. */
struct ioat_softc {
	bus_dmaengine_t		dmaengine;
/*
 * Recover the containing softc from a pointer to its dmaengine member
 * (the handle handed out to bus_dmaengine consumers).
 */
#define	to_ioat_softc(_dmaeng)						\
({									\
	bus_dmaengine_t *_p = (_dmaeng);				\
	(struct ioat_softc *)((char *)_p -				\
	    offsetof(struct ioat_softc, dmaengine));			\
})

	int			version;	/* hardware version, IOAT_VER_* */
	unsigned		chan_idx;

	struct mtx		submit_lock;
	device_t		device;
	/* Register window; all ioat_read_*/ioat_write_* macros use these. */
	bus_space_tag_t		pci_bus_tag;
	bus_space_handle_t	pci_bus_handle;
	int			pci_resource_id;
	struct resource		*pci_resource;
	uint32_t		max_xfer_size;
	uint32_t		capabilities;
	uint16_t		intrdelay_max;
	uint16_t		cached_intrdelay;

	struct resource		*res;
	int			rid;
	void			*tag;

	bus_dma_tag_t		hw_desc_tag;
	bus_dmamap_t		hw_desc_map;

	/*
	 * Device-pushed completion status word; preferred over reading
	 * CHANSTS directly (see the note above ioat_get_chansts below).
	 */
	bus_dma_tag_t		comp_update_tag;
	bus_dmamap_t		comp_update_map;
	uint64_t		*comp_update;
	bus_addr_t		comp_update_bus_addr;

	struct callout		poll_timer;
	struct callout		shrink_timer;
	struct task		reset_task;

	boolean_t		quiescing;
	boolean_t		destroying;
	boolean_t		is_submitter_processing;
	boolean_t		is_resize_pending;
	boolean_t		is_completion_pending;	/* submit_lock */
	boolean_t		is_reset_pending;
	boolean_t		is_channel_running;
	boolean_t		intrdelay_supported;
	boolean_t		resetting;		/* submit_lock */
	boolean_t		resetting_cleanup;	/* cleanup_lock */

	/*
	 * Descriptor ring state.  NOTE(review): presumably the ring holds
	 * 1 << ring_size_order entries -- confirm against ioat.c.
	 */
	uint32_t		head;
	uint32_t		tail;
	uint32_t		hw_head;
	uint32_t		ring_size_order;
	bus_addr_t		last_seen;

	struct ioat_descriptor	**ring;

	struct mtx		cleanup_lock;
	volatile uint32_t	refcnt;
#ifdef INVARIANTS
	/* Per-kind breakdown of refcnt, debugging only (enum ioat_ref_kind). */
	volatile uint32_t	refkinds[IOAT_NUM_REF_KINDS];
#endif

	struct {
		uint64_t	interrupts;
		uint64_t	descriptors_processed;
		uint64_t	descriptors_error;
		uint64_t	descriptors_submitted;

		uint32_t	channel_halts;
		uint32_t	last_halt_chanerr;
	} stats;
};
522 
/* Test-facility attach/detach hooks; implemented outside this header. */
void ioat_test_attach(void);
void ioat_test_detach(void);
525 
526 /*
527  * XXX DO NOT USE this routine for obtaining the current completed descriptor.
528  *
529  * The double_4 read on ioat<3.3 appears to result in torn reads.  And v3.2
530  * hardware is still commonplace (Broadwell Xeon has it).  Instead, use the
531  * device-pushed *comp_update.
532  *
533  * It is safe to use ioat_get_chansts() for the low status bits.
534  */
535 static inline uint64_t
536 ioat_get_chansts(struct ioat_softc *ioat)
537 {
538 	uint64_t status;
539 
540 	if (ioat->version >= IOAT_VER_3_3)
541 		status = ioat_read_8(ioat, IOAT_CHANSTS_OFFSET);
542 	else
543 		/* Must read lower 4 bytes before upper 4 bytes. */
544 		status = ioat_read_double_4(ioat, IOAT_CHANSTS_OFFSET);
545 	return (status);
546 }
547 
548 static inline void
549 ioat_write_chancmp(struct ioat_softc *ioat, uint64_t addr)
550 {
551 
552 	if (ioat->version >= IOAT_VER_3_3)
553 		ioat_write_8(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
554 	else
555 		ioat_write_double_4(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
556 }
557 
558 static inline void
559 ioat_write_chainaddr(struct ioat_softc *ioat, uint64_t addr)
560 {
561 
562 	if (ioat->version >= IOAT_VER_3_3)
563 		ioat_write_8(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
564 	else
565 		ioat_write_double_4(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
566 }
567 
568 static inline boolean_t
569 is_ioat_active(uint64_t status)
570 {
571 	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
572 }
573 
574 static inline boolean_t
575 is_ioat_idle(uint64_t status)
576 {
577 	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_IDLE);
578 }
579 
580 static inline boolean_t
581 is_ioat_halted(uint64_t status)
582 {
583 	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
584 }
585 
586 static inline boolean_t
587 is_ioat_suspended(uint64_t status)
588 {
589 	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
590 }
591 
/* Issue the SUSPEND command to the channel command register. */
static inline void
ioat_suspend(struct ioat_softc *ioat)
{
	ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_SUSPEND);
}
597 
/* Issue the RESET command to the channel command register. */
static inline void
ioat_reset(struct ioat_softc *ioat)
{
	ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET);
}
603 
604 static inline boolean_t
605 ioat_reset_pending(struct ioat_softc *ioat)
606 {
607 	uint8_t cmd;
608 
609 	cmd = ioat_read_1(ioat, IOAT_CHANCMD_OFFSET);
610 	return ((cmd & IOAT_CHANCMD_RESET) != 0);
611 }
612 
613 #endif /* __IOAT_INTERNAL_H__ */
614