xref: /freebsd/sys/dev/ioat/ioat_internal.h (revision fe75646a0234a261c0013bf1840fdac4acaf0cec)
1 /*-
2  * Copyright (C) 2012 Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 #ifndef __IOAT_INTERNAL_H__
27 #define __IOAT_INTERNAL_H__
28 
29 #include <sys/_task.h>
30 
/* Fetch the driver softc attached to a device_t, typed as ioat_softc. */
#define	DEVICE2SOFTC(dev)	((struct ioat_softc *)device_get_softc(dev))

/* KTR trace class used by this driver (aliased to the spare class 3). */
#define	KTR_IOAT		KTR_SPARE3
33 
/*
 * Named accessors for individual registers.  Each one wraps the
 * width-specific ioat_read_N()/ioat_write_N() macro with the register's
 * offset constant.
 */

/* 1-byte read of the register at IOAT_CHANCNT_OFFSET. */
#define	ioat_read_chancnt(ioat)						\
	ioat_read_1((ioat), IOAT_CHANCNT_OFFSET)

/* 1-byte read at IOAT_XFERCAP_OFFSET, masked to the valid bits. */
#define	ioat_read_xfercap(ioat)						\
	(ioat_read_1((ioat), IOAT_XFERCAP_OFFSET) & IOAT_XFERCAP_VALID_MASK)

/* 1-byte write to the register at IOAT_INTRCTRL_OFFSET. */
#define	ioat_write_intrctrl(ioat, value)				\
	ioat_write_1((ioat), IOAT_INTRCTRL_OFFSET, (value))

/* 1-byte read at IOAT_CBVER_OFFSET, masked to the low 8 bits. */
#define	ioat_read_cbver(ioat)						\
	(ioat_read_1((ioat), IOAT_CBVER_OFFSET) & 0xFF)

/* 4-byte read of the register at IOAT_DMACAPABILITY_OFFSET. */
#define	ioat_read_dmacapability(ioat)					\
	ioat_read_4((ioat), IOAT_DMACAPABILITY_OFFSET)

/* 2-byte write to the register at IOAT_CHANCTRL_OFFSET. */
#define	ioat_write_chanctrl(ioat, value)				\
	ioat_write_2((ioat), IOAT_CHANCTRL_OFFSET, (value))
51 
52 static __inline uint64_t
53 ioat_bus_space_read_8_lower_first(bus_space_tag_t tag,
54     bus_space_handle_t handle, bus_size_t offset)
55 {
56 	return (bus_space_read_4(tag, handle, offset) |
57 	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
58 }
59 
60 static __inline void
61 ioat_bus_space_write_8_lower_first(bus_space_tag_t tag,
62     bus_space_handle_t handle, bus_size_t offset, uint64_t val)
63 {
64 	bus_space_write_4(tag, handle, offset, val);
65 	bus_space_write_4(tag, handle, offset + 4, val >> 32);
66 }
67 
/*
 * Select the 8-byte accessors.  Everything except i386 forwards to the
 * native bus_space_read_8()/bus_space_write_8(); i386 builds alias them to
 * the split low-then-high 32-bit helpers instead.
 */
#if !defined(__i386__)
#define	ioat_bus_space_read_8(tag, handle, offset)			\
	bus_space_read_8((tag), (handle), (offset))
#define	ioat_bus_space_write_8(tag, handle, offset, val)		\
	bus_space_write_8((tag), (handle), (offset), (val))
#else
#define	ioat_bus_space_read_8	ioat_bus_space_read_8_lower_first
#define	ioat_bus_space_write_8	ioat_bus_space_write_8_lower_first
#endif
77 
/*
 * Width-specific register accessors.  All of them operate on the mapping
 * described by the softc's pci_bus_tag / pci_bus_handle pair.
 *
 * The "_8" variants use whichever 8-byte accessor was selected for this
 * architecture; the "_double_4" variants always split the access into two
 * 32-bit operations via the *_lower_first helpers.
 */

/* Reads. */
#define	ioat_read_1(ioat, offset)					\
	bus_space_read_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle,	\
	    (offset))

#define	ioat_read_2(ioat, offset)					\
	bus_space_read_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle,	\
	    (offset))

#define	ioat_read_4(ioat, offset)					\
	bus_space_read_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle,	\
	    (offset))

#define	ioat_read_8(ioat, offset)					\
	ioat_bus_space_read_8((ioat)->pci_bus_tag,			\
	    (ioat)->pci_bus_handle, (offset))

#define	ioat_read_double_4(ioat, offset)				\
	ioat_bus_space_read_8_lower_first((ioat)->pci_bus_tag,		\
	    (ioat)->pci_bus_handle, (offset))

/* Writes. */
#define	ioat_write_1(ioat, offset, value)				\
	bus_space_write_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle,	\
	    (offset), (value))

#define	ioat_write_2(ioat, offset, value)				\
	bus_space_write_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle,	\
	    (offset), (value))

#define	ioat_write_4(ioat, offset, value)				\
	bus_space_write_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle,	\
	    (offset), (value))

#define	ioat_write_8(ioat, offset, value)				\
	ioat_bus_space_write_8((ioat)->pci_bus_tag,			\
	    (ioat)->pci_bus_handle, (offset), (value))

#define	ioat_write_double_4(ioat, offset, value)			\
	ioat_bus_space_write_8_lower_first((ioat)->pci_bus_tag,		\
	    (ioat)->pci_bus_handle, (offset), (value))
117 
/* Driver-wide declarations; the matching definitions are not in this header. */
MALLOC_DECLARE(M_IOAT);
SYSCTL_DECL(_hw_ioat);

extern int g_ioat_debug_level;
123 
/*
 * Control word layout shared by every descriptor type: eleven single-bit
 * flags, thirteen reserved bits, and an 8-bit opcode (32 bits in total).
 * Field order is significant and must not be changed.
 */
struct generic_dma_control {
	uint32_t	int_enable:1;
	uint32_t	src_snoop_disable:1;
	uint32_t	dest_snoop_disable:1;
	uint32_t	completion_update:1;
	uint32_t	fence:1;
	uint32_t	reserved1:1;
	uint32_t	src_page_break:1;
	uint32_t	dest_page_break:1;
	uint32_t	bundle:1;
	uint32_t	dest_dca:1;
	uint32_t	hint:1;
	uint32_t	reserved2:13;
	uint32_t	op:8;		/* operation code (IOAT_OP_*) */
};
139 
140 struct ioat_generic_hw_descriptor {
141 	uint32_t size;
142 	union {
143 		uint32_t control_raw;
144 		struct generic_dma_control control_generic;
145 	} u;
146 	uint64_t src_addr;
147 	uint64_t dest_addr;
148 	uint64_t next;
149 	uint64_t reserved[4];
150 };
151 
152 struct ioat_dma_hw_descriptor {
153 	uint32_t size;
154 	union {
155 		uint32_t control_raw;
156 		struct generic_dma_control control_generic;
157 		struct {
158 			uint32_t int_enable:1;
159 			uint32_t src_snoop_disable:1;
160 			uint32_t dest_snoop_disable:1;
161 			uint32_t completion_update:1;
162 			uint32_t fence:1;
163 			uint32_t null:1;
164 			uint32_t src_page_break:1;
165 			uint32_t dest_page_break:1;
166 			uint32_t bundle:1;
167 			uint32_t dest_dca:1;
168 			uint32_t hint:1;
169 			uint32_t reserved:13;
170 			#define IOAT_OP_COPY 0x00
171 			uint32_t op:8;
172 		} control;
173 	} u;
174 	uint64_t src_addr;
175 	uint64_t dest_addr;
176 	uint64_t next;
177 	uint64_t next_src_addr;
178 	uint64_t next_dest_addr;
179 	uint64_t user1;
180 	uint64_t user2;
181 };
182 
183 struct ioat_fill_hw_descriptor {
184 	uint32_t size;
185 	union {
186 		uint32_t control_raw;
187 		struct generic_dma_control control_generic;
188 		struct {
189 			uint32_t int_enable:1;
190 			uint32_t reserved:1;
191 			uint32_t dest_snoop_disable:1;
192 			uint32_t completion_update:1;
193 			uint32_t fence:1;
194 			uint32_t reserved2:2;
195 			uint32_t dest_page_break:1;
196 			uint32_t bundle:1;
197 			uint32_t reserved3:15;
198 			#define IOAT_OP_FILL 0x01
199 			uint32_t op:8;
200 		} control;
201 	} u;
202 	uint64_t src_data;
203 	uint64_t dest_addr;
204 	uint64_t next;
205 	uint64_t reserved;
206 	uint64_t next_dest_addr;
207 	uint64_t user1;
208 	uint64_t user2;
209 };
210 
211 struct ioat_crc32_hw_descriptor {
212 	uint32_t size;
213 	union {
214 		uint32_t control_raw;
215 		struct generic_dma_control control_generic;
216 		struct {
217 			uint32_t int_enable:1;
218 			uint32_t src_snoop_disable:1;
219 			uint32_t dest_snoop_disable:1;
220 			uint32_t completion_update:1;
221 			uint32_t fence:1;
222 			uint32_t reserved1:3;
223 			uint32_t bundle:1;
224 			uint32_t dest_dca:1;
225 			uint32_t hint:1;
226 			uint32_t use_seed:1;
227 			/*
228 			 * crc_location:
229 			 * For IOAT_OP_MOVECRC_TEST and IOAT_OP_CRC_TEST:
230 			 * 0: comparison value is pointed to by CRC Address
231 			 *    field.
232 			 * 1: comparison value follows data in wire format
233 			 *    ("inverted reflected bit order") in the 4 bytes
234 			 *    following the source data.
235 			 *
236 			 * For IOAT_OP_CRC_STORE:
237 			 * 0: Result will be stored at location pointed to by
238 			 *    CRC Address field (in wire format).
239 			 * 1: Result will be stored directly following the
240 			 *    source data.
241 			 *
242 			 * For IOAT_OP_MOVECRC_STORE:
243 			 * 0: Result will be stored at location pointed to by
244 			 *    CRC Address field (in wire format).
245 			 * 1: Result will be stored directly following the
246 			 *    *destination* data.
247 			 */
248 			uint32_t crc_location:1;
249 			uint32_t reserved2:11;
250 			/*
251 			 * MOVECRC - Move data in the same way as standard copy
252 			 * operation, but also compute CRC32.
253 			 *
254 			 * CRC - Only compute CRC on source data.
255 			 *
256 			 * There is a CRC accumulator register in the hardware.
257 			 * If 'initial' is set, it is initialized to the value
258 			 * in 'seed.'
259 			 *
260 			 * In all modes, these operators accumulate size bytes
261 			 * at src_addr into the running CRC32C.
262 			 *
263 			 * Store mode emits the accumulated CRC, in wire
264 			 * format, as specified by the crc_location bit above.
265 			 *
266 			 * Test mode compares the accumulated CRC against the
267 			 * reference CRC, as described in crc_location above.
268 			 * On failure, halts the DMA engine with a CRC error
269 			 * status.
270 			 */
271 			#define	IOAT_OP_MOVECRC		0x41
272 			#define	IOAT_OP_MOVECRC_TEST	0x42
273 			#define	IOAT_OP_MOVECRC_STORE	0x43
274 			#define	IOAT_OP_CRC		0x81
275 			#define	IOAT_OP_CRC_TEST	0x82
276 			#define	IOAT_OP_CRC_STORE	0x83
277 			uint32_t op:8;
278 		} control;
279 	} u;
280 	uint64_t src_addr;
281 	uint64_t dest_addr;
282 	uint64_t next;
283 	uint64_t next_src_addr;
284 	uint64_t next_dest_addr;
285 	uint32_t seed;
286 	uint32_t reserved;
287 	uint64_t crc_address;
288 };
289 
290 struct ioat_xor_hw_descriptor {
291 	uint32_t size;
292 	union {
293 		uint32_t control_raw;
294 		struct generic_dma_control control_generic;
295 		struct {
296 			uint32_t int_enable:1;
297 			uint32_t src_snoop_disable:1;
298 			uint32_t dest_snoop_disable:1;
299 			uint32_t completion_update:1;
300 			uint32_t fence:1;
301 			uint32_t src_count:3;
302 			uint32_t bundle:1;
303 			uint32_t dest_dca:1;
304 			uint32_t hint:1;
305 			uint32_t reserved:13;
306 			#define IOAT_OP_XOR 0x87
307 			#define IOAT_OP_XOR_VAL 0x88
308 			uint32_t op:8;
309 		} control;
310 	} u;
311 	uint64_t src_addr;
312 	uint64_t dest_addr;
313 	uint64_t next;
314 	uint64_t src_addr2;
315 	uint64_t src_addr3;
316 	uint64_t src_addr4;
317 	uint64_t src_addr5;
318 };
319 
/*
 * Extension layout for XOR: source addresses 6 through 8, a 'next' link,
 * and four reserved quadwords.
 */
struct ioat_xor_ext_hw_descriptor {
	uint64_t	src_addr6;
	uint64_t	src_addr7;
	uint64_t	src_addr8;
	uint64_t	next;
	uint64_t	reserved[4];
};
327 
328 struct ioat_pq_hw_descriptor {
329 	uint32_t size;
330 	union {
331 		uint32_t control_raw;
332 		struct generic_dma_control control_generic;
333 		struct {
334 			uint32_t int_enable:1;
335 			uint32_t src_snoop_disable:1;
336 			uint32_t dest_snoop_disable:1;
337 			uint32_t completion_update:1;
338 			uint32_t fence:1;
339 			uint32_t src_count:3;
340 			uint32_t bundle:1;
341 			uint32_t dest_dca:1;
342 			uint32_t hint:1;
343 			uint32_t p_disable:1;
344 			uint32_t q_disable:1;
345 			uint32_t reserved:11;
346 			#define IOAT_OP_PQ 0x89
347 			#define IOAT_OP_PQ_VAL 0x8a
348 			uint32_t op:8;
349 		} control;
350 	} u;
351 	uint64_t src_addr;
352 	uint64_t p_addr;
353 	uint64_t next;
354 	uint64_t src_addr2;
355 	uint64_t src_addr3;
356 	uint8_t  coef[8];
357 	uint64_t q_addr;
358 };
359 
/*
 * Extension layout for PQ: source addresses 4 through 8 with the 'next'
 * link sitting between src_addr6 and src_addr7, then two reserved
 * quadwords.
 */
struct ioat_pq_ext_hw_descriptor {
	uint64_t	src_addr4;
	uint64_t	src_addr5;
	uint64_t	src_addr6;
	uint64_t	next;
	uint64_t	src_addr7;
	uint64_t	src_addr8;
	uint64_t	reserved[2];
};
369 
370 struct ioat_pq_update_hw_descriptor {
371 	uint32_t size;
372 	union {
373 		uint32_t control_raw;
374 		struct generic_dma_control control_generic;
375 		struct {
376 			uint32_t int_enable:1;
377 			uint32_t src_snoop_disable:1;
378 			uint32_t dest_snoop_disable:1;
379 			uint32_t completion_update:1;
380 			uint32_t fence:1;
381 			uint32_t src_cnt:3;
382 			uint32_t bundle:1;
383 			uint32_t dest_dca:1;
384 			uint32_t hint:1;
385 			uint32_t p_disable:1;
386 			uint32_t q_disable:1;
387 			uint32_t reserved:3;
388 			uint32_t coef:8;
389 			#define IOAT_OP_PQ_UP 0x8b
390 			uint32_t op:8;
391 		} control;
392 	} u;
393 	uint64_t src_addr;
394 	uint64_t p_addr;
395 	uint64_t next;
396 	uint64_t src_addr2;
397 	uint64_t p_src;
398 	uint64_t q_src;
399 	uint64_t q_addr;
400 };
401 
/* Untyped view of a descriptor: eight 64-bit words. */
struct ioat_raw_hw_descriptor {
	uint64_t	field[8];
};
405 
406 struct bus_dmadesc {
407 	bus_dmaengine_callback_t callback_fn;
408 	void			 *callback_arg;
409 };
410 
411 struct ioat_descriptor {
412 	struct bus_dmadesc	bus_dmadesc;
413 	uint32_t		id;
414 	bus_dmamap_t		src_dmamap;
415 	bus_dmamap_t		dst_dmamap;
416 	bus_dmamap_t		src2_dmamap;
417 	bus_dmamap_t		dst2_dmamap;
418 };
419 
/* Defined for completeness; this driver does not currently use it. */
#define	IOAT_OP_MARKER		0x84

/*
 * Deprecated opcodes.  v3 DMA engines abort when handed these, and this
 * driver does not support anything older than v3.
 */
#define	IOAT_OP_OLD_XOR		0x85
#define	IOAT_OP_OLD_XOR_VAL	0x86
429 
430 /* One of these per allocated PCI device. */
431 struct ioat_softc {
432 	bus_dmaengine_t		dmaengine;
433 #define	to_ioat_softc(_dmaeng)						\
434 ({									\
435 	bus_dmaengine_t *_p = (_dmaeng);				\
436 	(struct ioat_softc *)((char *)_p -				\
437 	    offsetof(struct ioat_softc, dmaengine));			\
438 })
439 
440 	device_t		device;
441 	int			domain;
442 	int			cpu;
443 	int			version;
444 	unsigned		chan_idx;
445 
446 	bus_space_tag_t		pci_bus_tag;
447 	bus_space_handle_t	pci_bus_handle;
448 	struct resource		*pci_resource;
449 	int			pci_resource_id;
450 	uint32_t		max_xfer_size;
451 	uint32_t		capabilities;
452 	uint32_t		ring_size_order;
453 	uint16_t		intrdelay_max;
454 	uint16_t		cached_intrdelay;
455 
456 	int			rid;
457 	struct resource		*res;
458 	void			*tag;
459 
460 	bus_dma_tag_t		hw_desc_tag;
461 	bus_dmamap_t		hw_desc_map;
462 
463 	bus_dma_tag_t		data_tag;
464 
465 	bus_dma_tag_t		comp_update_tag;
466 	bus_dmamap_t		comp_update_map;
467 	uint64_t		*comp_update;
468 	bus_addr_t		comp_update_bus_addr;
469 
470 	boolean_t		quiescing;
471 	boolean_t		destroying;
472 	boolean_t		is_submitter_processing;
473 	boolean_t		intrdelay_supported;
474 	boolean_t		resetting;		/* submit_lock */
475 	boolean_t		resetting_cleanup;	/* cleanup_lock */
476 
477 	struct ioat_descriptor	*ring;
478 
479 	union ioat_hw_descriptor {
480 		struct ioat_generic_hw_descriptor	generic;
481 		struct ioat_dma_hw_descriptor		dma;
482 		struct ioat_fill_hw_descriptor		fill;
483 		struct ioat_crc32_hw_descriptor		crc32;
484 		struct ioat_xor_hw_descriptor		xor;
485 		struct ioat_xor_ext_hw_descriptor	xor_ext;
486 		struct ioat_pq_hw_descriptor		pq;
487 		struct ioat_pq_ext_hw_descriptor	pq_ext;
488 		struct ioat_raw_hw_descriptor		raw;
489 	} *hw_desc_ring;
490 	bus_addr_t		hw_desc_bus_addr;
491 #define	RING_PHYS_ADDR(sc, i)	(sc)->hw_desc_bus_addr + \
492     (((i) % (1 << (sc)->ring_size_order)) * sizeof(struct ioat_dma_hw_descriptor))
493 
494 	struct mtx_padalign	submit_lock;
495 	struct callout		poll_timer;
496 	struct task		reset_task;
497 	struct mtx_padalign	cleanup_lock;
498 
499 	uint32_t		refcnt;
500 	uint32_t		head;
501 	uint32_t		acq_head;
502 	uint32_t		tail;
503 	bus_addr_t		last_seen;
504 
505 	struct {
506 		uint64_t	interrupts;
507 		uint64_t	descriptors_processed;
508 		uint64_t	descriptors_error;
509 		uint64_t	descriptors_submitted;
510 
511 		uint32_t	channel_halts;
512 		uint32_t	last_halt_chanerr;
513 	} stats;
514 };
515 
/* Attach/detach hooks for the test facility; implemented outside this header. */
void	ioat_test_attach(void);
void	ioat_test_detach(void);
518 
519 /*
520  * XXX DO NOT USE this routine for obtaining the current completed descriptor.
521  *
522  * The double_4 read on ioat<3.3 appears to result in torn reads.  And v3.2
523  * hardware is still commonplace (Broadwell Xeon has it).  Instead, use the
524  * device-pushed *comp_update.
525  *
526  * It is safe to use ioat_get_chansts() for the low status bits.
527  */
528 static inline uint64_t
529 ioat_get_chansts(struct ioat_softc *ioat)
530 {
531 	uint64_t status;
532 
533 	if (ioat->version >= IOAT_VER_3_3)
534 		status = ioat_read_8(ioat, IOAT_CHANSTS_OFFSET);
535 	else
536 		/* Must read lower 4 bytes before upper 4 bytes. */
537 		status = ioat_read_double_4(ioat, IOAT_CHANSTS_OFFSET);
538 	return (status);
539 }
540 
541 static inline void
542 ioat_write_chancmp(struct ioat_softc *ioat, uint64_t addr)
543 {
544 
545 	if (ioat->version >= IOAT_VER_3_3)
546 		ioat_write_8(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
547 	else
548 		ioat_write_double_4(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
549 }
550 
551 static inline void
552 ioat_write_chainaddr(struct ioat_softc *ioat, uint64_t addr)
553 {
554 
555 	if (ioat->version >= IOAT_VER_3_3)
556 		ioat_write_8(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
557 	else
558 		ioat_write_double_4(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
559 }
560 
561 static inline boolean_t
562 is_ioat_active(uint64_t status)
563 {
564 	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
565 }
566 
567 static inline boolean_t
568 is_ioat_idle(uint64_t status)
569 {
570 	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_IDLE);
571 }
572 
573 static inline boolean_t
574 is_ioat_halted(uint64_t status)
575 {
576 	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
577 }
578 
579 static inline boolean_t
580 is_ioat_suspended(uint64_t status)
581 {
582 	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
583 }
584 
585 static inline void
586 ioat_suspend(struct ioat_softc *ioat)
587 {
588 	ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_SUSPEND);
589 }
590 
591 static inline void
592 ioat_reset(struct ioat_softc *ioat)
593 {
594 	ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET);
595 }
596 
597 static inline boolean_t
598 ioat_reset_pending(struct ioat_softc *ioat)
599 {
600 	uint8_t cmd;
601 
602 	cmd = ioat_read_1(ioat, IOAT_CHANCMD_OFFSET);
603 	return ((cmd & IOAT_CHANCMD_RESET) != 0);
604 }
605 
606 #endif /* __IOAT_INTERNAL_H__ */
607