/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#ifndef __IOAT_INTERNAL_H__
#define __IOAT_INTERNAL_H__

#include <sys/_task.h>

#define DEVICE2SOFTC(dev) ((struct ioat_softc *)device_get_softc(dev))
#define KTR_IOAT KTR_SPARE3

#define ioat_read_chancnt(ioat) \
        ioat_read_1((ioat), IOAT_CHANCNT_OFFSET)

#define ioat_read_xfercap(ioat) \
        (ioat_read_1((ioat), IOAT_XFERCAP_OFFSET) & IOAT_XFERCAP_VALID_MASK)

#define ioat_write_intrctrl(ioat, value) \
        ioat_write_1((ioat), IOAT_INTRCTRL_OFFSET, (value))

#define ioat_read_cbver(ioat) \
        (ioat_read_1((ioat), IOAT_CBVER_OFFSET) & 0xFF)

#define ioat_read_dmacapability(ioat) \
        ioat_read_4((ioat), IOAT_DMACAPABILITY_OFFSET)

#define ioat_write_chanctrl(ioat, value) \
        ioat_write_2((ioat), IOAT_CHANCTRL_OFFSET, (value))

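/*
 * i386 cannot issue a single 64-bit bus_space access, so the helpers
 * below split a 64-bit register access into two 4-byte operations, low
 * half first -- the ordering required by the registers that are read
 * this way (see ioat_get_chansts() below).
 */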
static __inline uint64_t
ioat_bus_space_read_8_lower_first(bus_space_tag_t tag,
    bus_space_handle_t handle, bus_size_t offset)
{
        return (bus_space_read_4(tag, handle, offset) |
            ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
}

static __inline void
ioat_bus_space_write_8_lower_first(bus_space_tag_t tag,
    bus_space_handle_t handle, bus_size_t offset, uint64_t val)
{
        bus_space_write_4(tag, handle, offset, val);
        bus_space_write_4(tag, handle, offset + 4, val >> 32);
}

#ifdef __i386__
#define ioat_bus_space_read_8 ioat_bus_space_read_8_lower_first
#define ioat_bus_space_write_8 ioat_bus_space_write_8_lower_first
#else
#define ioat_bus_space_read_8(tag, handle, offset) \
        bus_space_read_8((tag), (handle), (offset))
#define ioat_bus_space_write_8(tag, handle, offset, val) \
        bus_space_write_8((tag), (handle), (offset), (val))
#endif

#define ioat_read_1(ioat, offset) \
        bus_space_read_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset))

#define ioat_read_2(ioat, offset) \
        bus_space_read_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset))

#define ioat_read_4(ioat, offset) \
        bus_space_read_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset))

#define ioat_read_8(ioat, offset) \
        ioat_bus_space_read_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset))

#define ioat_read_double_4(ioat, offset) \
        ioat_bus_space_read_8_lower_first((ioat)->pci_bus_tag, \
            (ioat)->pci_bus_handle, (offset))

#define ioat_write_1(ioat, offset, value) \
        bus_space_write_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset), (value))

#define ioat_write_2(ioat, offset, value) \
        bus_space_write_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset), (value))

#define ioat_write_4(ioat, offset, value) \
        bus_space_write_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset), (value))

#define ioat_write_8(ioat, offset, value) \
        ioat_bus_space_write_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
            (offset), (value))

#define ioat_write_double_4(ioat, offset, value) \
        ioat_bus_space_write_8_lower_first((ioat)->pci_bus_tag, \
            (ioat)->pci_bus_handle, (offset), (value))

MALLOC_DECLARE(M_IOAT);

SYSCTL_DECL(_hw_ioat);

extern int g_ioat_debug_level;

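/*
 * Layout of the 32-bit control word common to the hardware descriptor
 * types below; each descriptor overlays its own variant on this
 * generic form through a union.
 */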
struct generic_dma_control {
        uint32_t int_enable:1;
        uint32_t src_snoop_disable:1;
        uint32_t dest_snoop_disable:1;
        uint32_t completion_update:1;
        uint32_t fence:1;
        uint32_t reserved1:1;
        uint32_t src_page_break:1;
        uint32_t dest_page_break:1;
        uint32_t bundle:1;
        uint32_t dest_dca:1;
        uint32_t hint:1;
        uint32_t reserved2:13;
        uint32_t op:8;
};

struct ioat_generic_hw_descriptor {
        uint32_t size;
        union {
                uint32_t control_raw;
                struct generic_dma_control control_generic;
        } u;
        uint64_t src_addr;
        uint64_t dest_addr;
        uint64_t next;
        uint64_t reserved[4];
};

struct ioat_dma_hw_descriptor {
        uint32_t size;
        union {
                uint32_t control_raw;
                struct generic_dma_control control_generic;
                struct {
                        uint32_t int_enable:1;
                        uint32_t src_snoop_disable:1;
                        uint32_t dest_snoop_disable:1;
                        uint32_t completion_update:1;
                        uint32_t fence:1;
                        uint32_t null:1;
                        uint32_t src_page_break:1;
                        uint32_t dest_page_break:1;
                        uint32_t bundle:1;
                        uint32_t dest_dca:1;
                        uint32_t hint:1;
                        uint32_t reserved:13;
#define IOAT_OP_COPY 0x00
                        uint32_t op:8;
                } control;
        } u;
        uint64_t src_addr;
        uint64_t dest_addr;
        uint64_t next;
        uint64_t next_src_addr;
        uint64_t next_dest_addr;
        uint64_t user1;
        uint64_t user2;
};

struct ioat_fill_hw_descriptor {
        uint32_t size;
        union {
                uint32_t control_raw;
                struct generic_dma_control control_generic;
                struct {
                        uint32_t int_enable:1;
                        uint32_t reserved:1;
                        uint32_t dest_snoop_disable:1;
                        uint32_t completion_update:1;
                        uint32_t fence:1;
                        uint32_t reserved2:2;
                        uint32_t dest_page_break:1;
                        uint32_t bundle:1;
                        uint32_t reserved3:15;
#define IOAT_OP_FILL 0x01
                        uint32_t op:8;
                } control;
        } u;
        uint64_t src_data;
        uint64_t dest_addr;
        uint64_t next;
        uint64_t reserved;
        uint64_t next_dest_addr;
        uint64_t user1;
        uint64_t user2;
};

struct ioat_crc32_hw_descriptor {
        uint32_t size;
        union {
                uint32_t control_raw;
                struct generic_dma_control control_generic;
                struct {
                        uint32_t int_enable:1;
                        uint32_t src_snoop_disable:1;
                        uint32_t dest_snoop_disable:1;
                        uint32_t completion_update:1;
                        uint32_t fence:1;
                        uint32_t reserved1:3;
                        uint32_t bundle:1;
                        uint32_t dest_dca:1;
                        uint32_t hint:1;
                        uint32_t use_seed:1;
                        /*
                         * crc_location:
                         * For IOAT_OP_MOVECRC_TEST and IOAT_OP_CRC_TEST:
                         * 0: comparison value is pointed to by CRC Address
                         *    field.
                         * 1: comparison value follows data in wire format
                         *    ("inverted reflected bit order") in the 4 bytes
                         *    following the source data.
                         *
                         * For IOAT_OP_CRC_STORE:
                         * 0: Result will be stored at location pointed to by
                         *    CRC Address field (in wire format).
                         * 1: Result will be stored directly following the
                         *    source data.
                         *
                         * For IOAT_OP_MOVECRC_STORE:
                         * 0: Result will be stored at location pointed to by
                         *    CRC Address field (in wire format).
                         * 1: Result will be stored directly following the
                         *    *destination* data.
                         */
                        uint32_t crc_location:1;
                        uint32_t reserved2:11;
                        /*
                         * MOVECRC - Move data in the same way as standard copy
                         * operation, but also compute CRC32.
                         *
                         * CRC - Only compute CRC on source data.
                         *
                         * There is a CRC accumulator register in the
                         * hardware.  If 'use_seed' is set, it is initialized
                         * to the value in 'seed'.
                         *
                         * In all modes, these operators accumulate size bytes
                         * at src_addr into the running CRC32C.
                         *
                         * Store mode emits the accumulated CRC, in wire
                         * format, as specified by the crc_location bit above.
                         *
                         * Test mode compares the accumulated CRC against the
                         * reference CRC, as described in crc_location above.
                         * On failure, halts the DMA engine with a CRC error
                         * status.
                         */
#define IOAT_OP_MOVECRC 0x41
#define IOAT_OP_MOVECRC_TEST 0x42
#define IOAT_OP_MOVECRC_STORE 0x43
#define IOAT_OP_CRC 0x81
#define IOAT_OP_CRC_TEST 0x82
#define IOAT_OP_CRC_STORE 0x83
                        uint32_t op:8;
                } control;
        } u;
        uint64_t src_addr;
        uint64_t dest_addr;
        uint64_t next;
        uint64_t next_src_addr;
        uint64_t next_dest_addr;
        uint32_t seed;
        uint32_t reserved;
        uint64_t crc_address;
};
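
/*
 * A minimal sketch (not used by the driver) of how the control fields
 * above combine: program a CRC store that seeds the accumulator and
 * appends the result after the source data.  All names come from this
 * file.
 */
static __inline void
ioat_crc32_store_example(struct ioat_crc32_hw_descriptor *desc,
    uint64_t src, uint32_t len, uint32_t seed)
{

        desc->size = len;
        desc->u.control_raw = 0;
        desc->u.control.op = IOAT_OP_CRC_STORE;
        desc->u.control.crc_location = 1;       /* CRC follows source data */
        desc->u.control.use_seed = 1;           /* start accumulator at 'seed' */
        desc->u.control.completion_update = 1;
        desc->src_addr = src;
        desc->seed = seed;
}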

struct ioat_xor_hw_descriptor {
        uint32_t size;
        union {
                uint32_t control_raw;
                struct generic_dma_control control_generic;
                struct {
                        uint32_t int_enable:1;
                        uint32_t src_snoop_disable:1;
                        uint32_t dest_snoop_disable:1;
                        uint32_t completion_update:1;
                        uint32_t fence:1;
                        uint32_t src_count:3;
                        uint32_t bundle:1;
                        uint32_t dest_dca:1;
                        uint32_t hint:1;
                        uint32_t reserved:13;
#define IOAT_OP_XOR 0x87
#define IOAT_OP_XOR_VAL 0x88
                        uint32_t op:8;
                } control;
        } u;
        uint64_t src_addr;
        uint64_t dest_addr;
        uint64_t next;
        uint64_t src_addr2;
        uint64_t src_addr3;
        uint64_t src_addr4;
        uint64_t src_addr5;
};

struct ioat_xor_ext_hw_descriptor {
        uint64_t src_addr6;
        uint64_t src_addr7;
        uint64_t src_addr8;
        uint64_t next;
        uint64_t reserved[4];
};

struct ioat_pq_hw_descriptor {
        uint32_t size;
        union {
                uint32_t control_raw;
                struct generic_dma_control control_generic;
                struct {
                        uint32_t int_enable:1;
                        uint32_t src_snoop_disable:1;
                        uint32_t dest_snoop_disable:1;
                        uint32_t completion_update:1;
                        uint32_t fence:1;
                        uint32_t src_count:3;
                        uint32_t bundle:1;
                        uint32_t dest_dca:1;
                        uint32_t hint:1;
                        uint32_t p_disable:1;
                        uint32_t q_disable:1;
                        uint32_t reserved:11;
#define IOAT_OP_PQ 0x89
#define IOAT_OP_PQ_VAL 0x8a
                        uint32_t op:8;
                } control;
        } u;
        uint64_t src_addr;
        uint64_t p_addr;
        uint64_t next;
        uint64_t src_addr2;
        uint64_t src_addr3;
        uint8_t coef[8];
        uint64_t q_addr;
};

struct ioat_pq_ext_hw_descriptor {
        uint64_t src_addr4;
        uint64_t src_addr5;
        uint64_t src_addr6;
        uint64_t next;
        uint64_t src_addr7;
        uint64_t src_addr8;
        uint64_t reserved[2];
};

struct ioat_pq_update_hw_descriptor {
        uint32_t size;
        union {
                uint32_t control_raw;
                struct generic_dma_control control_generic;
                struct {
                        uint32_t int_enable:1;
                        uint32_t src_snoop_disable:1;
                        uint32_t dest_snoop_disable:1;
                        uint32_t completion_update:1;
                        uint32_t fence:1;
                        uint32_t src_cnt:3;
                        uint32_t bundle:1;
                        uint32_t dest_dca:1;
                        uint32_t hint:1;
                        uint32_t p_disable:1;
                        uint32_t q_disable:1;
                        uint32_t reserved:3;
                        uint32_t coef:8;
#define IOAT_OP_PQ_UP 0x8b
                        uint32_t op:8;
                } control;
        } u;
        uint64_t src_addr;
        uint64_t p_addr;
        uint64_t next;
        uint64_t src_addr2;
        uint64_t p_src;
        uint64_t q_src;
        uint64_t q_addr;
};

struct ioat_raw_hw_descriptor {
        uint64_t field[8];
};

struct bus_dmadesc {
        bus_dmaengine_callback_t callback_fn;
        void *callback_arg;
};

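/*
 * Driver-side state (completion callback, busdma maps) paired with one
 * slot of the hardware descriptor ring; 'id' is the slot index.
 */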
struct ioat_descriptor {
        struct bus_dmadesc bus_dmadesc;
        uint32_t id;
        bus_dmamap_t src_dmamap;
        bus_dmamap_t dst_dmamap;
        bus_dmamap_t src2_dmamap;
        bus_dmamap_t dst2_dmamap;
};

/* Unused by this driver at this time. */
#define IOAT_OP_MARKER 0x84

/*
 * Deprecated OPs -- v3 DMA generates an abort if given these, and this
 * driver doesn't support anything older than v3.
 */
#define IOAT_OP_OLD_XOR 0x85
#define IOAT_OP_OLD_XOR_VAL 0x86

/* One of these per allocated PCI device. */
struct ioat_softc {
        bus_dmaengine_t dmaengine;
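/*
 * Recover the containing softc from an embedded dmaengine handle (the
 * usual container_of idiom).
 */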
#define to_ioat_softc(_dmaeng) \
({ \
        bus_dmaengine_t *_p = (_dmaeng); \
        (struct ioat_softc *)((char *)_p - \
            offsetof(struct ioat_softc, dmaengine)); \
})

        device_t device;
        int domain;
        int cpu;
        int version;
        unsigned chan_idx;

        bus_space_tag_t pci_bus_tag;
        bus_space_handle_t pci_bus_handle;
        struct resource *pci_resource;
        int pci_resource_id;
        uint32_t max_xfer_size;
        uint32_t capabilities;
        uint32_t ring_size_order;
        uint16_t intrdelay_max;
        uint16_t cached_intrdelay;

        int rid;
        struct resource *res;
        void *tag;

        bus_dma_tag_t hw_desc_tag;
        bus_dmamap_t hw_desc_map;

        bus_dma_tag_t data_tag;

        bus_dma_tag_t comp_update_tag;
        bus_dmamap_t comp_update_map;
        uint64_t *comp_update;
        bus_addr_t comp_update_bus_addr;

        boolean_t quiescing;
        boolean_t destroying;
        boolean_t is_submitter_processing;
        boolean_t intrdelay_supported;
        boolean_t resetting;            /* submit_lock */
        boolean_t resetting_cleanup;    /* cleanup_lock */

        struct ioat_descriptor *ring;

        union ioat_hw_descriptor {
                struct ioat_generic_hw_descriptor generic;
                struct ioat_dma_hw_descriptor dma;
                struct ioat_fill_hw_descriptor fill;
                struct ioat_crc32_hw_descriptor crc32;
                struct ioat_xor_hw_descriptor xor;
                struct ioat_xor_ext_hw_descriptor xor_ext;
                struct ioat_pq_hw_descriptor pq;
                struct ioat_pq_ext_hw_descriptor pq_ext;
                struct ioat_raw_hw_descriptor raw;
        } *hw_desc_ring;
        bus_addr_t hw_desc_bus_addr;
#define RING_PHYS_ADDR(sc, i) ((sc)->hw_desc_bus_addr + \
    (((i) % (1 << (sc)->ring_size_order)) * sizeof(struct ioat_dma_hw_descriptor)))
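/*
 * All hardware descriptor variants above are the same size (64 bytes),
 * so the DMA descriptor's size doubles as the ring stride; the modulo
 * wraps because the ring size is a power of two (1 << ring_size_order).
 */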

        struct mtx_padalign submit_lock;
        struct callout poll_timer;
        struct task reset_task;
        struct mtx_padalign cleanup_lock;

        uint32_t refcnt;
        uint32_t head;
        uint32_t acq_head;
        uint32_t tail;
        bus_addr_t last_seen;

        struct {
                uint64_t interrupts;
                uint64_t descriptors_processed;
                uint64_t descriptors_error;
                uint64_t descriptors_submitted;

                uint32_t channel_halts;
                uint32_t last_halt_chanerr;
        } stats;
};

void ioat_test_attach(void);
void ioat_test_detach(void);

/*
 * XXX DO NOT USE this routine for obtaining the current completed
 * descriptor.
 *
 * The double_4 read on ioat<3.3 appears to result in torn reads, and
 * v3.2 hardware is still commonplace (Broadwell Xeon has it).  Instead,
 * use the device-pushed *comp_update.
 *
 * It is safe to use ioat_get_chansts() for the low status bits.
 */
static inline uint64_t
ioat_get_chansts(struct ioat_softc *ioat)
{
        uint64_t status;

        if (ioat->version >= IOAT_VER_3_3)
                status = ioat_read_8(ioat, IOAT_CHANSTS_OFFSET);
        else
                /* Must read lower 4 bytes before upper 4 bytes. */
                status = ioat_read_double_4(ioat, IOAT_CHANSTS_OFFSET);
        return (status);
}
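
/*
 * A minimal sketch, per the warning above, of the preferred way to read
 * the completed-descriptor address: the device-pushed completion area
 * rather than a torn MMIO read.  IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK
 * is assumed to be defined alongside the other CHANSTS bits.
 */
static inline uint64_t
ioat_get_completed_example(struct ioat_softc *ioat)
{

        /* The channel DMA-writes CHANSTS here, so the read cannot tear. */
        return (*ioat->comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK);
}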

static inline void
ioat_write_chancmp(struct ioat_softc *ioat, uint64_t addr)
{

        if (ioat->version >= IOAT_VER_3_3)
                ioat_write_8(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
        else
                ioat_write_double_4(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
}

static inline void
ioat_write_chainaddr(struct ioat_softc *ioat, uint64_t addr)
{

        if (ioat->version >= IOAT_VER_3_3)
                ioat_write_8(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
        else
                ioat_write_double_4(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
}

static inline boolean_t
is_ioat_active(uint64_t status)
{
        return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
}

static inline boolean_t
is_ioat_idle(uint64_t status)
{
        return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_IDLE);
}

static inline boolean_t
is_ioat_halted(uint64_t status)
{
        return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
}

static inline boolean_t
is_ioat_suspended(uint64_t status)
{
        return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
}

static inline void
ioat_suspend(struct ioat_softc *ioat)
{
        ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_SUSPEND);
}

static inline void
ioat_reset(struct ioat_softc *ioat)
{
        ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET);
}

static inline boolean_t
ioat_reset_pending(struct ioat_softc *ioat)
{
        uint8_t cmd;

        cmd = ioat_read_1(ioat, IOAT_CHANCMD_OFFSET);
        return ((cmd & IOAT_CHANCMD_RESET) != 0);
}
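
/*
 * Illustrative only (hypothetical helper, not part of the driver): the
 * predicates above are typically polled after a channel command, e.g.
 * suspending and spinning until the channel leaves ACTIVE.
 */
static inline void
ioat_suspend_and_wait_example(struct ioat_softc *ioat)
{

        ioat_suspend(ioat);
        while (is_ioat_active(ioat_get_chansts(ioat)))
                DELAY(100);     /* DELAY(9) busy-waits in microseconds */
}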

#endif /* __IOAT_INTERNAL_H__ */