/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#ifndef __IOAT_INTERNAL_H__
#define __IOAT_INTERNAL_H__

#include <sys/_task.h>

#define DEVICE2SOFTC(dev) ((struct ioat_softc *) device_get_softc(dev))
#define KTR_IOAT KTR_SPARE3

#define ioat_read_chancnt(ioat) \
	ioat_read_1((ioat), IOAT_CHANCNT_OFFSET)

#define ioat_read_xfercap(ioat) \
	(ioat_read_1((ioat), IOAT_XFERCAP_OFFSET) & IOAT_XFERCAP_VALID_MASK)

#define ioat_write_intrctrl(ioat, value) \
	ioat_write_1((ioat), IOAT_INTRCTRL_OFFSET, (value))

#define ioat_read_cbver(ioat) \
	(ioat_read_1((ioat), IOAT_CBVER_OFFSET) & 0xFF)

#define ioat_read_dmacapability(ioat) \
	ioat_read_4((ioat), IOAT_DMACAPABILITY_OFFSET)

#define ioat_write_chanctrl(ioat, value) \
	ioat_write_2((ioat), IOAT_CHANCTRL_OFFSET, (value))

static __inline uint64_t
ioat_bus_space_read_8_lower_first(bus_space_tag_t tag,
    bus_space_handle_t handle, bus_size_t offset)
{
	return (bus_space_read_4(tag, handle, offset) |
	    ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
}

static __inline void
ioat_bus_space_write_8_lower_first(bus_space_tag_t tag,
    bus_space_handle_t handle, bus_size_t offset, uint64_t val)
{
	bus_space_write_4(tag, handle, offset, val);
	bus_space_write_4(tag, handle, offset + 4, val >> 32);
}

#ifdef __i386__
#define ioat_bus_space_read_8 ioat_bus_space_read_8_lower_first
#define ioat_bus_space_write_8 ioat_bus_space_write_8_lower_first
#else
#define ioat_bus_space_read_8(tag, handle, offset) \
	bus_space_read_8((tag), (handle), (offset))
#define ioat_bus_space_write_8(tag, handle, offset, val) \
	bus_space_write_8((tag), (handle), (offset), (val))
#endif

#define ioat_read_1(ioat, offset) \
	bus_space_read_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

#define ioat_read_2(ioat, offset) \
	bus_space_read_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

#define ioat_read_4(ioat, offset) \
	bus_space_read_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

#define ioat_read_8(ioat, offset) \
	ioat_bus_space_read_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset))

#define ioat_read_double_4(ioat, offset) \
	ioat_bus_space_read_8_lower_first((ioat)->pci_bus_tag, \
	    (ioat)->pci_bus_handle, (offset))

#define ioat_write_1(ioat, offset, value) \
	bus_space_write_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

#define ioat_write_2(ioat, offset, value) \
	bus_space_write_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

#define ioat_write_4(ioat, offset, value) \
	bus_space_write_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

#define ioat_write_8(ioat, offset, value) \
	ioat_bus_space_write_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
	    (offset), (value))

#define ioat_write_double_4(ioat, offset, value) \
	ioat_bus_space_write_8_lower_first((ioat)->pci_bus_tag, \
	    (ioat)->pci_bus_handle, (offset), (value))
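
/*
 * Usage sketch (illustrative only): each accessor above expands to a
 * bus_space access through the softc's register mapping, e.g.:
 *
 *	uint8_t chancnt = ioat_read_chancnt(sc);
 *	ioat_write_intrctrl(sc, IOAT_INTRCTRL_MASTER_INT_EN);
 *
 * where "sc" stands for an attached struct ioat_softc pointer and the
 * register offsets and constants come from ioat_hw.h.
 */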

MALLOC_DECLARE(M_IOAT);

SYSCTL_DECL(_hw_ioat);

extern int g_ioat_debug_level;

struct generic_dma_control {
	uint32_t int_enable:1;
	uint32_t src_snoop_disable:1;
	uint32_t dest_snoop_disable:1;
	uint32_t completion_update:1;
	uint32_t fence:1;
	uint32_t reserved1:1;
	uint32_t src_page_break:1;
	uint32_t dest_page_break:1;
	uint32_t bundle:1;
	uint32_t dest_dca:1;
	uint32_t hint:1;
	uint32_t reserved2:13;
	uint32_t op:8;
};

struct ioat_generic_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;
	uint64_t reserved[4];
};

struct ioat_dma_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t null:1;
			uint32_t src_page_break:1;
			uint32_t dest_page_break:1;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t reserved:13;
#define IOAT_OP_COPY 0x00
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;
	uint64_t next_src_addr;
	uint64_t next_dest_addr;
	uint64_t user1;
	uint64_t user2;
};
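
/*
 * Illustrative sketch only -- not used by the driver.  Shows the minimal
 * programming of a copy descriptor: clear the control word, select the
 * opcode, and fill in the size and bus addresses.  'desc' would point into
 * hw_desc_ring; 'src' and 'dst' come from completed bus_dmamap loads.
 */
static inline void
ioat_example_setup_copy(struct ioat_dma_hw_descriptor *desc,
    bus_addr_t src, bus_addr_t dst, uint32_t len)
{

	desc->u.control_raw = 0;		/* start from a clean control word */
	desc->u.control.op = IOAT_OP_COPY;	/* plain memory-to-memory copy */
	desc->u.control.completion_update = 1;	/* report status via *comp_update */
	desc->size = len;
	desc->src_addr = src;
	desc->dest_addr = dst;
}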

struct ioat_fill_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t reserved:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t reserved2:2;
			uint32_t dest_page_break:1;
			uint32_t bundle:1;
			uint32_t reserved3:15;
#define IOAT_OP_FILL 0x01
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_data;
	uint64_t dest_addr;
	uint64_t next;
	uint64_t reserved;
	uint64_t next_dest_addr;
	uint64_t user1;
	uint64_t user2;
};

struct ioat_crc32_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t reserved1:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t use_seed:1;
			/*
			 * crc_location:
			 * For IOAT_OP_MOVECRC_TEST and IOAT_OP_CRC_TEST:
			 * 0: comparison value is pointed to by CRC Address
			 *    field.
			 * 1: comparison value follows data in wire format
			 *    ("inverted reflected bit order") in the 4 bytes
			 *    following the source data.
			 *
			 * For IOAT_OP_CRC_STORE:
			 * 0: Result will be stored at location pointed to by
			 *    CRC Address field (in wire format).
			 * 1: Result will be stored directly following the
			 *    source data.
			 *
			 * For IOAT_OP_MOVECRC_STORE:
			 * 0: Result will be stored at location pointed to by
			 *    CRC Address field (in wire format).
			 * 1: Result will be stored directly following the
			 *    *destination* data.
			 */
			uint32_t crc_location:1;
			uint32_t reserved2:11;
			/*
			 * MOVECRC - Move data in the same way as standard copy
			 * operation, but also compute CRC32.
			 *
			 * CRC - Only compute CRC on source data.
			 *
			 * There is a CRC accumulator register in the hardware.
			 * If 'use_seed' is set, it is initialized to the value
			 * in 'seed.'
			 *
			 * In all modes, these operators accumulate size bytes
			 * at src_addr into the running CRC32C.
			 *
			 * Store mode emits the accumulated CRC, in wire
			 * format, as specified by the crc_location bit above.
			 *
			 * Test mode compares the accumulated CRC against the
			 * reference CRC, as described in crc_location above.
			 * On failure, halts the DMA engine with a CRC error
			 * status.
			 */
#define IOAT_OP_MOVECRC		0x41
#define IOAT_OP_MOVECRC_TEST	0x42
#define IOAT_OP_MOVECRC_STORE	0x43
#define IOAT_OP_CRC		0x81
#define IOAT_OP_CRC_TEST	0x82
#define IOAT_OP_CRC_STORE	0x83
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;
	uint64_t next_src_addr;
	uint64_t next_dest_addr;
	uint32_t seed;
	uint32_t reserved;
	uint64_t crc_address;
};
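
/*
 * Illustrative sketch only -- not used by the driver.  Programs a
 * CRC-compute descriptor that accumulates over 'len' bytes at 'src' and
 * stores the result, in wire format, in the 4 bytes directly following the
 * source data (crc_location = 1 with IOAT_OP_CRC_STORE, per the comment
 * above).  Seed handling is left at its cleared default here; see the
 * 'use_seed' comment above for the accumulator semantics.
 */
static inline void
ioat_example_setup_crc_store(struct ioat_crc32_hw_descriptor *desc,
    bus_addr_t src, uint32_t len)
{

	desc->u.control_raw = 0;
	desc->u.control.op = IOAT_OP_CRC_STORE;
	desc->u.control.crc_location = 1;	/* emit CRC after the source data */
	desc->size = len;
	desc->src_addr = src;
}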

struct ioat_xor_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t src_count:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t reserved:13;
#define IOAT_OP_XOR	0x87
#define IOAT_OP_XOR_VAL	0x88
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t dest_addr;
	uint64_t next;
	uint64_t src_addr2;
	uint64_t src_addr3;
	uint64_t src_addr4;
	uint64_t src_addr5;
};

struct ioat_xor_ext_hw_descriptor {
	uint64_t src_addr6;
	uint64_t src_addr7;
	uint64_t src_addr8;
	uint64_t next;
	uint64_t reserved[4];
};

struct ioat_pq_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t src_count:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t p_disable:1;
			uint32_t q_disable:1;
			uint32_t reserved:11;
#define IOAT_OP_PQ	0x89
#define IOAT_OP_PQ_VAL	0x8a
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t p_addr;
	uint64_t next;
	uint64_t src_addr2;
	uint64_t src_addr3;
	uint8_t coef[8];
	uint64_t q_addr;
};

struct ioat_pq_ext_hw_descriptor {
	uint64_t src_addr4;
	uint64_t src_addr5;
	uint64_t src_addr6;
	uint64_t next;
	uint64_t src_addr7;
	uint64_t src_addr8;
	uint64_t reserved[2];
};

struct ioat_pq_update_hw_descriptor {
	uint32_t size;
	union {
		uint32_t control_raw;
		struct generic_dma_control control_generic;
		struct {
			uint32_t int_enable:1;
			uint32_t src_snoop_disable:1;
			uint32_t dest_snoop_disable:1;
			uint32_t completion_update:1;
			uint32_t fence:1;
			uint32_t src_cnt:3;
			uint32_t bundle:1;
			uint32_t dest_dca:1;
			uint32_t hint:1;
			uint32_t p_disable:1;
			uint32_t q_disable:1;
			uint32_t reserved:3;
			uint32_t coef:8;
#define IOAT_OP_PQ_UP	0x8b
			uint32_t op:8;
		} control;
	} u;
	uint64_t src_addr;
	uint64_t p_addr;
	uint64_t next;
	uint64_t src_addr2;
	uint64_t p_src;
	uint64_t q_src;
	uint64_t q_addr;
};

struct ioat_raw_hw_descriptor {
	uint64_t field[8];
};

struct bus_dmadesc {
	bus_dmaengine_callback_t callback_fn;
	void *callback_arg;
};

struct ioat_descriptor {
	struct bus_dmadesc bus_dmadesc;
	uint32_t id;
	bus_dmamap_t src_dmamap;
	bus_dmamap_t dst_dmamap;
	bus_dmamap_t src2_dmamap;
	bus_dmamap_t dst2_dmamap;
};

/* Unused by this driver at this time. */
#define IOAT_OP_MARKER		0x84

/*
 * Deprecated OPs -- v3 DMA generates an abort if given these.  And this driver
 * doesn't support anything older than v3.
 */
#define IOAT_OP_OLD_XOR		0x85
#define IOAT_OP_OLD_XOR_VAL	0x86

/* One of these per allocated PCI device. */
struct ioat_softc {
	bus_dmaengine_t dmaengine;
#define to_ioat_softc(_dmaeng)						\
({									\
	bus_dmaengine_t *_p = (_dmaeng);				\
	(struct ioat_softc *)((char *)_p -				\
	    offsetof(struct ioat_softc, dmaengine));			\
})

	device_t device;
	int domain;
	int cpu;
	int version;
	unsigned chan_idx;

	bus_space_tag_t pci_bus_tag;
	bus_space_handle_t pci_bus_handle;
	struct resource *pci_resource;
	int pci_resource_id;
	uint32_t max_xfer_size;
	uint32_t capabilities;
	uint32_t ring_size_order;
	uint16_t intrdelay_max;
	uint16_t cached_intrdelay;

	int rid;
	struct resource *res;
	void *tag;

	bus_dma_tag_t hw_desc_tag;
	bus_dmamap_t hw_desc_map;

	bus_dma_tag_t data_tag;

	bus_dma_tag_t comp_update_tag;
	bus_dmamap_t comp_update_map;
	uint64_t *comp_update;
	bus_addr_t comp_update_bus_addr;

	boolean_t quiescing;
	boolean_t destroying;
	boolean_t is_submitter_processing;
	boolean_t intrdelay_supported;
	boolean_t resetting;		/* submit_lock */
	boolean_t resetting_cleanup;	/* cleanup_lock */

	struct ioat_descriptor *ring;

	union ioat_hw_descriptor {
		struct ioat_generic_hw_descriptor generic;
		struct ioat_dma_hw_descriptor dma;
		struct ioat_fill_hw_descriptor fill;
		struct ioat_crc32_hw_descriptor crc32;
		struct ioat_xor_hw_descriptor xor;
		struct ioat_xor_ext_hw_descriptor xor_ext;
		struct ioat_pq_hw_descriptor pq;
		struct ioat_pq_ext_hw_descriptor pq_ext;
		struct ioat_raw_hw_descriptor raw;
	} *hw_desc_ring;
	bus_addr_t hw_desc_bus_addr;
#define RING_PHYS_ADDR(sc, i)	((sc)->hw_desc_bus_addr +		\
    (((i) % (1 << (sc)->ring_size_order)) *				\
    sizeof(struct ioat_dma_hw_descriptor)))

	struct mtx_padalign submit_lock;
	struct callout poll_timer;
	struct task reset_task;
	struct mtx_padalign cleanup_lock;

	uint32_t refcnt;
	uint32_t head;
	uint32_t acq_head;
	uint32_t tail;
	bus_addr_t last_seen;

	struct {
		uint64_t interrupts;
		uint64_t descriptors_processed;
		uint64_t descriptors_error;
		uint64_t descriptors_submitted;

		uint32_t channel_halts;
		uint32_t last_halt_chanerr;
	} stats;
};
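
/*
 * Illustrative sketch only -- not used by the driver.  Links ring slot 'i'
 * to its successor.  RING_PHYS_ADDR() reduces its index argument modulo the
 * ring size, so the wrap from the last slot back to slot 0 falls out of the
 * arithmetic.
 */
static inline void
ioat_example_chain_next(struct ioat_softc *sc, uint32_t i)
{

	sc->hw_desc_ring[i % (1 << sc->ring_size_order)].generic.next =
	    RING_PHYS_ADDR(sc, i + 1);
}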

void ioat_test_attach(void);
void ioat_test_detach(void);

/*
 * XXX DO NOT USE this routine for obtaining the current completed descriptor.
 *
 * The double_4 read on ioat<3.3 appears to result in torn reads.  And v3.2
 * hardware is still commonplace (Broadwell Xeon has it).  Instead, use the
 * device-pushed *comp_update.
 *
 * It is safe to use ioat_get_chansts() for the low status bits.
 */
static inline uint64_t
ioat_get_chansts(struct ioat_softc *ioat)
{
	uint64_t status;

	if (ioat->version >= IOAT_VER_3_3)
		status = ioat_read_8(ioat, IOAT_CHANSTS_OFFSET);
	else
		/* Must read lower 4 bytes before upper 4 bytes. */
		status = ioat_read_double_4(ioat, IOAT_CHANSTS_OFFSET);
	return (status);
}

static inline void
ioat_write_chancmp(struct ioat_softc *ioat, uint64_t addr)
{

	if (ioat->version >= IOAT_VER_3_3)
		ioat_write_8(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
	else
		ioat_write_double_4(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
}

static inline void
ioat_write_chainaddr(struct ioat_softc *ioat, uint64_t addr)
{

	if (ioat->version >= IOAT_VER_3_3)
		ioat_write_8(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
	else
		ioat_write_double_4(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
}

static inline boolean_t
is_ioat_active(uint64_t status)
{
	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
}

static inline boolean_t
is_ioat_idle(uint64_t status)
{
	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_IDLE);
}

static inline boolean_t
is_ioat_halted(uint64_t status)
{
	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
}

static inline boolean_t
is_ioat_suspended(uint64_t status)
{
	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
}

static inline void
ioat_suspend(struct ioat_softc *ioat)
{
	ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_SUSPEND);
}

static inline void
ioat_reset(struct ioat_softc *ioat)
{
	ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET);
}

static inline boolean_t
ioat_reset_pending(struct ioat_softc *ioat)
{
	uint8_t cmd;

	cmd = ioat_read_1(ioat, IOAT_CHANCMD_OFFSET);
	return ((cmd & IOAT_CHANCMD_RESET) != 0);
}

#endif /* __IOAT_INTERNAL_H__ */