1 /*- 2 * Copyright (C) 2012 Intel Corporation 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 __FBSDID("$FreeBSD$"); 28 29 #ifndef __IOAT_INTERNAL_H__ 30 #define __IOAT_INTERNAL_H__ 31 32 #include <sys/_task.h> 33 34 #define DEVICE2SOFTC(dev) ((struct ioat_softc *) device_get_softc(dev)) 35 #define KTR_IOAT KTR_SPARE3 36 37 #define ioat_read_chancnt(ioat) \ 38 ioat_read_1((ioat), IOAT_CHANCNT_OFFSET) 39 40 #define ioat_read_xfercap(ioat) \ 41 (ioat_read_1((ioat), IOAT_XFERCAP_OFFSET) & IOAT_XFERCAP_VALID_MASK) 42 43 #define ioat_write_intrctrl(ioat, value) \ 44 ioat_write_1((ioat), IOAT_INTRCTRL_OFFSET, (value)) 45 46 #define ioat_read_cbver(ioat) \ 47 (ioat_read_1((ioat), IOAT_CBVER_OFFSET) & 0xFF) 48 49 #define ioat_read_dmacapability(ioat) \ 50 ioat_read_4((ioat), IOAT_DMACAPABILITY_OFFSET) 51 52 #define ioat_write_chanctrl(ioat, value) \ 53 ioat_write_2((ioat), IOAT_CHANCTRL_OFFSET, (value)) 54 55 static __inline uint64_t 56 ioat_bus_space_read_8_lower_first(bus_space_tag_t tag, 57 bus_space_handle_t handle, bus_size_t offset) 58 { 59 return (bus_space_read_4(tag, handle, offset) | 60 ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32); 61 } 62 63 static __inline void 64 ioat_bus_space_write_8_lower_first(bus_space_tag_t tag, 65 bus_space_handle_t handle, bus_size_t offset, uint64_t val) 66 { 67 bus_space_write_4(tag, handle, offset, val); 68 bus_space_write_4(tag, handle, offset + 4, val >> 32); 69 } 70 71 #ifdef __i386__ 72 #define ioat_bus_space_read_8 ioat_bus_space_read_8_lower_first 73 #define ioat_bus_space_write_8 ioat_bus_space_write_8_lower_first 74 #else 75 #define ioat_bus_space_read_8(tag, handle, offset) \ 76 bus_space_read_8((tag), (handle), (offset)) 77 #define ioat_bus_space_write_8(tag, handle, offset, val) \ 78 bus_space_write_8((tag), (handle), (offset), (val)) 79 #endif 80 81 #define ioat_read_1(ioat, offset) \ 82 bus_space_read_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \ 83 (offset)) 84 85 #define ioat_read_2(ioat, offset) \ 86 bus_space_read_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \ 87 (offset)) 88 89 #define ioat_read_4(ioat, offset) \ 90 bus_space_read_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \ 91 (offset)) 92 93 #define ioat_read_8(ioat, offset) \ 94 ioat_bus_space_read_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \ 95 (offset)) 96 97 #define ioat_read_double_4(ioat, offset) \ 98 ioat_bus_space_read_8_lower_first((ioat)->pci_bus_tag, \ 99 (ioat)->pci_bus_handle, (offset)) 100 101 #define ioat_write_1(ioat, offset, value) \ 102 bus_space_write_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \ 103 (offset), (value)) 104 105 #define ioat_write_2(ioat, offset, value) \ 106 bus_space_write_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \ 107 (offset), (value)) 108 109 #define ioat_write_4(ioat, offset, value) \ 110 bus_space_write_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \ 111 (offset), (value)) 112 113 #define ioat_write_8(ioat, offset, value) \ 114 ioat_bus_space_write_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \ 115 (offset), (value)) 116 117 #define ioat_write_double_4(ioat, offset, value) \ 118 ioat_bus_space_write_8_lower_first((ioat)->pci_bus_tag, \ 119 (ioat)->pci_bus_handle, (offset), (value)) 120 121 MALLOC_DECLARE(M_IOAT); 122 123 SYSCTL_DECL(_hw_ioat); 124 125 extern int g_ioat_debug_level; 126 127 struct generic_dma_control { 128 uint32_t int_enable:1; 129 uint32_t src_snoop_disable:1; 130 uint32_t dest_snoop_disable:1; 131 uint32_t completion_update:1; 132 uint32_t fence:1; 133 uint32_t reserved1:1; 134 uint32_t src_page_break:1; 135 uint32_t dest_page_break:1; 136 uint32_t bundle:1; 137 uint32_t dest_dca:1; 138 uint32_t hint:1; 139 uint32_t reserved2:13; 140 uint32_t op:8; 141 }; 142 143 struct ioat_generic_hw_descriptor { 144 uint32_t size; 145 union { 146 uint32_t control_raw; 147 struct generic_dma_control control_generic; 148 } u; 149 uint64_t src_addr; 150 uint64_t dest_addr; 151 uint64_t next; 152 uint64_t reserved[4]; 153 }; 154 155 struct ioat_dma_hw_descriptor { 156 uint32_t size; 157 union { 158 uint32_t control_raw; 159 struct generic_dma_control control_generic; 160 struct { 161 uint32_t int_enable:1; 162 uint32_t src_snoop_disable:1; 163 uint32_t dest_snoop_disable:1; 164 uint32_t completion_update:1; 165 uint32_t fence:1; 166 uint32_t null:1; 167 uint32_t src_page_break:1; 168 uint32_t dest_page_break:1; 169 uint32_t bundle:1; 170 uint32_t dest_dca:1; 171 uint32_t hint:1; 172 uint32_t reserved:13; 173 #define IOAT_OP_COPY 0x00 174 uint32_t op:8; 175 } control; 176 } u; 177 uint64_t src_addr; 178 uint64_t dest_addr; 179 uint64_t next; 180 uint64_t next_src_addr; 181 uint64_t next_dest_addr; 182 uint64_t user1; 183 uint64_t user2; 184 }; 185 186 struct ioat_fill_hw_descriptor { 187 uint32_t size; 188 union { 189 uint32_t control_raw; 190 struct generic_dma_control control_generic; 191 struct { 192 uint32_t int_enable:1; 193 uint32_t reserved:1; 194 uint32_t dest_snoop_disable:1; 195 uint32_t completion_update:1; 196 uint32_t fence:1; 197 uint32_t reserved2:2; 198 uint32_t dest_page_break:1; 199 uint32_t bundle:1; 200 uint32_t reserved3:15; 201 #define IOAT_OP_FILL 0x01 202 uint32_t op:8; 203 } control; 204 } u; 205 uint64_t src_data; 206 uint64_t dest_addr; 207 uint64_t next; 208 uint64_t reserved; 209 uint64_t next_dest_addr; 210 uint64_t user1; 211 uint64_t user2; 212 }; 213 214 struct ioat_crc32_hw_descriptor { 215 uint32_t size; 216 union { 217 uint32_t control_raw; 218 struct generic_dma_control control_generic; 219 struct { 220 uint32_t int_enable:1; 221 uint32_t src_snoop_disable:1; 222 uint32_t dest_snoop_disable:1; 223 uint32_t completion_update:1; 224 uint32_t fence:1; 225 uint32_t reserved1:3; 226 uint32_t bundle:1; 227 uint32_t dest_dca:1; 228 uint32_t hint:1; 229 uint32_t use_seed:1; 230 /* 231 * crc_location: 232 * For IOAT_OP_MOVECRC_TEST and IOAT_OP_CRC_TEST: 233 * 0: comparison value is pointed to by CRC Address 234 * field. 235 * 1: comparison value follows data in wire format 236 * ("inverted reflected bit order") in the 4 bytes 237 * following the source data. 238 * 239 * For IOAT_OP_CRC_STORE: 240 * 0: Result will be stored at location pointed to by 241 * CRC Address field (in wire format). 242 * 1: Result will be stored directly following the 243 * source data. 244 * 245 * For IOAT_OP_MOVECRC_STORE: 246 * 0: Result will be stored at location pointed to by 247 * CRC Address field (in wire format). 248 * 1: Result will be stored directly following the 249 * *destination* data. 250 */ 251 uint32_t crc_location:1; 252 uint32_t reserved2:11; 253 /* 254 * MOVECRC - Move data in the same way as standard copy 255 * operation, but also compute CRC32. 256 * 257 * CRC - Only compute CRC on source data. 258 * 259 * There is a CRC accumulator register in the hardware. 260 * If 'initial' is set, it is initialized to the value 261 * in 'seed.' 262 * 263 * In all modes, these operators accumulate size bytes 264 * at src_addr into the running CRC32C. 265 * 266 * Store mode emits the accumulated CRC, in wire 267 * format, as specified by the crc_location bit above. 268 * 269 * Test mode compares the accumulated CRC against the 270 * reference CRC, as described in crc_location above. 271 * On failure, halts the DMA engine with a CRC error 272 * status. 273 */ 274 #define IOAT_OP_MOVECRC 0x41 275 #define IOAT_OP_MOVECRC_TEST 0x42 276 #define IOAT_OP_MOVECRC_STORE 0x43 277 #define IOAT_OP_CRC 0x81 278 #define IOAT_OP_CRC_TEST 0x82 279 #define IOAT_OP_CRC_STORE 0x83 280 uint32_t op:8; 281 } control; 282 } u; 283 uint64_t src_addr; 284 uint64_t dest_addr; 285 uint64_t next; 286 uint64_t next_src_addr; 287 uint64_t next_dest_addr; 288 uint32_t seed; 289 uint32_t reserved; 290 uint64_t crc_address; 291 }; 292 293 struct ioat_xor_hw_descriptor { 294 uint32_t size; 295 union { 296 uint32_t control_raw; 297 struct generic_dma_control control_generic; 298 struct { 299 uint32_t int_enable:1; 300 uint32_t src_snoop_disable:1; 301 uint32_t dest_snoop_disable:1; 302 uint32_t completion_update:1; 303 uint32_t fence:1; 304 uint32_t src_count:3; 305 uint32_t bundle:1; 306 uint32_t dest_dca:1; 307 uint32_t hint:1; 308 uint32_t reserved:13; 309 #define IOAT_OP_XOR 0x87 310 #define IOAT_OP_XOR_VAL 0x88 311 uint32_t op:8; 312 } control; 313 } u; 314 uint64_t src_addr; 315 uint64_t dest_addr; 316 uint64_t next; 317 uint64_t src_addr2; 318 uint64_t src_addr3; 319 uint64_t src_addr4; 320 uint64_t src_addr5; 321 }; 322 323 struct ioat_xor_ext_hw_descriptor { 324 uint64_t src_addr6; 325 uint64_t src_addr7; 326 uint64_t src_addr8; 327 uint64_t next; 328 uint64_t reserved[4]; 329 }; 330 331 struct ioat_pq_hw_descriptor { 332 uint32_t size; 333 union { 334 uint32_t control_raw; 335 struct generic_dma_control control_generic; 336 struct { 337 uint32_t int_enable:1; 338 uint32_t src_snoop_disable:1; 339 uint32_t dest_snoop_disable:1; 340 uint32_t completion_update:1; 341 uint32_t fence:1; 342 uint32_t src_count:3; 343 uint32_t bundle:1; 344 uint32_t dest_dca:1; 345 uint32_t hint:1; 346 uint32_t p_disable:1; 347 uint32_t q_disable:1; 348 uint32_t reserved:11; 349 #define IOAT_OP_PQ 0x89 350 #define IOAT_OP_PQ_VAL 0x8a 351 uint32_t op:8; 352 } control; 353 } u; 354 uint64_t src_addr; 355 uint64_t p_addr; 356 uint64_t next; 357 uint64_t src_addr2; 358 uint64_t src_addr3; 359 uint8_t coef[8]; 360 uint64_t q_addr; 361 }; 362 363 struct ioat_pq_ext_hw_descriptor { 364 uint64_t src_addr4; 365 uint64_t src_addr5; 366 uint64_t src_addr6; 367 uint64_t next; 368 uint64_t src_addr7; 369 uint64_t src_addr8; 370 uint64_t reserved[2]; 371 }; 372 373 struct ioat_pq_update_hw_descriptor { 374 uint32_t size; 375 union { 376 uint32_t control_raw; 377 struct generic_dma_control control_generic; 378 struct { 379 uint32_t int_enable:1; 380 uint32_t src_snoop_disable:1; 381 uint32_t dest_snoop_disable:1; 382 uint32_t completion_update:1; 383 uint32_t fence:1; 384 uint32_t src_cnt:3; 385 uint32_t bundle:1; 386 uint32_t dest_dca:1; 387 uint32_t hint:1; 388 uint32_t p_disable:1; 389 uint32_t q_disable:1; 390 uint32_t reserved:3; 391 uint32_t coef:8; 392 #define IOAT_OP_PQ_UP 0x8b 393 uint32_t op:8; 394 } control; 395 } u; 396 uint64_t src_addr; 397 uint64_t p_addr; 398 uint64_t next; 399 uint64_t src_addr2; 400 uint64_t p_src; 401 uint64_t q_src; 402 uint64_t q_addr; 403 }; 404 405 struct ioat_raw_hw_descriptor { 406 uint64_t field[8]; 407 }; 408 409 struct bus_dmadesc { 410 bus_dmaengine_callback_t callback_fn; 411 void *callback_arg; 412 }; 413 414 struct ioat_descriptor { 415 struct bus_dmadesc bus_dmadesc; 416 uint32_t id; 417 bus_dmamap_t src_dmamap; 418 bus_dmamap_t dst_dmamap; 419 bus_dmamap_t src2_dmamap; 420 bus_dmamap_t dst2_dmamap; 421 bus_dmamap_t crc_dmamap; 422 }; 423 424 /* Unused by this driver at this time. */ 425 #define IOAT_OP_MARKER 0x84 426 427 /* 428 * Deprecated OPs -- v3 DMA generates an abort if given these. And this driver 429 * doesn't support anything older than v3. 430 */ 431 #define IOAT_OP_OLD_XOR 0x85 432 #define IOAT_OP_OLD_XOR_VAL 0x86 433 434 /* One of these per allocated PCI device. */ 435 struct ioat_softc { 436 bus_dmaengine_t dmaengine; 437 #define to_ioat_softc(_dmaeng) \ 438 ({ \ 439 bus_dmaengine_t *_p = (_dmaeng); \ 440 (struct ioat_softc *)((char *)_p - \ 441 offsetof(struct ioat_softc, dmaengine)); \ 442 }) 443 444 device_t device; 445 int version; 446 unsigned chan_idx; 447 448 bus_space_tag_t pci_bus_tag; 449 bus_space_handle_t pci_bus_handle; 450 struct resource *pci_resource; 451 int pci_resource_id; 452 uint32_t max_xfer_size; 453 uint32_t capabilities; 454 uint32_t ring_size_order; 455 uint16_t intrdelay_max; 456 uint16_t cached_intrdelay; 457 458 int rid; 459 struct resource *res; 460 void *tag; 461 462 bus_dma_tag_t hw_desc_tag; 463 bus_dmamap_t hw_desc_map; 464 465 bus_dma_tag_t data_tag; 466 bus_dma_tag_t data_crc_tag; 467 468 bus_dma_tag_t comp_update_tag; 469 bus_dmamap_t comp_update_map; 470 uint64_t *comp_update; 471 bus_addr_t comp_update_bus_addr; 472 473 boolean_t quiescing; 474 boolean_t destroying; 475 boolean_t is_submitter_processing; 476 boolean_t intrdelay_supported; 477 boolean_t resetting; /* submit_lock */ 478 boolean_t resetting_cleanup; /* cleanup_lock */ 479 480 struct ioat_descriptor *ring; 481 482 union ioat_hw_descriptor { 483 struct ioat_generic_hw_descriptor generic; 484 struct ioat_dma_hw_descriptor dma; 485 struct ioat_fill_hw_descriptor fill; 486 struct ioat_crc32_hw_descriptor crc32; 487 struct ioat_xor_hw_descriptor xor; 488 struct ioat_xor_ext_hw_descriptor xor_ext; 489 struct ioat_pq_hw_descriptor pq; 490 struct ioat_pq_ext_hw_descriptor pq_ext; 491 struct ioat_raw_hw_descriptor raw; 492 } *hw_desc_ring; 493 bus_addr_t hw_desc_bus_addr; 494 #define RING_PHYS_ADDR(sc, i) (sc)->hw_desc_bus_addr + \ 495 (((i) % (1 << (sc)->ring_size_order)) * sizeof(struct ioat_dma_hw_descriptor)) 496 497 struct mtx_padalign submit_lock; 498 struct callout poll_timer; 499 struct task reset_task; 500 struct mtx_padalign cleanup_lock; 501 502 uint32_t refcnt; 503 uint32_t head; 504 uint32_t acq_head; 505 uint32_t tail; 506 bus_addr_t last_seen; 507 508 struct { 509 uint64_t interrupts; 510 uint64_t descriptors_processed; 511 uint64_t descriptors_error; 512 uint64_t descriptors_submitted; 513 514 uint32_t channel_halts; 515 uint32_t last_halt_chanerr; 516 } stats; 517 }; 518 519 void ioat_test_attach(void); 520 void ioat_test_detach(void); 521 522 /* 523 * XXX DO NOT USE this routine for obtaining the current completed descriptor. 524 * 525 * The double_4 read on ioat<3.3 appears to result in torn reads. And v3.2 526 * hardware is still commonplace (Broadwell Xeon has it). Instead, use the 527 * device-pushed *comp_update. 528 * 529 * It is safe to use ioat_get_chansts() for the low status bits. 530 */ 531 static inline uint64_t 532 ioat_get_chansts(struct ioat_softc *ioat) 533 { 534 uint64_t status; 535 536 if (ioat->version >= IOAT_VER_3_3) 537 status = ioat_read_8(ioat, IOAT_CHANSTS_OFFSET); 538 else 539 /* Must read lower 4 bytes before upper 4 bytes. */ 540 status = ioat_read_double_4(ioat, IOAT_CHANSTS_OFFSET); 541 return (status); 542 } 543 544 static inline void 545 ioat_write_chancmp(struct ioat_softc *ioat, uint64_t addr) 546 { 547 548 if (ioat->version >= IOAT_VER_3_3) 549 ioat_write_8(ioat, IOAT_CHANCMP_OFFSET_LOW, addr); 550 else 551 ioat_write_double_4(ioat, IOAT_CHANCMP_OFFSET_LOW, addr); 552 } 553 554 static inline void 555 ioat_write_chainaddr(struct ioat_softc *ioat, uint64_t addr) 556 { 557 558 if (ioat->version >= IOAT_VER_3_3) 559 ioat_write_8(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr); 560 else 561 ioat_write_double_4(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr); 562 } 563 564 static inline boolean_t 565 is_ioat_active(uint64_t status) 566 { 567 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE); 568 } 569 570 static inline boolean_t 571 is_ioat_idle(uint64_t status) 572 { 573 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_IDLE); 574 } 575 576 static inline boolean_t 577 is_ioat_halted(uint64_t status) 578 { 579 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED); 580 } 581 582 static inline boolean_t 583 is_ioat_suspended(uint64_t status) 584 { 585 return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED); 586 } 587 588 static inline void 589 ioat_suspend(struct ioat_softc *ioat) 590 { 591 ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_SUSPEND); 592 } 593 594 static inline void 595 ioat_reset(struct ioat_softc *ioat) 596 { 597 ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET); 598 } 599 600 static inline boolean_t 601 ioat_reset_pending(struct ioat_softc *ioat) 602 { 603 uint8_t cmd; 604 605 cmd = ioat_read_1(ioat, IOAT_CHANCMD_OFFSET); 606 return ((cmd & IOAT_CHANCMD_RESET) != 0); 607 } 608 609 #endif /* __IOAT_INTERNAL_H__ */ 610