1 /****************************************************************************** 2 3 Copyright (c) 2006, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 3. Neither the name of the Myricom Inc, nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 31 32 ***************************************************************************/ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/linker.h> 40 #include <sys/firmware.h> 41 #include <sys/endian.h> 42 #include <sys/sockio.h> 43 #include <sys/mbuf.h> 44 #include <sys/malloc.h> 45 #include <sys/kdb.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/module.h> 49 #include <sys/memrange.h> 50 #include <sys/socket.h> 51 #include <sys/sysctl.h> 52 #include <sys/sx.h> 53 54 #include <net/if.h> 55 #include <net/if_arp.h> 56 #include <net/ethernet.h> 57 #include <net/if_dl.h> 58 #include <net/if_media.h> 59 60 #include <net/bpf.h> 61 62 #include <net/if_types.h> 63 #include <net/if_vlan_var.h> 64 #include <net/zlib.h> 65 66 #include <netinet/in_systm.h> 67 #include <netinet/in.h> 68 #include <netinet/ip.h> 69 #include <netinet/tcp.h> 70 71 #include <machine/bus.h> 72 #include <machine/resource.h> 73 #include <sys/bus.h> 74 #include <sys/rman.h> 75 76 #include <dev/pci/pcireg.h> 77 #include <dev/pci/pcivar.h> 78 79 #include <vm/vm.h> /* for pmap_mapdev() */ 80 #include <vm/pmap.h> 81 82 #include <dev/mxge/mxge_mcp.h> 83 #include <dev/mxge/mcp_gen_header.h> 84 #include <dev/mxge/if_mxge_var.h> 85 86 /* tunable params */ 87 static int mxge_nvidia_ecrc_enable = 1; 88 static int mxge_force_firmware = 0; 89 static int mxge_max_intr_slots = 1024; 90 static int mxge_intr_coal_delay = 30; 91 static int mxge_deassert_wait = 1; 92 static int mxge_flow_control = 1; 93 static int mxge_verbose = 0; 94 static int mxge_ticks; 95 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 96 static char *mxge_fw_aligned = "mxge_eth_z8e"; 97 98 static int mxge_probe(device_t dev); 99 static int mxge_attach(device_t dev); 100 static int mxge_detach(device_t dev); 101 static int mxge_shutdown(device_t dev); 102 static void mxge_intr(void *arg); 103 104 static device_method_t mxge_methods[] = 105 { 106 /* Device interface */ 107 DEVMETHOD(device_probe, mxge_probe), 108 DEVMETHOD(device_attach, mxge_attach), 109 DEVMETHOD(device_detach, mxge_detach), 110 DEVMETHOD(device_shutdown, mxge_shutdown), 111 {0, 0} 112 }; 113 114 static driver_t mxge_driver = 115 { 116 "mxge", 117 mxge_methods, 118 sizeof(mxge_softc_t), 119 }; 120 121 static devclass_t mxge_devclass; 122 123 /* Declare ourselves to be a child of the PCI bus.*/ 124 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 125 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 126 127 static int mxge_load_firmware(mxge_softc_t *sc); 128 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 129 130 static int 131 mxge_probe(device_t dev) 132 { 133 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 134 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E)) { 135 device_set_desc(dev, "Myri10G-PCIE-8A"); 136 return 0; 137 } 138 return ENXIO; 139 } 140 141 static void 142 mxge_enable_wc(mxge_softc_t *sc) 143 { 144 struct mem_range_desc mrdesc; 145 vm_paddr_t pa; 146 vm_offset_t len; 147 int err, action; 148 149 pa = rman_get_start(sc->mem_res); 150 len = rman_get_size(sc->mem_res); 151 mrdesc.mr_base = pa; 152 mrdesc.mr_len = len; 153 mrdesc.mr_flags = MDF_WRITECOMBINE; 154 action = MEMRANGE_SET_UPDATE; 155 strcpy((char *)&mrdesc.mr_owner, "mxge"); 156 err = mem_range_attr_set(&mrdesc, &action); 157 if (err != 0) { 158 device_printf(sc->dev, 159 "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n", 160 (unsigned long)pa, (unsigned long)len, err); 161 } else { 162 sc->wc = 1; 163 } 164 } 165 166 167 /* callback to get our DMA address */ 168 static void 169 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 170 int error) 171 { 172 if (error == 0) { 173 *(bus_addr_t *) arg = segs->ds_addr; 174 } 175 } 176 177 static int 178 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 179 bus_size_t alignment) 180 { 181 int err; 182 device_t dev = sc->dev; 183 184 /* allocate DMAable memory tags */ 185 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 186 alignment, /* alignment */ 187 4096, /* boundary */ 188 BUS_SPACE_MAXADDR, /* low */ 189 BUS_SPACE_MAXADDR, /* high */ 190 NULL, NULL, /* filter */ 191 bytes, /* maxsize */ 192 1, /* num segs */ 193 4096, /* maxsegsize */ 194 BUS_DMA_COHERENT, /* flags */ 195 NULL, NULL, /* lock */ 196 &dma->dmat); /* tag */ 197 if (err != 0) { 198 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 199 return err; 200 } 201 202 /* allocate DMAable memory & map */ 203 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 204 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 205 | BUS_DMA_ZERO), &dma->map); 206 if (err != 0) { 207 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 208 goto abort_with_dmat; 209 } 210 211 /* load the memory */ 212 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 213 mxge_dmamap_callback, 214 (void *)&dma->bus_addr, 0); 215 if (err != 0) { 216 device_printf(dev, "couldn't load map (err = %d)\n", err); 217 goto abort_with_mem; 218 } 219 return 0; 220 221 abort_with_mem: 222 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 223 abort_with_dmat: 224 (void)bus_dma_tag_destroy(dma->dmat); 225 return err; 226 } 227 228 229 static void 230 mxge_dma_free(mxge_dma_t *dma) 231 { 232 bus_dmamap_unload(dma->dmat, dma->map); 233 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 234 (void)bus_dma_tag_destroy(dma->dmat); 235 } 236 237 /* 238 * The eeprom strings on the lanaiX have the format 239 * SN=x\0 240 * MAC=x:x:x:x:x:x\0 241 * PC=text\0 242 */ 243 244 static int 245 mxge_parse_strings(mxge_softc_t *sc) 246 { 247 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 248 249 char *ptr, *limit; 250 int i, found_mac; 251 252 ptr = sc->eeprom_strings; 253 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 254 found_mac = 0; 255 while (ptr < limit && *ptr != '\0') { 256 if (memcmp(ptr, "MAC=", 4) == 0) { 257 ptr += 1; 258 sc->mac_addr_string = ptr; 259 for (i = 0; i < 6; i++) { 260 ptr += 3; 261 if ((ptr + 2) > limit) 262 goto abort; 263 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 264 found_mac = 1; 265 } 266 } else if (memcmp(ptr, "PC=", 3) == 0) { 267 ptr += 3; 268 strncpy(sc->product_code_string, ptr, 269 sizeof (sc->product_code_string) - 1); 270 } else if (memcmp(ptr, "SN=", 3) == 0) { 271 ptr += 3; 272 strncpy(sc->serial_number_string, ptr, 273 sizeof (sc->serial_number_string) - 1); 274 } 275 MXGE_NEXT_STRING(ptr); 276 } 277 278 if (found_mac) 279 return 0; 280 281 abort: 282 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 283 284 return ENXIO; 285 } 286 287 #if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__ 288 static void 289 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 290 { 291 uint32_t val; 292 unsigned long base, off; 293 char *va, *cfgptr; 294 device_t pdev, mcp55; 295 uint16_t vendor_id, device_id, word; 296 uintptr_t bus, slot, func, ivend, idev; 297 uint32_t *ptr32; 298 299 300 if (!mxge_nvidia_ecrc_enable) 301 return; 302 303 pdev = device_get_parent(device_get_parent(sc->dev)); 304 if (pdev == NULL) { 305 device_printf(sc->dev, "could not find parent?\n"); 306 return; 307 } 308 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 309 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 310 311 if (vendor_id != 0x10de) 312 return; 313 314 base = 0; 315 316 if (device_id == 0x005d) { 317 /* ck804, base address is magic */ 318 base = 0xe0000000UL; 319 } else if (device_id >= 0x0374 && device_id <= 0x378) { 320 /* mcp55, base address stored in chipset */ 321 mcp55 = pci_find_bsf(0, 0, 0); 322 if (mcp55 && 323 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 324 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 325 word = pci_read_config(mcp55, 0x90, 2); 326 base = ((unsigned long)word & 0x7ffeU) << 25; 327 } 328 } 329 if (!base) 330 return; 331 332 /* XXXX 333 Test below is commented because it is believed that doing 334 config read/write beyond 0xff will access the config space 335 for the next larger function. Uncomment this and remove 336 the hacky pmap_mapdev() way of accessing config space when 337 FreeBSD grows support for extended pcie config space access 338 */ 339 #if 0 340 /* See if we can, by some miracle, access the extended 341 config space */ 342 val = pci_read_config(pdev, 0x178, 4); 343 if (val != 0xffffffff) { 344 val |= 0x40; 345 pci_write_config(pdev, 0x178, val, 4); 346 return; 347 } 348 #endif 349 /* Rather than using normal pci config space writes, we must 350 * map the Nvidia config space ourselves. This is because on 351 * opteron/nvidia class machine the 0xe000000 mapping is 352 * handled by the nvidia chipset, that means the internal PCI 353 * device (the on-chip northbridge), or the amd-8131 bridge 354 * and things behind them are not visible by this method. 355 */ 356 357 BUS_READ_IVAR(device_get_parent(pdev), pdev, 358 PCI_IVAR_BUS, &bus); 359 BUS_READ_IVAR(device_get_parent(pdev), pdev, 360 PCI_IVAR_SLOT, &slot); 361 BUS_READ_IVAR(device_get_parent(pdev), pdev, 362 PCI_IVAR_FUNCTION, &func); 363 BUS_READ_IVAR(device_get_parent(pdev), pdev, 364 PCI_IVAR_VENDOR, &ivend); 365 BUS_READ_IVAR(device_get_parent(pdev), pdev, 366 PCI_IVAR_DEVICE, &idev); 367 368 off = base 369 + 0x00100000UL * (unsigned long)bus 370 + 0x00001000UL * (unsigned long)(func 371 + 8 * slot); 372 373 /* map it into the kernel */ 374 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 375 376 377 if (va == NULL) { 378 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 379 return; 380 } 381 /* get a pointer to the config space mapped into the kernel */ 382 cfgptr = va + (off & PAGE_MASK); 383 384 /* make sure that we can really access it */ 385 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 386 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 387 if (! (vendor_id == ivend && device_id == idev)) { 388 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 389 vendor_id, device_id); 390 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 391 return; 392 } 393 394 ptr32 = (uint32_t*)(cfgptr + 0x178); 395 val = *ptr32; 396 397 if (val == 0xffffffff) { 398 device_printf(sc->dev, "extended mapping failed\n"); 399 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 400 return; 401 } 402 *ptr32 = val | 0x40; 403 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 404 if (mxge_verbose) 405 device_printf(sc->dev, 406 "Enabled ECRC on upstream Nvidia bridge " 407 "at %d:%d:%d\n", 408 (int)bus, (int)slot, (int)func); 409 return; 410 } 411 #else 412 static void 413 mxge_enable_nvidia_ecrc(mxge_softc_t *sc, device_t pdev) 414 { 415 device_printf(sc->dev, 416 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 417 return; 418 } 419 #endif 420 421 422 static int 423 mxge_dma_test(mxge_softc_t *sc, int test_type) 424 { 425 mxge_cmd_t cmd; 426 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 427 int status; 428 uint32_t len; 429 char *test = " "; 430 431 432 /* Run a small DMA test. 433 * The magic multipliers to the length tell the firmware 434 * to do DMA read, write, or read+write tests. The 435 * results are returned in cmd.data0. The upper 16 436 * bits of the return is the number of transfers completed. 437 * The lower 16 bits is the time in 0.5us ticks that the 438 * transfers took to complete. 439 */ 440 441 len = sc->tx.boundary; 442 443 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 444 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 445 cmd.data2 = len * 0x10000; 446 status = mxge_send_cmd(sc, test_type, &cmd); 447 if (status != 0) { 448 test = "read"; 449 goto abort; 450 } 451 sc->read_dma = ((cmd.data0>>16) * len * 2) / 452 (cmd.data0 & 0xffff); 453 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 454 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 455 cmd.data2 = len * 0x1; 456 status = mxge_send_cmd(sc, test_type, &cmd); 457 if (status != 0) { 458 test = "write"; 459 goto abort; 460 } 461 sc->write_dma = ((cmd.data0>>16) * len * 2) / 462 (cmd.data0 & 0xffff); 463 464 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 465 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 466 cmd.data2 = len * 0x10001; 467 status = mxge_send_cmd(sc, test_type, &cmd); 468 if (status != 0) { 469 test = "read/write"; 470 goto abort; 471 } 472 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 473 (cmd.data0 & 0xffff); 474 475 abort: 476 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 477 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 478 test, status); 479 480 return status; 481 } 482 483 /* 484 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 485 * when the PCI-E Completion packets are aligned on an 8-byte 486 * boundary. Some PCI-E chip sets always align Completion packets; on 487 * the ones that do not, the alignment can be enforced by enabling 488 * ECRC generation (if supported). 489 * 490 * When PCI-E Completion packets are not aligned, it is actually more 491 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 492 * 493 * If the driver can neither enable ECRC nor verify that it has 494 * already been enabled, then it must use a firmware image which works 495 * around unaligned completion packets (ethp_z8e.dat), and it should 496 * also ensure that it never gives the device a Read-DMA which is 497 * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is 498 * enabled, then the driver should use the aligned (eth_z8e.dat) 499 * firmware image, and set tx.boundary to 4KB. 500 */ 501 502 static int 503 mxge_firmware_probe(mxge_softc_t *sc) 504 { 505 device_t dev = sc->dev; 506 int reg, status; 507 uint16_t pectl; 508 509 sc->tx.boundary = 4096; 510 /* 511 * Verify the max read request size was set to 4KB 512 * before trying the test with 4KB. 513 */ 514 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 515 pectl = pci_read_config(dev, reg + 0x8, 2); 516 if ((pectl & (5 << 12)) != (5 << 12)) { 517 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 518 pectl); 519 sc->tx.boundary = 2048; 520 } 521 } 522 523 /* 524 * load the optimized firmware (which assumes aligned PCIe 525 * completions) in order to see if it works on this host. 526 */ 527 sc->fw_name = mxge_fw_aligned; 528 status = mxge_load_firmware(sc); 529 if (status != 0) { 530 return status; 531 } 532 533 /* 534 * Enable ECRC if possible 535 */ 536 mxge_enable_nvidia_ecrc(sc); 537 538 /* 539 * Run a DMA test which watches for unaligned completions and 540 * aborts on the first one seen. 541 */ 542 543 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 544 if (status == 0) 545 return 0; /* keep the aligned firmware */ 546 547 if (status != E2BIG) 548 device_printf(dev, "DMA test failed: %d\n", status); 549 if (status == ENOSYS) 550 device_printf(dev, "Falling back to ethp! " 551 "Please install up to date fw\n"); 552 return status; 553 } 554 555 static int 556 mxge_select_firmware(mxge_softc_t *sc) 557 { 558 int aligned = 0; 559 560 561 if (mxge_force_firmware != 0) { 562 if (mxge_force_firmware == 1) 563 aligned = 1; 564 else 565 aligned = 0; 566 if (mxge_verbose) 567 device_printf(sc->dev, 568 "Assuming %s completions (forced)\n", 569 aligned ? "aligned" : "unaligned"); 570 goto abort; 571 } 572 573 /* if the PCIe link width is 4 or less, we can use the aligned 574 firmware and skip any checks */ 575 if (sc->link_width != 0 && sc->link_width <= 4) { 576 device_printf(sc->dev, 577 "PCIe x%d Link, expect reduced performance\n", 578 sc->link_width); 579 aligned = 1; 580 goto abort; 581 } 582 583 if (0 == mxge_firmware_probe(sc)) 584 return 0; 585 586 abort: 587 if (aligned) { 588 sc->fw_name = mxge_fw_aligned; 589 sc->tx.boundary = 4096; 590 } else { 591 sc->fw_name = mxge_fw_unaligned; 592 sc->tx.boundary = 2048; 593 } 594 return (mxge_load_firmware(sc)); 595 } 596 597 union qualhack 598 { 599 const char *ro_char; 600 char *rw_char; 601 }; 602 603 static int 604 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 605 { 606 607 608 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 609 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 610 be32toh(hdr->mcp_type)); 611 return EIO; 612 } 613 614 /* save firmware version for sysctl */ 615 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 616 if (mxge_verbose) 617 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 618 619 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 620 &sc->fw_ver_minor, &sc->fw_ver_tiny); 621 622 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 623 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 624 device_printf(sc->dev, "Found firmware version %s\n", 625 sc->fw_version); 626 device_printf(sc->dev, "Driver needs %d.%d\n", 627 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 628 return EINVAL; 629 } 630 return 0; 631 632 } 633 634 static int 635 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 636 { 637 const struct firmware *fw; 638 const mcp_gen_header_t *hdr; 639 unsigned hdr_offset; 640 const char *fw_data; 641 union qualhack hack; 642 int status; 643 unsigned int i; 644 char dummy; 645 646 647 fw = firmware_get(sc->fw_name); 648 649 if (fw == NULL) { 650 device_printf(sc->dev, "Could not find firmware image %s\n", 651 sc->fw_name); 652 return ENOENT; 653 } 654 if (fw->datasize > *limit || 655 fw->datasize < MCP_HEADER_PTR_OFFSET + 4) { 656 device_printf(sc->dev, "Firmware image %s too large (%d/%d)\n", 657 sc->fw_name, (int)fw->datasize, (int) *limit); 658 status = ENOSPC; 659 goto abort_with_fw; 660 } 661 *limit = fw->datasize; 662 663 /* check id */ 664 fw_data = (const char *)fw->data; 665 hdr_offset = htobe32(*(const uint32_t *) 666 (fw_data + MCP_HEADER_PTR_OFFSET)); 667 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->datasize) { 668 device_printf(sc->dev, "Bad firmware file"); 669 status = EIO; 670 goto abort_with_fw; 671 } 672 hdr = (const void*)(fw_data + hdr_offset); 673 674 status = mxge_validate_firmware(sc, hdr); 675 if (status != 0) 676 goto abort_with_fw; 677 678 hack.ro_char = fw_data; 679 /* Copy the inflated firmware to NIC SRAM. */ 680 for (i = 0; i < *limit; i += 256) { 681 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 682 hack.rw_char + i, 683 min(256U, (unsigned)(*limit - i))); 684 mb(); 685 dummy = *sc->sram; 686 mb(); 687 } 688 689 status = 0; 690 abort_with_fw: 691 firmware_put(fw, FIRMWARE_UNLOAD); 692 return status; 693 } 694 695 /* 696 * Enable or disable periodic RDMAs from the host to make certain 697 * chipsets resend dropped PCIe messages 698 */ 699 700 static void 701 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 702 { 703 char buf_bytes[72]; 704 volatile uint32_t *confirm; 705 volatile char *submit; 706 uint32_t *buf, dma_low, dma_high; 707 int i; 708 709 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 710 711 /* clear confirmation addr */ 712 confirm = (volatile uint32_t *)sc->cmd; 713 *confirm = 0; 714 mb(); 715 716 /* send an rdma command to the PCIe engine, and wait for the 717 response in the confirmation address. The firmware should 718 write a -1 there to indicate it is alive and well 719 */ 720 721 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 722 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 723 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 724 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 725 buf[2] = htobe32(0xffffffff); /* confirm data */ 726 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 727 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 728 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 729 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 730 buf[5] = htobe32(enable); /* enable? */ 731 732 733 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 734 735 mxge_pio_copy(submit, buf, 64); 736 mb(); 737 DELAY(1000); 738 mb(); 739 i = 0; 740 while (*confirm != 0xffffffff && i < 20) { 741 DELAY(1000); 742 i++; 743 } 744 if (*confirm != 0xffffffff) { 745 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 746 (enable ? "enable" : "disable"), confirm, 747 *confirm); 748 } 749 return; 750 } 751 752 static int 753 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 754 { 755 mcp_cmd_t *buf; 756 char buf_bytes[sizeof(*buf) + 8]; 757 volatile mcp_cmd_response_t *response = sc->cmd; 758 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 759 uint32_t dma_low, dma_high; 760 int err, sleep_total = 0; 761 762 /* ensure buf is aligned to 8 bytes */ 763 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 764 765 buf->data0 = htobe32(data->data0); 766 buf->data1 = htobe32(data->data1); 767 buf->data2 = htobe32(data->data2); 768 buf->cmd = htobe32(cmd); 769 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 770 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 771 772 buf->response_addr.low = htobe32(dma_low); 773 buf->response_addr.high = htobe32(dma_high); 774 mtx_lock(&sc->cmd_mtx); 775 response->result = 0xffffffff; 776 mb(); 777 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 778 779 /* wait up to 20ms */ 780 err = EAGAIN; 781 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 782 bus_dmamap_sync(sc->cmd_dma.dmat, 783 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 784 mb(); 785 switch (be32toh(response->result)) { 786 case 0: 787 data->data0 = be32toh(response->data); 788 err = 0; 789 break; 790 case 0xffffffff: 791 DELAY(1000); 792 break; 793 case MXGEFW_CMD_UNKNOWN: 794 err = ENOSYS; 795 break; 796 case MXGEFW_CMD_ERROR_UNALIGNED: 797 err = E2BIG; 798 break; 799 default: 800 device_printf(sc->dev, 801 "mxge: command %d " 802 "failed, result = %d\n", 803 cmd, be32toh(response->result)); 804 err = ENXIO; 805 break; 806 } 807 if (err != EAGAIN) 808 break; 809 } 810 if (err == EAGAIN) 811 device_printf(sc->dev, "mxge: command %d timed out" 812 "result = %d\n", 813 cmd, be32toh(response->result)); 814 mtx_unlock(&sc->cmd_mtx); 815 return err; 816 } 817 818 static int 819 mxge_adopt_running_firmware(mxge_softc_t *sc) 820 { 821 struct mcp_gen_header *hdr; 822 const size_t bytes = sizeof (struct mcp_gen_header); 823 size_t hdr_offset; 824 int status; 825 826 /* find running firmware header */ 827 hdr_offset = htobe32(*(volatile uint32_t *) 828 (sc->sram + MCP_HEADER_PTR_OFFSET)); 829 830 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 831 device_printf(sc->dev, 832 "Running firmware has bad header offset (%d)\n", 833 (int)hdr_offset); 834 return EIO; 835 } 836 837 /* copy header of running firmware from SRAM to host memory to 838 * validate firmware */ 839 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 840 if (hdr == NULL) { 841 device_printf(sc->dev, "could not malloc firmware hdr\n"); 842 return ENOMEM; 843 } 844 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 845 rman_get_bushandle(sc->mem_res), 846 hdr_offset, (char *)hdr, bytes); 847 status = mxge_validate_firmware(sc, hdr); 848 free(hdr, M_DEVBUF); 849 850 /* 851 * check to see if adopted firmware has bug where adopting 852 * it will cause broadcasts to be filtered unless the NIC 853 * is kept in ALLMULTI mode 854 */ 855 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 856 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 857 sc->adopted_rx_filter_bug = 1; 858 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 859 "working around rx filter bug\n", 860 sc->fw_ver_major, sc->fw_ver_minor, 861 sc->fw_ver_tiny); 862 } 863 864 return status; 865 } 866 867 868 static int 869 mxge_load_firmware(mxge_softc_t *sc) 870 { 871 volatile uint32_t *confirm; 872 volatile char *submit; 873 char buf_bytes[72]; 874 uint32_t *buf, size, dma_low, dma_high; 875 int status, i; 876 877 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 878 879 size = sc->sram_size; 880 status = mxge_load_firmware_helper(sc, &size); 881 if (status) { 882 /* Try to use the currently running firmware, if 883 it is new enough */ 884 status = mxge_adopt_running_firmware(sc); 885 if (status) { 886 device_printf(sc->dev, 887 "failed to adopt running firmware\n"); 888 return status; 889 } 890 device_printf(sc->dev, 891 "Successfully adopted running firmware\n"); 892 if (sc->tx.boundary == 4096) { 893 device_printf(sc->dev, 894 "Using firmware currently running on NIC" 895 ". For optimal\n"); 896 device_printf(sc->dev, 897 "performance consider loading optimized " 898 "firmware\n"); 899 } 900 sc->fw_name = mxge_fw_unaligned; 901 sc->tx.boundary = 2048; 902 return 0; 903 } 904 /* clear confirmation addr */ 905 confirm = (volatile uint32_t *)sc->cmd; 906 *confirm = 0; 907 mb(); 908 /* send a reload command to the bootstrap MCP, and wait for the 909 response in the confirmation address. The firmware should 910 write a -1 there to indicate it is alive and well 911 */ 912 913 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 914 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 915 916 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 917 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 918 buf[2] = htobe32(0xffffffff); /* confirm data */ 919 920 /* FIX: All newest firmware should un-protect the bottom of 921 the sram before handoff. However, the very first interfaces 922 do not. Therefore the handoff copy must skip the first 8 bytes 923 */ 924 /* where the code starts*/ 925 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 926 buf[4] = htobe32(size - 8); /* length of code */ 927 buf[5] = htobe32(8); /* where to copy to */ 928 buf[6] = htobe32(0); /* where to jump to */ 929 930 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 931 mxge_pio_copy(submit, buf, 64); 932 mb(); 933 DELAY(1000); 934 mb(); 935 i = 0; 936 while (*confirm != 0xffffffff && i < 20) { 937 DELAY(1000*10); 938 i++; 939 bus_dmamap_sync(sc->cmd_dma.dmat, 940 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 941 } 942 if (*confirm != 0xffffffff) { 943 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 944 confirm, *confirm); 945 946 return ENXIO; 947 } 948 return 0; 949 } 950 951 static int 952 mxge_update_mac_address(mxge_softc_t *sc) 953 { 954 mxge_cmd_t cmd; 955 uint8_t *addr = sc->mac_addr; 956 int status; 957 958 959 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 960 | (addr[2] << 8) | addr[3]); 961 962 cmd.data1 = ((addr[4] << 8) | (addr[5])); 963 964 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 965 return status; 966 } 967 968 static int 969 mxge_change_pause(mxge_softc_t *sc, int pause) 970 { 971 mxge_cmd_t cmd; 972 int status; 973 974 if (pause) 975 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 976 &cmd); 977 else 978 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 979 &cmd); 980 981 if (status) { 982 device_printf(sc->dev, "Failed to set flow control mode\n"); 983 return ENXIO; 984 } 985 sc->pause = pause; 986 return 0; 987 } 988 989 static void 990 mxge_change_promisc(mxge_softc_t *sc, int promisc) 991 { 992 mxge_cmd_t cmd; 993 int status; 994 995 if (promisc) 996 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 997 &cmd); 998 else 999 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1000 &cmd); 1001 1002 if (status) { 1003 device_printf(sc->dev, "Failed to set promisc mode\n"); 1004 } 1005 } 1006 1007 static void 1008 mxge_set_multicast_list(mxge_softc_t *sc) 1009 { 1010 mxge_cmd_t cmd; 1011 struct ifmultiaddr *ifma; 1012 struct ifnet *ifp = sc->ifp; 1013 int err; 1014 1015 /* This firmware is known to not support multicast */ 1016 if (!sc->fw_multicast_support) 1017 return; 1018 1019 /* Disable multicast filtering while we play with the lists*/ 1020 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1021 if (err != 0) { 1022 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1023 " error status: %d\n", err); 1024 return; 1025 } 1026 1027 if (sc->adopted_rx_filter_bug) 1028 return; 1029 1030 if (ifp->if_flags & IFF_ALLMULTI) 1031 /* request to disable multicast filtering, so quit here */ 1032 return; 1033 1034 /* Flush all the filters */ 1035 1036 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1037 if (err != 0) { 1038 device_printf(sc->dev, 1039 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1040 ", error status: %d\n", err); 1041 return; 1042 } 1043 1044 /* Walk the multicast list, and add each address */ 1045 1046 IF_ADDR_LOCK(ifp); 1047 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1048 if (ifma->ifma_addr->sa_family != AF_LINK) 1049 continue; 1050 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1051 &cmd.data0, 4); 1052 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1053 &cmd.data1, 2); 1054 cmd.data0 = htonl(cmd.data0); 1055 cmd.data1 = htonl(cmd.data1); 1056 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1057 if (err != 0) { 1058 device_printf(sc->dev, "Failed " 1059 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1060 "%d\t", err); 1061 /* abort, leaving multicast filtering off */ 1062 IF_ADDR_UNLOCK(ifp); 1063 return; 1064 } 1065 } 1066 IF_ADDR_UNLOCK(ifp); 1067 /* Enable multicast filtering */ 1068 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1069 if (err != 0) { 1070 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1071 ", error status: %d\n", err); 1072 } 1073 } 1074 1075 static int 1076 mxge_reset(mxge_softc_t *sc) 1077 { 1078 1079 mxge_cmd_t cmd; 1080 size_t bytes; 1081 int status; 1082 1083 /* try to send a reset command to the card to see if it 1084 is alive */ 1085 memset(&cmd, 0, sizeof (cmd)); 1086 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1087 if (status != 0) { 1088 device_printf(sc->dev, "failed reset\n"); 1089 return ENXIO; 1090 } 1091 1092 mxge_dummy_rdma(sc, 1); 1093 1094 /* Now exchange information about interrupts */ 1095 bytes = mxge_max_intr_slots * sizeof (*sc->rx_done.entry);\ 1096 memset(sc->rx_done.entry, 0, bytes); 1097 cmd.data0 = (uint32_t)bytes; 1098 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1099 cmd.data0 = MXGE_LOWPART_TO_U32(sc->rx_done.dma.bus_addr); 1100 cmd.data1 = MXGE_HIGHPART_TO_U32(sc->rx_done.dma.bus_addr); 1101 status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd); 1102 1103 status |= mxge_send_cmd(sc, 1104 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1105 1106 1107 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1108 1109 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1110 sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1111 1112 1113 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1114 &cmd); 1115 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1116 if (status != 0) { 1117 device_printf(sc->dev, "failed set interrupt parameters\n"); 1118 return status; 1119 } 1120 1121 1122 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1123 1124 1125 /* run a DMA benchmark */ 1126 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1127 1128 /* reset mcp/driver shared state back to 0 */ 1129 bzero(sc->rx_done.entry, bytes); 1130 sc->rx_done.idx = 0; 1131 sc->rx_done.cnt = 0; 1132 sc->tx.req = 0; 1133 sc->tx.done = 0; 1134 sc->tx.pkt_done = 0; 1135 sc->tx.wake = 0; 1136 sc->tx.stall = 0; 1137 sc->rx_big.cnt = 0; 1138 sc->rx_small.cnt = 0; 1139 sc->rdma_tags_available = 15; 1140 sc->fw_stats->valid = 0; 1141 sc->fw_stats->send_done_count = 0; 1142 status = mxge_update_mac_address(sc); 1143 mxge_change_promisc(sc, 0); 1144 mxge_change_pause(sc, sc->pause); 1145 mxge_set_multicast_list(sc); 1146 return status; 1147 } 1148 1149 static int 1150 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1151 { 1152 mxge_softc_t *sc; 1153 unsigned int intr_coal_delay; 1154 int err; 1155 1156 sc = arg1; 1157 intr_coal_delay = sc->intr_coal_delay; 1158 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1159 if (err != 0) { 1160 return err; 1161 } 1162 if (intr_coal_delay == sc->intr_coal_delay) 1163 return 0; 1164 1165 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1166 return EINVAL; 1167 1168 mtx_lock(&sc->driver_mtx); 1169 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1170 sc->intr_coal_delay = intr_coal_delay; 1171 1172 mtx_unlock(&sc->driver_mtx); 1173 return err; 1174 } 1175 1176 static int 1177 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1178 { 1179 mxge_softc_t *sc; 1180 unsigned int enabled; 1181 int err; 1182 1183 sc = arg1; 1184 enabled = sc->pause; 1185 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1186 if (err != 0) { 1187 return err; 1188 } 1189 if (enabled == sc->pause) 1190 return 0; 1191 1192 mtx_lock(&sc->driver_mtx); 1193 err = mxge_change_pause(sc, enabled); 1194 mtx_unlock(&sc->driver_mtx); 1195 return err; 1196 } 1197 1198 static int 1199 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1200 { 1201 int err; 1202 1203 if (arg1 == NULL) 1204 return EFAULT; 1205 arg2 = be32toh(*(int *)arg1); 1206 arg1 = NULL; 1207 err = sysctl_handle_int(oidp, arg1, arg2, req); 1208 1209 return err; 1210 } 1211 1212 static void 1213 mxge_add_sysctls(mxge_softc_t *sc) 1214 { 1215 struct sysctl_ctx_list *ctx; 1216 struct sysctl_oid_list *children; 1217 mcp_irq_data_t *fw; 1218 1219 ctx = device_get_sysctl_ctx(sc->dev); 1220 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1221 fw = sc->fw_stats; 1222 1223 /* random information */ 1224 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1225 "firmware_version", 1226 CTLFLAG_RD, &sc->fw_version, 1227 0, "firmware version"); 1228 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1229 "serial_number", 1230 CTLFLAG_RD, &sc->serial_number_string, 1231 0, "serial number"); 1232 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1233 "product_code", 1234 CTLFLAG_RD, &sc->product_code_string, 1235 0, "product_code"); 1236 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1237 "pcie_link_width", 1238 CTLFLAG_RD, &sc->link_width, 1239 0, "tx_boundary"); 1240 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1241 "tx_boundary", 1242 CTLFLAG_RD, &sc->tx.boundary, 1243 0, "tx_boundary"); 1244 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1245 "write_combine", 1246 CTLFLAG_RD, &sc->wc, 1247 0, "write combining PIO?"); 1248 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1249 "read_dma_MBs", 1250 CTLFLAG_RD, &sc->read_dma, 1251 0, "DMA Read speed in MB/s"); 1252 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1253 "write_dma_MBs", 1254 CTLFLAG_RD, &sc->write_dma, 1255 0, "DMA Write speed in MB/s"); 1256 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1257 "read_write_dma_MBs", 1258 CTLFLAG_RD, &sc->read_write_dma, 1259 0, "DMA concurrent Read/Write speed in MB/s"); 1260 1261 1262 /* performance related tunables */ 1263 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1264 "intr_coal_delay", 1265 CTLTYPE_INT|CTLFLAG_RW, sc, 1266 0, mxge_change_intr_coal, 1267 "I", "interrupt coalescing delay in usecs"); 1268 1269 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1270 "flow_control_enabled", 1271 CTLTYPE_INT|CTLFLAG_RW, sc, 1272 0, mxge_change_flow_control, 1273 "I", "interrupt coalescing delay in usecs"); 1274 1275 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1276 "deassert_wait", 1277 CTLFLAG_RW, &mxge_deassert_wait, 1278 0, "Wait for IRQ line to go low in ihandler"); 1279 1280 /* stats block from firmware is in network byte order. 1281 Need to swap it */ 1282 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1283 "link_up", 1284 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1285 0, mxge_handle_be32, 1286 "I", "link up"); 1287 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1288 "rdma_tags_available", 1289 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1290 0, mxge_handle_be32, 1291 "I", "rdma_tags_available"); 1292 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1293 "dropped_link_overflow", 1294 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1295 0, mxge_handle_be32, 1296 "I", "dropped_link_overflow"); 1297 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1298 "dropped_link_error_or_filtered", 1299 CTLTYPE_INT|CTLFLAG_RD, 1300 &fw->dropped_link_error_or_filtered, 1301 0, mxge_handle_be32, 1302 "I", "dropped_link_error_or_filtered"); 1303 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1304 "dropped_multicast_filtered", 1305 CTLTYPE_INT|CTLFLAG_RD, 1306 &fw->dropped_multicast_filtered, 1307 0, mxge_handle_be32, 1308 "I", "dropped_multicast_filtered"); 1309 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1310 "dropped_runt", 1311 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1312 0, mxge_handle_be32, 1313 "I", "dropped_runt"); 1314 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1315 "dropped_overrun", 1316 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1317 0, mxge_handle_be32, 1318 "I", "dropped_overrun"); 1319 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1320 "dropped_no_small_buffer", 1321 CTLTYPE_INT|CTLFLAG_RD, 1322 &fw->dropped_no_small_buffer, 1323 0, mxge_handle_be32, 1324 "I", "dropped_no_small_buffer"); 1325 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1326 "dropped_no_big_buffer", 1327 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1328 0, mxge_handle_be32, 1329 "I", "dropped_no_big_buffer"); 1330 1331 /* host counters exported for debugging */ 1332 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1333 "rx_small_cnt", 1334 CTLFLAG_RD, &sc->rx_small.cnt, 1335 0, "rx_small_cnt"); 1336 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1337 "rx_big_cnt", 1338 CTLFLAG_RD, &sc->rx_big.cnt, 1339 0, "rx_small_cnt"); 1340 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1341 "tx_req", 1342 CTLFLAG_RD, &sc->tx.req, 1343 0, "tx_req"); 1344 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1345 "tx_done", 1346 CTLFLAG_RD, &sc->tx.done, 1347 0, "tx_done"); 1348 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1349 "tx_pkt_done", 1350 CTLFLAG_RD, &sc->tx.pkt_done, 1351 0, "tx_done"); 1352 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1353 "tx_stall", 1354 CTLFLAG_RD, &sc->tx.stall, 1355 0, "tx_stall"); 1356 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1357 "tx_wake", 1358 CTLFLAG_RD, &sc->tx.wake, 1359 0, "tx_wake"); 1360 1361 /* verbose printing? */ 1362 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1363 "verbose", 1364 CTLFLAG_RW, &mxge_verbose, 1365 0, "verbose printing"); 1366 1367 } 1368 1369 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1370 backwards one at a time and handle ring wraps */ 1371 1372 static inline void 1373 mxge_submit_req_backwards(mxge_tx_buf_t *tx, 1374 mcp_kreq_ether_send_t *src, int cnt) 1375 { 1376 int idx, starting_slot; 1377 starting_slot = tx->req; 1378 while (cnt > 1) { 1379 cnt--; 1380 idx = (starting_slot + cnt) & tx->mask; 1381 mxge_pio_copy(&tx->lanai[idx], 1382 &src[cnt], sizeof(*src)); 1383 mb(); 1384 } 1385 } 1386 1387 /* 1388 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1389 * at most 32 bytes at a time, so as to avoid involving the software 1390 * pio handler in the nic. We re-write the first segment's flags 1391 * to mark them valid only after writing the entire chain 1392 */ 1393 1394 static inline void 1395 mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src, 1396 int cnt) 1397 { 1398 int idx, i; 1399 uint32_t *src_ints; 1400 volatile uint32_t *dst_ints; 1401 mcp_kreq_ether_send_t *srcp; 1402 volatile mcp_kreq_ether_send_t *dstp, *dst; 1403 uint8_t last_flags; 1404 1405 idx = tx->req & tx->mask; 1406 1407 last_flags = src->flags; 1408 src->flags = 0; 1409 mb(); 1410 dst = dstp = &tx->lanai[idx]; 1411 srcp = src; 1412 1413 if ((idx + cnt) < tx->mask) { 1414 for (i = 0; i < (cnt - 1); i += 2) { 1415 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1416 mb(); /* force write every 32 bytes */ 1417 srcp += 2; 1418 dstp += 2; 1419 } 1420 } else { 1421 /* submit all but the first request, and ensure 1422 that it is submitted below */ 1423 mxge_submit_req_backwards(tx, src, cnt); 1424 i = 0; 1425 } 1426 if (i < cnt) { 1427 /* submit the first request */ 1428 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1429 mb(); /* barrier before setting valid flag */ 1430 } 1431 1432 /* re-write the last 32-bits with the valid flags */ 1433 src->flags = last_flags; 1434 src_ints = (uint32_t *)src; 1435 src_ints+=3; 1436 dst_ints = (volatile uint32_t *)dst; 1437 dst_ints+=3; 1438 *dst_ints = *src_ints; 1439 tx->req += cnt; 1440 mb(); 1441 } 1442 1443 static inline void 1444 mxge_submit_req_wc(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src, int cnt) 1445 { 1446 tx->req += cnt; 1447 mb(); 1448 while (cnt >= 4) { 1449 mxge_pio_copy((volatile char *)tx->wc_fifo, src, 64); 1450 mb(); 1451 src += 4; 1452 cnt -= 4; 1453 } 1454 if (cnt > 0) { 1455 /* pad it to 64 bytes. The src is 64 bytes bigger than it 1456 needs to be so that we don't overrun it */ 1457 mxge_pio_copy(tx->wc_fifo + MXGEFW_ETH_SEND_OFFSET(cnt), src, 64); 1458 mb(); 1459 } 1460 } 1461 1462 static void 1463 mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt) 1464 { 1465 mxge_tx_buf_t *tx; 1466 mcp_kreq_ether_send_t *req; 1467 bus_dma_segment_t *seg; 1468 struct ether_header *eh; 1469 struct ip *ip; 1470 struct tcphdr *tcp; 1471 uint32_t low, high_swapped; 1472 int len, seglen, cum_len, cum_len_next; 1473 int next_is_first, chop, cnt, rdma_count, small; 1474 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1475 uint8_t flags, flags_next; 1476 static int once; 1477 1478 mss = m->m_pkthdr.tso_segsz; 1479 1480 /* negative cum_len signifies to the 1481 * send loop that we are still in the 1482 * header portion of the TSO packet. 1483 */ 1484 1485 /* ensure we have the ethernet, IP and TCP 1486 header together in the first mbuf, copy 1487 it to a scratch buffer if not */ 1488 if (__predict_false(m->m_len < sizeof (*eh) 1489 + sizeof (*ip))) { 1490 m_copydata(m, 0, sizeof (*eh) + sizeof (*ip), 1491 sc->scratch); 1492 eh = (struct ether_header *)sc->scratch; 1493 } else { 1494 eh = mtod(m, struct ether_header *); 1495 } 1496 ip = (struct ip *) (eh + 1); 1497 if (__predict_false(m->m_len < sizeof (*eh) + (ip->ip_hl << 2) 1498 + sizeof (*tcp))) { 1499 m_copydata(m, 0, sizeof (*eh) + (ip->ip_hl << 2) 1500 + sizeof (*tcp), sc->scratch); 1501 eh = (struct ether_header *) sc->scratch; 1502 ip = (struct ip *) (eh + 1); 1503 } 1504 1505 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1506 cum_len = -(sizeof (*eh) + ((ip->ip_hl + tcp->th_off) << 2)); 1507 1508 /* TSO implies checksum offload on this hardware */ 1509 cksum_offset = sizeof(*eh) + (ip->ip_hl << 2); 1510 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1511 1512 1513 /* for TSO, pseudo_hdr_offset holds mss. 1514 * The firmware figures out where to put 1515 * the checksum by parsing the header. */ 1516 pseudo_hdr_offset = htobe16(mss); 1517 1518 tx = &sc->tx; 1519 req = tx->req_list; 1520 seg = tx->seg_list; 1521 cnt = 0; 1522 rdma_count = 0; 1523 /* "rdma_count" is the number of RDMAs belonging to the 1524 * current packet BEFORE the current send request. For 1525 * non-TSO packets, this is equal to "count". 1526 * For TSO packets, rdma_count needs to be reset 1527 * to 0 after a segment cut. 1528 * 1529 * The rdma_count field of the send request is 1530 * the number of RDMAs of the packet starting at 1531 * that request. For TSO send requests with one ore more cuts 1532 * in the middle, this is the number of RDMAs starting 1533 * after the last cut in the request. All previous 1534 * segments before the last cut implicitly have 1 RDMA. 1535 * 1536 * Since the number of RDMAs is not known beforehand, 1537 * it must be filled-in retroactively - after each 1538 * segmentation cut or at the end of the entire packet. 1539 */ 1540 1541 while (busdma_seg_cnt) { 1542 /* Break the busdma segment up into pieces*/ 1543 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1544 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1545 len = seg->ds_len; 1546 1547 while (len) { 1548 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1549 seglen = len; 1550 cum_len_next = cum_len + seglen; 1551 (req-rdma_count)->rdma_count = rdma_count + 1; 1552 if (__predict_true(cum_len >= 0)) { 1553 /* payload */ 1554 chop = (cum_len_next > mss); 1555 cum_len_next = cum_len_next % mss; 1556 next_is_first = (cum_len_next == 0); 1557 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1558 flags_next |= next_is_first * 1559 MXGEFW_FLAGS_FIRST; 1560 rdma_count |= -(chop | next_is_first); 1561 rdma_count += chop & !next_is_first; 1562 } else if (cum_len_next >= 0) { 1563 /* header ends */ 1564 rdma_count = -1; 1565 cum_len_next = 0; 1566 seglen = -cum_len; 1567 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1568 flags_next = MXGEFW_FLAGS_TSO_PLD | 1569 MXGEFW_FLAGS_FIRST | 1570 (small * MXGEFW_FLAGS_SMALL); 1571 } 1572 1573 req->addr_high = high_swapped; 1574 req->addr_low = htobe32(low); 1575 req->pseudo_hdr_offset = pseudo_hdr_offset; 1576 req->pad = 0; 1577 req->rdma_count = 1; 1578 req->length = htobe16(seglen); 1579 req->cksum_offset = cksum_offset; 1580 req->flags = flags | ((cum_len & 1) * 1581 MXGEFW_FLAGS_ALIGN_ODD); 1582 low += seglen; 1583 len -= seglen; 1584 cum_len = cum_len_next; 1585 flags = flags_next; 1586 req++; 1587 cnt++; 1588 rdma_count++; 1589 if (__predict_false(cksum_offset > seglen)) 1590 cksum_offset -= seglen; 1591 else 1592 cksum_offset = 0; 1593 if (__predict_false(cnt > MXGE_MAX_SEND_DESC)) 1594 goto drop; 1595 } 1596 busdma_seg_cnt--; 1597 seg++; 1598 } 1599 (req-rdma_count)->rdma_count = rdma_count; 1600 1601 do { 1602 req--; 1603 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1604 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1605 1606 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1607 if (tx->wc_fifo == NULL) 1608 mxge_submit_req(tx, tx->req_list, cnt); 1609 else 1610 mxge_submit_req_wc(tx, tx->req_list, cnt); 1611 return; 1612 1613 drop: 1614 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1615 m_freem(m); 1616 sc->ifp->if_oerrors++; 1617 if (!once) { 1618 printf("MXGE_MAX_SEND_DESC exceeded via TSO!\n"); 1619 printf("mss = %d, %ld!\n", mss, (long)seg - (long)tx->seg_list); 1620 once = 1; 1621 } 1622 return; 1623 1624 } 1625 1626 static void 1627 mxge_encap(mxge_softc_t *sc, struct mbuf *m) 1628 { 1629 mcp_kreq_ether_send_t *req; 1630 bus_dma_segment_t *seg; 1631 struct mbuf *m_tmp; 1632 struct ifnet *ifp; 1633 mxge_tx_buf_t *tx; 1634 struct ether_header *eh; 1635 struct ip *ip; 1636 int cnt, cum_len, err, i, idx, odd_flag; 1637 uint16_t pseudo_hdr_offset; 1638 uint8_t flags, cksum_offset; 1639 1640 1641 1642 ifp = sc->ifp; 1643 tx = &sc->tx; 1644 1645 /* (try to) map the frame for DMA */ 1646 idx = tx->req & tx->mask; 1647 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 1648 m, tx->seg_list, &cnt, 1649 BUS_DMA_NOWAIT); 1650 if (err == EFBIG) { 1651 /* Too many segments in the chain. Try 1652 to defrag */ 1653 m_tmp = m_defrag(m, M_NOWAIT); 1654 if (m_tmp == NULL) { 1655 goto drop; 1656 } 1657 m = m_tmp; 1658 err = bus_dmamap_load_mbuf_sg(tx->dmat, 1659 tx->info[idx].map, 1660 m, tx->seg_list, &cnt, 1661 BUS_DMA_NOWAIT); 1662 } 1663 if (err != 0) { 1664 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 1665 " packet len = %d\n", err, m->m_pkthdr.len); 1666 goto drop; 1667 } 1668 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 1669 BUS_DMASYNC_PREWRITE); 1670 tx->info[idx].m = m; 1671 1672 1673 /* TSO is different enough, we handle it in another routine */ 1674 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 1675 mxge_encap_tso(sc, m, cnt); 1676 return; 1677 } 1678 1679 req = tx->req_list; 1680 cksum_offset = 0; 1681 pseudo_hdr_offset = 0; 1682 flags = MXGEFW_FLAGS_NO_TSO; 1683 1684 /* checksum offloading? */ 1685 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 1686 /* ensure ip header is in first mbuf, copy 1687 it to a scratch buffer if not */ 1688 if (__predict_false(m->m_len < sizeof (*eh) 1689 + sizeof (*ip))) { 1690 m_copydata(m, 0, sizeof (*eh) + sizeof (*ip), 1691 sc->scratch); 1692 eh = (struct ether_header *)sc->scratch; 1693 } else { 1694 eh = mtod(m, struct ether_header *); 1695 } 1696 ip = (struct ip *) (eh + 1); 1697 cksum_offset = sizeof(*eh) + (ip->ip_hl << 2); 1698 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 1699 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 1700 req->cksum_offset = cksum_offset; 1701 flags |= MXGEFW_FLAGS_CKSUM; 1702 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 1703 } else { 1704 odd_flag = 0; 1705 } 1706 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 1707 flags |= MXGEFW_FLAGS_SMALL; 1708 1709 /* convert segments into a request list */ 1710 cum_len = 0; 1711 seg = tx->seg_list; 1712 req->flags = MXGEFW_FLAGS_FIRST; 1713 for (i = 0; i < cnt; i++) { 1714 req->addr_low = 1715 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 1716 req->addr_high = 1717 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1718 req->length = htobe16(seg->ds_len); 1719 req->cksum_offset = cksum_offset; 1720 if (cksum_offset > seg->ds_len) 1721 cksum_offset -= seg->ds_len; 1722 else 1723 cksum_offset = 0; 1724 req->pseudo_hdr_offset = pseudo_hdr_offset; 1725 req->pad = 0; /* complete solid 16-byte block */ 1726 req->rdma_count = 1; 1727 req->flags |= flags | ((cum_len & 1) * odd_flag); 1728 cum_len += seg->ds_len; 1729 seg++; 1730 req++; 1731 req->flags = 0; 1732 } 1733 req--; 1734 /* pad runts to 60 bytes */ 1735 if (cum_len < 60) { 1736 req++; 1737 req->addr_low = 1738 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 1739 req->addr_high = 1740 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 1741 req->length = htobe16(60 - cum_len); 1742 req->cksum_offset = 0; 1743 req->pseudo_hdr_offset = pseudo_hdr_offset; 1744 req->pad = 0; /* complete solid 16-byte block */ 1745 req->rdma_count = 1; 1746 req->flags |= flags | ((cum_len & 1) * odd_flag); 1747 cnt++; 1748 } 1749 1750 tx->req_list[0].rdma_count = cnt; 1751 #if 0 1752 /* print what the firmware will see */ 1753 for (i = 0; i < cnt; i++) { 1754 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 1755 "cso:%d, flags:0x%x, rdma:%d\n", 1756 i, (int)ntohl(tx->req_list[i].addr_high), 1757 (int)ntohl(tx->req_list[i].addr_low), 1758 (int)ntohs(tx->req_list[i].length), 1759 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 1760 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 1761 tx->req_list[i].rdma_count); 1762 } 1763 printf("--------------\n"); 1764 #endif 1765 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1766 if (tx->wc_fifo == NULL) 1767 mxge_submit_req(tx, tx->req_list, cnt); 1768 else 1769 mxge_submit_req_wc(tx, tx->req_list, cnt); 1770 return; 1771 1772 drop: 1773 m_freem(m); 1774 ifp->if_oerrors++; 1775 return; 1776 } 1777 1778 1779 1780 1781 static inline void 1782 mxge_start_locked(mxge_softc_t *sc) 1783 { 1784 struct mbuf *m; 1785 struct ifnet *ifp; 1786 1787 ifp = sc->ifp; 1788 while ((sc->tx.mask - (sc->tx.req - sc->tx.done)) 1789 > MXGE_MAX_SEND_DESC) { 1790 1791 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 1792 if (m == NULL) { 1793 return; 1794 } 1795 /* let BPF see it */ 1796 BPF_MTAP(ifp, m); 1797 1798 /* give it to the nic */ 1799 mxge_encap(sc, m); 1800 } 1801 /* ran out of transmit slots */ 1802 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 1803 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 1804 sc->tx.stall++; 1805 } 1806 } 1807 1808 static void 1809 mxge_start(struct ifnet *ifp) 1810 { 1811 mxge_softc_t *sc = ifp->if_softc; 1812 1813 1814 mtx_lock(&sc->tx_mtx); 1815 mxge_start_locked(sc); 1816 mtx_unlock(&sc->tx_mtx); 1817 } 1818 1819 /* 1820 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 1821 * at most 32 bytes at a time, so as to avoid involving the software 1822 * pio handler in the nic. We re-write the first segment's low 1823 * DMA address to mark it valid only after we write the entire chunk 1824 * in a burst 1825 */ 1826 static inline void 1827 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 1828 mcp_kreq_ether_recv_t *src) 1829 { 1830 uint32_t low; 1831 1832 low = src->addr_low; 1833 src->addr_low = 0xffffffff; 1834 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 1835 mb(); 1836 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 1837 mb(); 1838 src->addr_low = low; 1839 dst->addr_low = low; 1840 mb(); 1841 } 1842 1843 static int 1844 mxge_get_buf_small(mxge_softc_t *sc, bus_dmamap_t map, int idx) 1845 { 1846 bus_dma_segment_t seg; 1847 struct mbuf *m; 1848 mxge_rx_buf_t *rx = &sc->rx_small; 1849 int cnt, err; 1850 1851 m = m_gethdr(M_DONTWAIT, MT_DATA); 1852 if (m == NULL) { 1853 rx->alloc_fail++; 1854 err = ENOBUFS; 1855 goto done; 1856 } 1857 m->m_len = MHLEN; 1858 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 1859 &seg, &cnt, BUS_DMA_NOWAIT); 1860 if (err != 0) { 1861 m_free(m); 1862 goto done; 1863 } 1864 rx->info[idx].m = m; 1865 rx->shadow[idx].addr_low = 1866 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 1867 rx->shadow[idx].addr_high = 1868 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 1869 1870 done: 1871 if ((idx & 7) == 7) { 1872 if (rx->wc_fifo == NULL) 1873 mxge_submit_8rx(&rx->lanai[idx - 7], 1874 &rx->shadow[idx - 7]); 1875 else { 1876 mb(); 1877 mxge_pio_copy(rx->wc_fifo, &rx->shadow[idx - 7], 64); 1878 } 1879 } 1880 return err; 1881 } 1882 1883 static int 1884 mxge_get_buf_big(mxge_softc_t *sc, bus_dmamap_t map, int idx) 1885 { 1886 bus_dma_segment_t seg; 1887 struct mbuf *m; 1888 mxge_rx_buf_t *rx = &sc->rx_big; 1889 int cnt, err; 1890 1891 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, sc->big_bytes); 1892 if (m == NULL) { 1893 rx->alloc_fail++; 1894 err = ENOBUFS; 1895 goto done; 1896 } 1897 m->m_len = sc->big_bytes; 1898 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 1899 &seg, &cnt, BUS_DMA_NOWAIT); 1900 if (err != 0) { 1901 m_free(m); 1902 goto done; 1903 } 1904 rx->info[idx].m = m; 1905 rx->shadow[idx].addr_low = 1906 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 1907 rx->shadow[idx].addr_high = 1908 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 1909 1910 done: 1911 if ((idx & 7) == 7) { 1912 if (rx->wc_fifo == NULL) 1913 mxge_submit_8rx(&rx->lanai[idx - 7], 1914 &rx->shadow[idx - 7]); 1915 else { 1916 mb(); 1917 mxge_pio_copy(rx->wc_fifo, &rx->shadow[idx - 7], 64); 1918 } 1919 } 1920 return err; 1921 } 1922 1923 static inline void 1924 mxge_rx_csum(struct mbuf *m, int csum) 1925 { 1926 struct ether_header *eh; 1927 struct ip *ip; 1928 1929 eh = mtod(m, struct ether_header *); 1930 1931 /* only deal with IPv4 TCP & UDP for now */ 1932 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 1933 return; 1934 ip = (struct ip *)(eh + 1); 1935 if (__predict_false(ip->ip_p != IPPROTO_TCP && 1936 ip->ip_p != IPPROTO_UDP)) 1937 return; 1938 1939 /* 1940 * Myri10GE hardware checksums are not valid if the sender 1941 * padded the frame with non-zero padding. This is because 1942 * the firmware just does a simple 16-bit 1s complement 1943 * checksum across the entire frame, excluding the first 14 1944 * bytes. It is easiest to simply to assume the worst, and 1945 * only apply hardware checksums to non-padded frames. This 1946 * is what nearly every other OS does by default. 1947 */ 1948 1949 if (__predict_true(m->m_pkthdr.len == 1950 (ntohs(ip->ip_len) + ETHER_HDR_LEN))) { 1951 m->m_pkthdr.csum_data = csum; 1952 m->m_pkthdr.csum_flags = CSUM_DATA_VALID; 1953 } 1954 } 1955 1956 static inline void 1957 mxge_rx_done_big(mxge_softc_t *sc, int len, int csum) 1958 { 1959 struct ifnet *ifp; 1960 struct mbuf *m = 0; /* -Wunitialized */ 1961 struct mbuf *m_prev = 0; /* -Wunitialized */ 1962 struct mbuf *m_head = 0; 1963 bus_dmamap_t old_map; 1964 mxge_rx_buf_t *rx; 1965 int idx; 1966 1967 1968 rx = &sc->rx_big; 1969 ifp = sc->ifp; 1970 while (len > 0) { 1971 idx = rx->cnt & rx->mask; 1972 rx->cnt++; 1973 /* save a pointer to the received mbuf */ 1974 m = rx->info[idx].m; 1975 /* try to replace the received mbuf */ 1976 if (mxge_get_buf_big(sc, rx->extra_map, idx)) { 1977 goto drop; 1978 } 1979 /* unmap the received buffer */ 1980 old_map = rx->info[idx].map; 1981 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 1982 bus_dmamap_unload(rx->dmat, old_map); 1983 1984 /* swap the bus_dmamap_t's */ 1985 rx->info[idx].map = rx->extra_map; 1986 rx->extra_map = old_map; 1987 1988 /* chain multiple segments together */ 1989 if (!m_head) { 1990 m_head = m; 1991 /* mcp implicitly skips 1st bytes so that 1992 * packet is properly aligned */ 1993 m->m_data += MXGEFW_PAD; 1994 m->m_pkthdr.len = len; 1995 m->m_len = sc->big_bytes - MXGEFW_PAD; 1996 } else { 1997 m->m_len = sc->big_bytes; 1998 m->m_flags &= ~M_PKTHDR; 1999 m_prev->m_next = m; 2000 } 2001 len -= m->m_len; 2002 m_prev = m; 2003 } 2004 2005 /* trim trailing garbage from the last mbuf in the chain. If 2006 * there is any garbage, len will be negative */ 2007 m->m_len += len; 2008 2009 m_head->m_pkthdr.rcvif = ifp; 2010 ifp->if_ipackets++; 2011 /* if the checksum is valid, mark it in the mbuf header */ 2012 if (sc->csum_flag) 2013 mxge_rx_csum(m_head, csum); 2014 2015 /* pass the frame up the stack */ 2016 (*ifp->if_input)(ifp, m_head); 2017 return; 2018 2019 drop: 2020 /* drop the frame -- the old mbuf(s) are re-cycled by running 2021 every slot through the allocator */ 2022 if (m_head) { 2023 len -= sc->big_bytes; 2024 m_freem(m_head); 2025 } else { 2026 len -= (sc->big_bytes + MXGEFW_PAD); 2027 } 2028 while ((int)len > 0) { 2029 idx = rx->cnt & rx->mask; 2030 rx->cnt++; 2031 m = rx->info[idx].m; 2032 if (0 == (mxge_get_buf_big(sc, rx->extra_map, idx))) { 2033 m_freem(m); 2034 /* unmap the received buffer */ 2035 old_map = rx->info[idx].map; 2036 bus_dmamap_sync(rx->dmat, old_map, 2037 BUS_DMASYNC_POSTREAD); 2038 bus_dmamap_unload(rx->dmat, old_map); 2039 2040 /* swap the bus_dmamap_t's */ 2041 rx->info[idx].map = rx->extra_map; 2042 rx->extra_map = old_map; 2043 } 2044 len -= sc->big_bytes; 2045 } 2046 2047 ifp->if_ierrors++; 2048 2049 } 2050 2051 static inline void 2052 mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum) 2053 { 2054 struct ifnet *ifp; 2055 struct mbuf *m; 2056 mxge_rx_buf_t *rx; 2057 bus_dmamap_t old_map; 2058 int idx; 2059 2060 ifp = sc->ifp; 2061 rx = &sc->rx_small; 2062 idx = rx->cnt & rx->mask; 2063 rx->cnt++; 2064 /* save a pointer to the received mbuf */ 2065 m = rx->info[idx].m; 2066 /* try to replace the received mbuf */ 2067 if (mxge_get_buf_small(sc, rx->extra_map, idx)) { 2068 /* drop the frame -- the old mbuf is re-cycled */ 2069 ifp->if_ierrors++; 2070 return; 2071 } 2072 2073 /* unmap the received buffer */ 2074 old_map = rx->info[idx].map; 2075 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2076 bus_dmamap_unload(rx->dmat, old_map); 2077 2078 /* swap the bus_dmamap_t's */ 2079 rx->info[idx].map = rx->extra_map; 2080 rx->extra_map = old_map; 2081 2082 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2083 * aligned */ 2084 m->m_data += MXGEFW_PAD; 2085 2086 m->m_pkthdr.rcvif = ifp; 2087 m->m_len = m->m_pkthdr.len = len; 2088 ifp->if_ipackets++; 2089 /* if the checksum is valid, mark it in the mbuf header */ 2090 if (sc->csum_flag) 2091 mxge_rx_csum(m, csum); 2092 2093 /* pass the frame up the stack */ 2094 (*ifp->if_input)(ifp, m); 2095 } 2096 2097 static inline void 2098 mxge_clean_rx_done(mxge_softc_t *sc) 2099 { 2100 mxge_rx_done_t *rx_done = &sc->rx_done; 2101 int limit = 0; 2102 uint16_t length; 2103 uint16_t checksum; 2104 2105 2106 while (rx_done->entry[rx_done->idx].length != 0) { 2107 length = ntohs(rx_done->entry[rx_done->idx].length); 2108 rx_done->entry[rx_done->idx].length = 0; 2109 checksum = ntohs(rx_done->entry[rx_done->idx].checksum); 2110 if (length <= (MHLEN - MXGEFW_PAD)) 2111 mxge_rx_done_small(sc, length, checksum); 2112 else 2113 mxge_rx_done_big(sc, length, checksum); 2114 rx_done->cnt++; 2115 rx_done->idx = rx_done->cnt & (mxge_max_intr_slots - 1); 2116 2117 /* limit potential for livelock */ 2118 if (__predict_false(++limit > 2 * mxge_max_intr_slots)) 2119 break; 2120 2121 } 2122 } 2123 2124 2125 static inline void 2126 mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx) 2127 { 2128 struct ifnet *ifp; 2129 mxge_tx_buf_t *tx; 2130 struct mbuf *m; 2131 bus_dmamap_t map; 2132 int idx, limit; 2133 2134 limit = 0; 2135 tx = &sc->tx; 2136 ifp = sc->ifp; 2137 while (tx->pkt_done != mcp_idx) { 2138 idx = tx->done & tx->mask; 2139 tx->done++; 2140 m = tx->info[idx].m; 2141 /* mbuf and DMA map only attached to the first 2142 segment per-mbuf */ 2143 if (m != NULL) { 2144 ifp->if_opackets++; 2145 tx->info[idx].m = NULL; 2146 map = tx->info[idx].map; 2147 bus_dmamap_unload(tx->dmat, map); 2148 m_freem(m); 2149 } 2150 if (tx->info[idx].flag) { 2151 tx->info[idx].flag = 0; 2152 tx->pkt_done++; 2153 } 2154 /* limit potential for livelock by only handling 2155 2 full tx rings per call */ 2156 if (__predict_false(++limit > 2 * tx->mask)) 2157 break; 2158 } 2159 2160 /* If we have space, clear IFF_OACTIVE to tell the stack that 2161 its OK to send packets */ 2162 2163 if (ifp->if_drv_flags & IFF_DRV_OACTIVE && 2164 tx->req - tx->done < (tx->mask + 1)/4) { 2165 mtx_lock(&sc->tx_mtx); 2166 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 2167 sc->tx.wake++; 2168 mxge_start_locked(sc); 2169 mtx_unlock(&sc->tx_mtx); 2170 } 2171 } 2172 2173 static void 2174 mxge_intr(void *arg) 2175 { 2176 mxge_softc_t *sc = arg; 2177 mcp_irq_data_t *stats = sc->fw_stats; 2178 mxge_tx_buf_t *tx = &sc->tx; 2179 mxge_rx_done_t *rx_done = &sc->rx_done; 2180 uint32_t send_done_count; 2181 uint8_t valid; 2182 2183 2184 /* make sure the DMA has finished */ 2185 if (!stats->valid) { 2186 return; 2187 } 2188 valid = stats->valid; 2189 2190 if (!sc->msi_enabled) { 2191 /* lower legacy IRQ */ 2192 *sc->irq_deassert = 0; 2193 if (!mxge_deassert_wait) 2194 /* don't wait for conf. that irq is low */ 2195 stats->valid = 0; 2196 } else { 2197 stats->valid = 0; 2198 } 2199 2200 /* loop while waiting for legacy irq deassertion */ 2201 do { 2202 /* check for transmit completes and receives */ 2203 send_done_count = be32toh(stats->send_done_count); 2204 while ((send_done_count != tx->pkt_done) || 2205 (rx_done->entry[rx_done->idx].length != 0)) { 2206 mxge_tx_done(sc, (int)send_done_count); 2207 mxge_clean_rx_done(sc); 2208 send_done_count = be32toh(stats->send_done_count); 2209 } 2210 } while (*((volatile uint8_t *) &stats->valid)); 2211 2212 if (__predict_false(stats->stats_updated)) { 2213 if (sc->link_state != stats->link_up) { 2214 sc->link_state = stats->link_up; 2215 if (sc->link_state) { 2216 if_link_state_change(sc->ifp, LINK_STATE_UP); 2217 if (mxge_verbose) 2218 device_printf(sc->dev, "link up\n"); 2219 } else { 2220 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2221 if (mxge_verbose) 2222 device_printf(sc->dev, "link down\n"); 2223 } 2224 } 2225 if (sc->rdma_tags_available != 2226 be32toh(sc->fw_stats->rdma_tags_available)) { 2227 sc->rdma_tags_available = 2228 be32toh(sc->fw_stats->rdma_tags_available); 2229 device_printf(sc->dev, "RDMA timed out! %d tags " 2230 "left\n", sc->rdma_tags_available); 2231 } 2232 sc->down_cnt += stats->link_down; 2233 } 2234 2235 /* check to see if we have rx token to pass back */ 2236 if (valid & 0x1) 2237 *sc->irq_claim = be32toh(3); 2238 *(sc->irq_claim + 1) = be32toh(3); 2239 } 2240 2241 static void 2242 mxge_init(void *arg) 2243 { 2244 } 2245 2246 2247 2248 static void 2249 mxge_free_mbufs(mxge_softc_t *sc) 2250 { 2251 int i; 2252 2253 for (i = 0; i <= sc->rx_big.mask; i++) { 2254 if (sc->rx_big.info[i].m == NULL) 2255 continue; 2256 bus_dmamap_unload(sc->rx_big.dmat, 2257 sc->rx_big.info[i].map); 2258 m_freem(sc->rx_big.info[i].m); 2259 sc->rx_big.info[i].m = NULL; 2260 } 2261 2262 for (i = 0; i <= sc->rx_small.mask; i++) { 2263 if (sc->rx_small.info[i].m == NULL) 2264 continue; 2265 bus_dmamap_unload(sc->rx_small.dmat, 2266 sc->rx_small.info[i].map); 2267 m_freem(sc->rx_small.info[i].m); 2268 sc->rx_small.info[i].m = NULL; 2269 } 2270 2271 for (i = 0; i <= sc->tx.mask; i++) { 2272 sc->tx.info[i].flag = 0; 2273 if (sc->tx.info[i].m == NULL) 2274 continue; 2275 bus_dmamap_unload(sc->tx.dmat, 2276 sc->tx.info[i].map); 2277 m_freem(sc->tx.info[i].m); 2278 sc->tx.info[i].m = NULL; 2279 } 2280 } 2281 2282 static void 2283 mxge_free_rings(mxge_softc_t *sc) 2284 { 2285 int i; 2286 2287 if (sc->tx.req_bytes != NULL) 2288 free(sc->tx.req_bytes, M_DEVBUF); 2289 if (sc->tx.seg_list != NULL) 2290 free(sc->tx.seg_list, M_DEVBUF); 2291 if (sc->rx_small.shadow != NULL) 2292 free(sc->rx_small.shadow, M_DEVBUF); 2293 if (sc->rx_big.shadow != NULL) 2294 free(sc->rx_big.shadow, M_DEVBUF); 2295 if (sc->tx.info != NULL) { 2296 if (sc->tx.dmat != NULL) { 2297 for (i = 0; i <= sc->tx.mask; i++) { 2298 bus_dmamap_destroy(sc->tx.dmat, 2299 sc->tx.info[i].map); 2300 } 2301 bus_dma_tag_destroy(sc->tx.dmat); 2302 } 2303 free(sc->tx.info, M_DEVBUF); 2304 } 2305 if (sc->rx_small.info != NULL) { 2306 if (sc->rx_small.dmat != NULL) { 2307 for (i = 0; i <= sc->rx_small.mask; i++) { 2308 bus_dmamap_destroy(sc->rx_small.dmat, 2309 sc->rx_small.info[i].map); 2310 } 2311 bus_dmamap_destroy(sc->rx_small.dmat, 2312 sc->rx_small.extra_map); 2313 bus_dma_tag_destroy(sc->rx_small.dmat); 2314 } 2315 free(sc->rx_small.info, M_DEVBUF); 2316 } 2317 if (sc->rx_big.info != NULL) { 2318 if (sc->rx_big.dmat != NULL) { 2319 for (i = 0; i <= sc->rx_big.mask; i++) { 2320 bus_dmamap_destroy(sc->rx_big.dmat, 2321 sc->rx_big.info[i].map); 2322 } 2323 bus_dmamap_destroy(sc->rx_big.dmat, 2324 sc->rx_big.extra_map); 2325 bus_dma_tag_destroy(sc->rx_big.dmat); 2326 } 2327 free(sc->rx_big.info, M_DEVBUF); 2328 } 2329 } 2330 2331 static int 2332 mxge_alloc_rings(mxge_softc_t *sc) 2333 { 2334 mxge_cmd_t cmd; 2335 int tx_ring_size, rx_ring_size; 2336 int tx_ring_entries, rx_ring_entries; 2337 int i, err; 2338 unsigned long bytes; 2339 2340 /* get ring sizes */ 2341 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 2342 tx_ring_size = cmd.data0; 2343 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 2344 if (err != 0) { 2345 device_printf(sc->dev, "Cannot determine ring sizes\n"); 2346 goto abort_with_nothing; 2347 } 2348 2349 rx_ring_size = cmd.data0; 2350 2351 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 2352 rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t); 2353 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 2354 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 2355 IFQ_SET_READY(&sc->ifp->if_snd); 2356 2357 sc->tx.mask = tx_ring_entries - 1; 2358 sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1; 2359 2360 err = ENOMEM; 2361 2362 /* allocate the tx request copy block */ 2363 bytes = 8 + 2364 sizeof (*sc->tx.req_list) * (MXGE_MAX_SEND_DESC + 4); 2365 sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 2366 if (sc->tx.req_bytes == NULL) 2367 goto abort_with_nothing; 2368 /* ensure req_list entries are aligned to 8 bytes */ 2369 sc->tx.req_list = (mcp_kreq_ether_send_t *) 2370 ((unsigned long)(sc->tx.req_bytes + 7) & ~7UL); 2371 2372 /* allocate the tx busdma segment list */ 2373 bytes = sizeof (*sc->tx.seg_list) * MXGE_MAX_SEND_DESC; 2374 sc->tx.seg_list = (bus_dma_segment_t *) 2375 malloc(bytes, M_DEVBUF, M_WAITOK); 2376 if (sc->tx.seg_list == NULL) 2377 goto abort_with_alloc; 2378 2379 /* allocate the rx shadow rings */ 2380 bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow); 2381 sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2382 if (sc->rx_small.shadow == NULL) 2383 goto abort_with_alloc; 2384 2385 bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow); 2386 sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2387 if (sc->rx_big.shadow == NULL) 2388 goto abort_with_alloc; 2389 2390 /* allocate the host info rings */ 2391 bytes = tx_ring_entries * sizeof (*sc->tx.info); 2392 sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2393 if (sc->tx.info == NULL) 2394 goto abort_with_alloc; 2395 2396 bytes = rx_ring_entries * sizeof (*sc->rx_small.info); 2397 sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2398 if (sc->rx_small.info == NULL) 2399 goto abort_with_alloc; 2400 2401 bytes = rx_ring_entries * sizeof (*sc->rx_big.info); 2402 sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2403 if (sc->rx_big.info == NULL) 2404 goto abort_with_alloc; 2405 2406 /* allocate the busdma resources */ 2407 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2408 1, /* alignment */ 2409 sc->tx.boundary, /* boundary */ 2410 BUS_SPACE_MAXADDR, /* low */ 2411 BUS_SPACE_MAXADDR, /* high */ 2412 NULL, NULL, /* filter */ 2413 65536 + 256, /* maxsize */ 2414 MXGE_MAX_SEND_DESC/2, /* num segs */ 2415 sc->tx.boundary, /* maxsegsize */ 2416 BUS_DMA_ALLOCNOW, /* flags */ 2417 NULL, NULL, /* lock */ 2418 &sc->tx.dmat); /* tag */ 2419 2420 if (err != 0) { 2421 device_printf(sc->dev, "Err %d allocating tx dmat\n", 2422 err); 2423 goto abort_with_alloc; 2424 } 2425 2426 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2427 1, /* alignment */ 2428 4096, /* boundary */ 2429 BUS_SPACE_MAXADDR, /* low */ 2430 BUS_SPACE_MAXADDR, /* high */ 2431 NULL, NULL, /* filter */ 2432 MHLEN, /* maxsize */ 2433 1, /* num segs */ 2434 MHLEN, /* maxsegsize */ 2435 BUS_DMA_ALLOCNOW, /* flags */ 2436 NULL, NULL, /* lock */ 2437 &sc->rx_small.dmat); /* tag */ 2438 if (err != 0) { 2439 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 2440 err); 2441 goto abort_with_alloc; 2442 } 2443 2444 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2445 1, /* alignment */ 2446 4096, /* boundary */ 2447 BUS_SPACE_MAXADDR, /* low */ 2448 BUS_SPACE_MAXADDR, /* high */ 2449 NULL, NULL, /* filter */ 2450 4096, /* maxsize */ 2451 1, /* num segs */ 2452 4096, /* maxsegsize */ 2453 BUS_DMA_ALLOCNOW, /* flags */ 2454 NULL, NULL, /* lock */ 2455 &sc->rx_big.dmat); /* tag */ 2456 if (err != 0) { 2457 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 2458 err); 2459 goto abort_with_alloc; 2460 } 2461 2462 /* now use these tags to setup dmamaps for each slot 2463 in each ring */ 2464 for (i = 0; i <= sc->tx.mask; i++) { 2465 err = bus_dmamap_create(sc->tx.dmat, 0, 2466 &sc->tx.info[i].map); 2467 if (err != 0) { 2468 device_printf(sc->dev, "Err %d tx dmamap\n", 2469 err); 2470 goto abort_with_alloc; 2471 } 2472 } 2473 for (i = 0; i <= sc->rx_small.mask; i++) { 2474 err = bus_dmamap_create(sc->rx_small.dmat, 0, 2475 &sc->rx_small.info[i].map); 2476 if (err != 0) { 2477 device_printf(sc->dev, "Err %d rx_small dmamap\n", 2478 err); 2479 goto abort_with_alloc; 2480 } 2481 } 2482 err = bus_dmamap_create(sc->rx_small.dmat, 0, 2483 &sc->rx_small.extra_map); 2484 if (err != 0) { 2485 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 2486 err); 2487 goto abort_with_alloc; 2488 } 2489 2490 for (i = 0; i <= sc->rx_big.mask; i++) { 2491 err = bus_dmamap_create(sc->rx_big.dmat, 0, 2492 &sc->rx_big.info[i].map); 2493 if (err != 0) { 2494 device_printf(sc->dev, "Err %d rx_big dmamap\n", 2495 err); 2496 goto abort_with_alloc; 2497 } 2498 } 2499 err = bus_dmamap_create(sc->rx_big.dmat, 0, 2500 &sc->rx_big.extra_map); 2501 if (err != 0) { 2502 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 2503 err); 2504 goto abort_with_alloc; 2505 } 2506 return 0; 2507 2508 abort_with_alloc: 2509 mxge_free_rings(sc); 2510 2511 abort_with_nothing: 2512 return err; 2513 } 2514 2515 static int 2516 mxge_open(mxge_softc_t *sc) 2517 { 2518 mxge_cmd_t cmd; 2519 int i, err; 2520 bus_dmamap_t map; 2521 bus_addr_t bus; 2522 2523 2524 /* Copy the MAC address in case it was overridden */ 2525 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 2526 2527 err = mxge_reset(sc); 2528 if (err != 0) { 2529 device_printf(sc->dev, "failed to reset\n"); 2530 return EIO; 2531 } 2532 bzero(sc->rx_done.entry, 2533 mxge_max_intr_slots * sizeof(*sc->rx_done.entry)); 2534 2535 if (MCLBYTES >= 2536 sc->ifp->if_mtu + ETHER_HDR_LEN + MXGEFW_PAD) 2537 sc->big_bytes = MCLBYTES; 2538 else 2539 sc->big_bytes = MJUMPAGESIZE; 2540 2541 2542 /* get the lanai pointers to the send and receive rings */ 2543 2544 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 2545 sc->tx.lanai = 2546 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 2547 err |= mxge_send_cmd(sc, 2548 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 2549 sc->rx_small.lanai = 2550 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 2551 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 2552 sc->rx_big.lanai = 2553 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 2554 2555 if (err != 0) { 2556 device_printf(sc->dev, 2557 "failed to get ring sizes or locations\n"); 2558 return EIO; 2559 } 2560 2561 if (sc->wc) { 2562 sc->tx.wc_fifo = sc->sram + MXGEFW_ETH_SEND_4; 2563 sc->rx_small.wc_fifo = sc->sram + MXGEFW_ETH_RECV_SMALL; 2564 sc->rx_big.wc_fifo = sc->sram + MXGEFW_ETH_RECV_BIG; 2565 } else { 2566 sc->tx.wc_fifo = 0; 2567 sc->rx_small.wc_fifo = 0; 2568 sc->rx_big.wc_fifo = 0; 2569 } 2570 2571 2572 /* stock receive rings */ 2573 for (i = 0; i <= sc->rx_small.mask; i++) { 2574 map = sc->rx_small.info[i].map; 2575 err = mxge_get_buf_small(sc, map, i); 2576 if (err) { 2577 device_printf(sc->dev, "alloced %d/%d smalls\n", 2578 i, sc->rx_small.mask + 1); 2579 goto abort; 2580 } 2581 } 2582 for (i = 0; i <= sc->rx_big.mask; i++) { 2583 map = sc->rx_big.info[i].map; 2584 err = mxge_get_buf_big(sc, map, i); 2585 if (err) { 2586 device_printf(sc->dev, "alloced %d/%d bigs\n", 2587 i, sc->rx_big.mask + 1); 2588 goto abort; 2589 } 2590 } 2591 2592 /* Give the firmware the mtu and the big and small buffer 2593 sizes. The firmware wants the big buf size to be a power 2594 of two. Luckily, FreeBSD's clusters are powers of two */ 2595 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN; 2596 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 2597 cmd.data0 = MHLEN - MXGEFW_PAD; 2598 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 2599 &cmd); 2600 cmd.data0 = sc->big_bytes; 2601 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 2602 2603 if (err != 0) { 2604 device_printf(sc->dev, "failed to setup params\n"); 2605 goto abort; 2606 } 2607 2608 /* Now give him the pointer to the stats block */ 2609 cmd.data0 = MXGE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr); 2610 cmd.data1 = MXGE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr); 2611 cmd.data2 = sizeof(struct mcp_irq_data); 2612 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 2613 2614 if (err != 0) { 2615 bus = sc->fw_stats_dma.bus_addr; 2616 bus += offsetof(struct mcp_irq_data, send_done_count); 2617 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 2618 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 2619 err = mxge_send_cmd(sc, 2620 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 2621 &cmd); 2622 /* Firmware cannot support multicast without STATS_DMA_V2 */ 2623 sc->fw_multicast_support = 0; 2624 } else { 2625 sc->fw_multicast_support = 1; 2626 } 2627 2628 if (err != 0) { 2629 device_printf(sc->dev, "failed to setup params\n"); 2630 goto abort; 2631 } 2632 2633 /* Finally, start the firmware running */ 2634 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 2635 if (err) { 2636 device_printf(sc->dev, "Couldn't bring up link\n"); 2637 goto abort; 2638 } 2639 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 2640 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 2641 2642 return 0; 2643 2644 2645 abort: 2646 mxge_free_mbufs(sc); 2647 2648 return err; 2649 } 2650 2651 static int 2652 mxge_close(mxge_softc_t *sc) 2653 { 2654 mxge_cmd_t cmd; 2655 int err, old_down_cnt; 2656 2657 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 2658 old_down_cnt = sc->down_cnt; 2659 mb(); 2660 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 2661 if (err) { 2662 device_printf(sc->dev, "Couldn't bring down link\n"); 2663 } 2664 if (old_down_cnt == sc->down_cnt) { 2665 /* wait for down irq */ 2666 DELAY(10 * sc->intr_coal_delay); 2667 } 2668 if (old_down_cnt == sc->down_cnt) { 2669 device_printf(sc->dev, "never got down irq\n"); 2670 } 2671 2672 mxge_free_mbufs(sc); 2673 2674 return 0; 2675 } 2676 2677 static void 2678 mxge_setup_cfg_space(mxge_softc_t *sc) 2679 { 2680 device_t dev = sc->dev; 2681 int reg; 2682 uint16_t cmd, lnk, pectl; 2683 2684 /* find the PCIe link width and set max read request to 4KB*/ 2685 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 2686 lnk = pci_read_config(dev, reg + 0x12, 2); 2687 sc->link_width = (lnk >> 4) & 0x3f; 2688 2689 pectl = pci_read_config(dev, reg + 0x8, 2); 2690 pectl = (pectl & ~0x7000) | (5 << 12); 2691 pci_write_config(dev, reg + 0x8, pectl, 2); 2692 } 2693 2694 /* Enable DMA and Memory space access */ 2695 pci_enable_busmaster(dev); 2696 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 2697 cmd |= PCIM_CMD_MEMEN; 2698 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 2699 } 2700 2701 static uint32_t 2702 mxge_read_reboot(mxge_softc_t *sc) 2703 { 2704 device_t dev = sc->dev; 2705 uint32_t vs; 2706 2707 /* find the vendor specific offset */ 2708 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 2709 device_printf(sc->dev, 2710 "could not find vendor specific offset\n"); 2711 return (uint32_t)-1; 2712 } 2713 /* enable read32 mode */ 2714 pci_write_config(dev, vs + 0x10, 0x3, 1); 2715 /* tell NIC which register to read */ 2716 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 2717 return (pci_read_config(dev, vs + 0x14, 4)); 2718 } 2719 2720 static void 2721 mxge_watchdog_reset(mxge_softc_t *sc) 2722 { 2723 int err; 2724 uint32_t reboot; 2725 uint16_t cmd; 2726 2727 err = ENXIO; 2728 2729 device_printf(sc->dev, "Watchdog reset!\n"); 2730 2731 /* 2732 * check to see if the NIC rebooted. If it did, then all of 2733 * PCI config space has been reset, and things like the 2734 * busmaster bit will be zero. If this is the case, then we 2735 * must restore PCI config space before the NIC can be used 2736 * again 2737 */ 2738 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 2739 if (cmd == 0xffff) { 2740 /* 2741 * maybe the watchdog caught the NIC rebooting; wait 2742 * up to 100ms for it to finish. If it does not come 2743 * back, then give up 2744 */ 2745 DELAY(1000*100); 2746 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 2747 if (cmd == 0xffff) { 2748 device_printf(sc->dev, "NIC disappeared!\n"); 2749 goto abort; 2750 } 2751 } 2752 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 2753 /* print the reboot status */ 2754 reboot = mxge_read_reboot(sc); 2755 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 2756 reboot); 2757 /* restore PCI configuration space */ 2758 2759 /* XXXX waiting for pci_cfg_restore() to be exported */ 2760 goto abort; /* just abort for now */ 2761 2762 /* and redo any changes we made to our config space */ 2763 mxge_setup_cfg_space(sc); 2764 } else { 2765 device_printf(sc->dev, "NIC did not reboot, ring state:\n"); 2766 device_printf(sc->dev, "tx.req=%d tx.done=%d\n", 2767 sc->tx.req, sc->tx.done); 2768 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 2769 sc->tx.pkt_done, 2770 be32toh(sc->fw_stats->send_done_count)); 2771 } 2772 2773 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { 2774 mxge_close(sc); 2775 err = mxge_open(sc); 2776 } 2777 2778 abort: 2779 /* 2780 * stop the watchdog if the nic is dead, to avoid spamming the 2781 * console 2782 */ 2783 if (err != 0) { 2784 callout_stop(&sc->co_hdl); 2785 } 2786 } 2787 2788 static void 2789 mxge_watchdog(mxge_softc_t *sc) 2790 { 2791 mxge_tx_buf_t *tx = &sc->tx; 2792 2793 /* see if we have outstanding transmits, which 2794 have been pending for more than mxge_ticks */ 2795 if (tx->req != tx->done && 2796 tx->watchdog_req != tx->watchdog_done && 2797 tx->done == tx->watchdog_done) 2798 mxge_watchdog_reset(sc); 2799 2800 tx->watchdog_req = tx->req; 2801 tx->watchdog_done = tx->done; 2802 } 2803 2804 static void 2805 mxge_tick(void *arg) 2806 { 2807 mxge_softc_t *sc = arg; 2808 2809 2810 /* Synchronize with possible callout reset/stop. */ 2811 if (callout_pending(&sc->co_hdl) || 2812 !callout_active(&sc->co_hdl)) { 2813 mtx_unlock(&sc->driver_mtx); 2814 return; 2815 } 2816 2817 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 2818 mxge_watchdog(sc); 2819 } 2820 2821 static int 2822 mxge_media_change(struct ifnet *ifp) 2823 { 2824 return EINVAL; 2825 } 2826 2827 static int 2828 mxge_change_mtu(mxge_softc_t *sc, int mtu) 2829 { 2830 struct ifnet *ifp = sc->ifp; 2831 int real_mtu, old_mtu; 2832 int err = 0; 2833 2834 2835 real_mtu = mtu + ETHER_HDR_LEN; 2836 if ((real_mtu > MXGE_MAX_ETHER_MTU) || 2837 real_mtu < 60) 2838 return EINVAL; 2839 mtx_lock(&sc->driver_mtx); 2840 old_mtu = ifp->if_mtu; 2841 ifp->if_mtu = mtu; 2842 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 2843 callout_stop(&sc->co_hdl); 2844 mxge_close(sc); 2845 err = mxge_open(sc); 2846 if (err != 0) { 2847 ifp->if_mtu = old_mtu; 2848 mxge_close(sc); 2849 (void) mxge_open(sc); 2850 } 2851 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 2852 } 2853 mtx_unlock(&sc->driver_mtx); 2854 return err; 2855 } 2856 2857 static void 2858 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 2859 { 2860 mxge_softc_t *sc = ifp->if_softc; 2861 2862 2863 if (sc == NULL) 2864 return; 2865 ifmr->ifm_status = IFM_AVALID; 2866 ifmr->ifm_status |= sc->fw_stats->link_up ? IFM_ACTIVE : 0; 2867 ifmr->ifm_active = IFM_AUTO | IFM_ETHER; 2868 ifmr->ifm_active |= sc->fw_stats->link_up ? IFM_FDX : 0; 2869 } 2870 2871 static int 2872 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 2873 { 2874 mxge_softc_t *sc = ifp->if_softc; 2875 struct ifreq *ifr = (struct ifreq *)data; 2876 int err, mask; 2877 2878 err = 0; 2879 switch (command) { 2880 case SIOCSIFADDR: 2881 case SIOCGIFADDR: 2882 err = ether_ioctl(ifp, command, data); 2883 break; 2884 2885 case SIOCSIFMTU: 2886 err = mxge_change_mtu(sc, ifr->ifr_mtu); 2887 break; 2888 2889 case SIOCSIFFLAGS: 2890 mtx_lock(&sc->driver_mtx); 2891 if (ifp->if_flags & IFF_UP) { 2892 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 2893 err = mxge_open(sc); 2894 callout_reset(&sc->co_hdl, mxge_ticks, 2895 mxge_tick, sc); 2896 } else { 2897 /* take care of promis can allmulti 2898 flag chages */ 2899 mxge_change_promisc(sc, 2900 ifp->if_flags & IFF_PROMISC); 2901 mxge_set_multicast_list(sc); 2902 } 2903 } else { 2904 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 2905 mxge_close(sc); 2906 callout_stop(&sc->co_hdl); 2907 } 2908 } 2909 mtx_unlock(&sc->driver_mtx); 2910 break; 2911 2912 case SIOCADDMULTI: 2913 case SIOCDELMULTI: 2914 mtx_lock(&sc->driver_mtx); 2915 mxge_set_multicast_list(sc); 2916 mtx_unlock(&sc->driver_mtx); 2917 break; 2918 2919 case SIOCSIFCAP: 2920 mtx_lock(&sc->driver_mtx); 2921 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 2922 if (mask & IFCAP_TXCSUM) { 2923 if (IFCAP_TXCSUM & ifp->if_capenable) { 2924 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 2925 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 2926 | CSUM_TSO); 2927 } else { 2928 ifp->if_capenable |= IFCAP_TXCSUM; 2929 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 2930 } 2931 } else if (mask & IFCAP_RXCSUM) { 2932 if (IFCAP_RXCSUM & ifp->if_capenable) { 2933 ifp->if_capenable &= ~IFCAP_RXCSUM; 2934 sc->csum_flag = 0; 2935 } else { 2936 ifp->if_capenable |= IFCAP_RXCSUM; 2937 sc->csum_flag = 1; 2938 } 2939 } 2940 if (mask & IFCAP_TSO4) { 2941 if (IFCAP_TSO4 & ifp->if_capenable) { 2942 ifp->if_capenable &= ~IFCAP_TSO4; 2943 ifp->if_hwassist &= ~CSUM_TSO; 2944 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 2945 ifp->if_capenable |= IFCAP_TSO4; 2946 ifp->if_hwassist |= CSUM_TSO; 2947 } else { 2948 printf("mxge requires tx checksum offload" 2949 " be enabled to use TSO\n"); 2950 err = EINVAL; 2951 } 2952 } 2953 mtx_unlock(&sc->driver_mtx); 2954 break; 2955 2956 case SIOCGIFMEDIA: 2957 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 2958 &sc->media, command); 2959 break; 2960 2961 default: 2962 err = ENOTTY; 2963 } 2964 return err; 2965 } 2966 2967 static void 2968 mxge_fetch_tunables(mxge_softc_t *sc) 2969 { 2970 2971 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 2972 &mxge_flow_control); 2973 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 2974 &mxge_intr_coal_delay); 2975 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 2976 &mxge_nvidia_ecrc_enable); 2977 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 2978 &mxge_force_firmware); 2979 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 2980 &mxge_deassert_wait); 2981 TUNABLE_INT_FETCH("hw.mxge.verbose", 2982 &mxge_verbose); 2983 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 2984 2985 if (bootverbose) 2986 mxge_verbose = 1; 2987 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 2988 mxge_intr_coal_delay = 30; 2989 if (mxge_ticks == 0) 2990 mxge_ticks = hz; 2991 sc->pause = mxge_flow_control; 2992 } 2993 2994 static int 2995 mxge_attach(device_t dev) 2996 { 2997 mxge_softc_t *sc = device_get_softc(dev); 2998 struct ifnet *ifp; 2999 size_t bytes; 3000 int count, rid, err; 3001 3002 sc->dev = dev; 3003 mxge_fetch_tunables(sc); 3004 3005 err = bus_dma_tag_create(NULL, /* parent */ 3006 1, /* alignment */ 3007 4096, /* boundary */ 3008 BUS_SPACE_MAXADDR, /* low */ 3009 BUS_SPACE_MAXADDR, /* high */ 3010 NULL, NULL, /* filter */ 3011 65536 + 256, /* maxsize */ 3012 MXGE_MAX_SEND_DESC, /* num segs */ 3013 4096, /* maxsegsize */ 3014 0, /* flags */ 3015 NULL, NULL, /* lock */ 3016 &sc->parent_dmat); /* tag */ 3017 3018 if (err != 0) { 3019 device_printf(sc->dev, "Err %d allocating parent dmat\n", 3020 err); 3021 goto abort_with_nothing; 3022 } 3023 3024 ifp = sc->ifp = if_alloc(IFT_ETHER); 3025 if (ifp == NULL) { 3026 device_printf(dev, "can not if_alloc()\n"); 3027 err = ENOSPC; 3028 goto abort_with_parent_dmat; 3029 } 3030 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 3031 device_get_nameunit(dev)); 3032 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 3033 snprintf(sc->tx_mtx_name, sizeof(sc->tx_mtx_name), "%s:tx", 3034 device_get_nameunit(dev)); 3035 mtx_init(&sc->tx_mtx, sc->tx_mtx_name, NULL, MTX_DEF); 3036 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 3037 "%s:drv", device_get_nameunit(dev)); 3038 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 3039 MTX_NETWORK_LOCK, MTX_DEF); 3040 3041 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 3042 3043 mxge_setup_cfg_space(sc); 3044 3045 /* Map the board into the kernel */ 3046 rid = PCIR_BARS; 3047 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 3048 ~0, 1, RF_ACTIVE); 3049 if (sc->mem_res == NULL) { 3050 device_printf(dev, "could not map memory\n"); 3051 err = ENXIO; 3052 goto abort_with_lock; 3053 } 3054 sc->sram = rman_get_virtual(sc->mem_res); 3055 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 3056 if (sc->sram_size > rman_get_size(sc->mem_res)) { 3057 device_printf(dev, "impossible memory region size %ld\n", 3058 rman_get_size(sc->mem_res)); 3059 err = ENXIO; 3060 goto abort_with_mem_res; 3061 } 3062 3063 /* make NULL terminated copy of the EEPROM strings section of 3064 lanai SRAM */ 3065 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 3066 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 3067 rman_get_bushandle(sc->mem_res), 3068 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 3069 sc->eeprom_strings, 3070 MXGE_EEPROM_STRINGS_SIZE - 2); 3071 err = mxge_parse_strings(sc); 3072 if (err != 0) 3073 goto abort_with_mem_res; 3074 3075 /* Enable write combining for efficient use of PCIe bus */ 3076 mxge_enable_wc(sc); 3077 3078 /* Allocate the out of band dma memory */ 3079 err = mxge_dma_alloc(sc, &sc->cmd_dma, 3080 sizeof (mxge_cmd_t), 64); 3081 if (err != 0) 3082 goto abort_with_mem_res; 3083 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 3084 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 3085 if (err != 0) 3086 goto abort_with_cmd_dma; 3087 3088 err = mxge_dma_alloc(sc, &sc->fw_stats_dma, 3089 sizeof (*sc->fw_stats), 64); 3090 if (err != 0) 3091 goto abort_with_zeropad_dma; 3092 sc->fw_stats = (mcp_irq_data_t *)sc->fw_stats_dma.addr; 3093 3094 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 3095 if (err != 0) 3096 goto abort_with_fw_stats; 3097 3098 /* allocate interrupt queues */ 3099 bytes = mxge_max_intr_slots * sizeof (*sc->rx_done.entry); 3100 err = mxge_dma_alloc(sc, &sc->rx_done.dma, bytes, 4096); 3101 if (err != 0) 3102 goto abort_with_dmabench; 3103 sc->rx_done.entry = sc->rx_done.dma.addr; 3104 bzero(sc->rx_done.entry, bytes); 3105 3106 /* Add our ithread */ 3107 count = pci_msi_count(dev); 3108 if (count == 1 && pci_alloc_msi(dev, &count) == 0) { 3109 rid = 1; 3110 sc->msi_enabled = 1; 3111 } else { 3112 rid = 0; 3113 } 3114 sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0, 3115 1, RF_SHAREABLE | RF_ACTIVE); 3116 if (sc->irq_res == NULL) { 3117 device_printf(dev, "could not alloc interrupt\n"); 3118 goto abort_with_rx_done; 3119 } 3120 if (mxge_verbose) 3121 device_printf(dev, "using %s irq %ld\n", 3122 sc->msi_enabled ? "MSI" : "INTx", 3123 rman_get_start(sc->irq_res)); 3124 /* select & load the firmware */ 3125 err = mxge_select_firmware(sc); 3126 if (err != 0) 3127 goto abort_with_irq_res; 3128 sc->intr_coal_delay = mxge_intr_coal_delay; 3129 err = mxge_reset(sc); 3130 if (err != 0) 3131 goto abort_with_irq_res; 3132 3133 err = mxge_alloc_rings(sc); 3134 if (err != 0) { 3135 device_printf(sc->dev, "failed to allocate rings\n"); 3136 goto abort_with_irq_res; 3137 } 3138 3139 err = bus_setup_intr(sc->dev, sc->irq_res, 3140 INTR_TYPE_NET | INTR_MPSAFE, 3141 NULL, mxge_intr, sc, &sc->ih); 3142 if (err != 0) { 3143 goto abort_with_rings; 3144 } 3145 /* hook into the network stack */ 3146 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 3147 ifp->if_baudrate = 100000000; 3148 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 3149 IFCAP_JUMBO_MTU; 3150 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 3151 ifp->if_capenable = ifp->if_capabilities; 3152 sc->csum_flag = 1; 3153 ifp->if_init = mxge_init; 3154 ifp->if_softc = sc; 3155 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 3156 ifp->if_ioctl = mxge_ioctl; 3157 ifp->if_start = mxge_start; 3158 ether_ifattach(ifp, sc->mac_addr); 3159 /* ether_ifattach sets mtu to 1500 */ 3160 ifp->if_mtu = MXGE_MAX_ETHER_MTU - ETHER_HDR_LEN; 3161 3162 /* Initialise the ifmedia structure */ 3163 ifmedia_init(&sc->media, 0, mxge_media_change, 3164 mxge_media_status); 3165 ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL); 3166 mxge_add_sysctls(sc); 3167 return 0; 3168 3169 abort_with_rings: 3170 mxge_free_rings(sc); 3171 abort_with_irq_res: 3172 bus_release_resource(dev, SYS_RES_IRQ, 3173 sc->msi_enabled ? 1 : 0, sc->irq_res); 3174 if (sc->msi_enabled) 3175 pci_release_msi(dev); 3176 abort_with_rx_done: 3177 sc->rx_done.entry = NULL; 3178 mxge_dma_free(&sc->rx_done.dma); 3179 abort_with_dmabench: 3180 mxge_dma_free(&sc->dmabench_dma); 3181 abort_with_fw_stats: 3182 mxge_dma_free(&sc->fw_stats_dma); 3183 abort_with_zeropad_dma: 3184 mxge_dma_free(&sc->zeropad_dma); 3185 abort_with_cmd_dma: 3186 mxge_dma_free(&sc->cmd_dma); 3187 abort_with_mem_res: 3188 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 3189 abort_with_lock: 3190 pci_disable_busmaster(dev); 3191 mtx_destroy(&sc->cmd_mtx); 3192 mtx_destroy(&sc->tx_mtx); 3193 mtx_destroy(&sc->driver_mtx); 3194 if_free(ifp); 3195 abort_with_parent_dmat: 3196 bus_dma_tag_destroy(sc->parent_dmat); 3197 3198 abort_with_nothing: 3199 return err; 3200 } 3201 3202 static int 3203 mxge_detach(device_t dev) 3204 { 3205 mxge_softc_t *sc = device_get_softc(dev); 3206 3207 mtx_lock(&sc->driver_mtx); 3208 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 3209 mxge_close(sc); 3210 callout_stop(&sc->co_hdl); 3211 mtx_unlock(&sc->driver_mtx); 3212 ether_ifdetach(sc->ifp); 3213 ifmedia_removeall(&sc->media); 3214 mxge_dummy_rdma(sc, 0); 3215 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 3216 mxge_free_rings(sc); 3217 bus_release_resource(dev, SYS_RES_IRQ, 3218 sc->msi_enabled ? 1 : 0, sc->irq_res); 3219 if (sc->msi_enabled) 3220 pci_release_msi(dev); 3221 3222 sc->rx_done.entry = NULL; 3223 mxge_dma_free(&sc->rx_done.dma); 3224 mxge_dma_free(&sc->fw_stats_dma); 3225 mxge_dma_free(&sc->dmabench_dma); 3226 mxge_dma_free(&sc->zeropad_dma); 3227 mxge_dma_free(&sc->cmd_dma); 3228 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 3229 pci_disable_busmaster(dev); 3230 mtx_destroy(&sc->cmd_mtx); 3231 mtx_destroy(&sc->tx_mtx); 3232 mtx_destroy(&sc->driver_mtx); 3233 if_free(sc->ifp); 3234 bus_dma_tag_destroy(sc->parent_dmat); 3235 return 0; 3236 } 3237 3238 static int 3239 mxge_shutdown(device_t dev) 3240 { 3241 return 0; 3242 } 3243 3244 /* 3245 This file uses Myri10GE driver indentation. 3246 3247 Local Variables: 3248 c-file-style:"linux" 3249 tab-width:8 3250 End: 3251 */ 3252