1 /****************************************************************************** 2 3 Copyright (c) 2006, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 3. Neither the name of the Myricom Inc, nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 
31 32 ***************************************************************************/ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/linker.h> 40 #include <sys/firmware.h> 41 #include <sys/endian.h> 42 #include <sys/sockio.h> 43 #include <sys/mbuf.h> 44 #include <sys/malloc.h> 45 #include <sys/kdb.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/module.h> 49 #include <sys/memrange.h> 50 #include <sys/socket.h> 51 #include <sys/sysctl.h> 52 #include <sys/sx.h> 53 54 #include <net/if.h> 55 #include <net/if_arp.h> 56 #include <net/ethernet.h> 57 #include <net/if_dl.h> 58 #include <net/if_media.h> 59 60 #include <net/bpf.h> 61 62 #include <net/if_types.h> 63 #include <net/if_vlan_var.h> 64 #include <net/zlib.h> 65 66 #include <netinet/in_systm.h> 67 #include <netinet/in.h> 68 #include <netinet/ip.h> 69 #include <netinet/tcp.h> 70 71 #include <machine/bus.h> 72 #include <machine/in_cksum.h> 73 #include <machine/resource.h> 74 #include <sys/bus.h> 75 #include <sys/rman.h> 76 77 #include <dev/pci/pcireg.h> 78 #include <dev/pci/pcivar.h> 79 80 #include <vm/vm.h> /* for pmap_mapdev() */ 81 #include <vm/pmap.h> 82 83 #if defined(__i386) || defined(__amd64) 84 #include <machine/specialreg.h> 85 #endif 86 87 #include <dev/mxge/mxge_mcp.h> 88 #include <dev/mxge/mcp_gen_header.h> 89 #include <dev/mxge/if_mxge_var.h> 90 91 /* tunable params */ 92 static int mxge_nvidia_ecrc_enable = 1; 93 static int mxge_force_firmware = 0; 94 static int mxge_intr_coal_delay = 30; 95 static int mxge_deassert_wait = 1; 96 static int mxge_flow_control = 1; 97 static int mxge_verbose = 0; 98 static int mxge_ticks; 99 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 100 static char *mxge_fw_aligned = "mxge_eth_z8e"; 101 102 static int mxge_probe(device_t dev); 103 static int mxge_attach(device_t dev); 104 static int mxge_detach(device_t dev); 105 static int mxge_shutdown(device_t dev); 106 static void 
mxge_intr(void *arg); 107 108 static device_method_t mxge_methods[] = 109 { 110 /* Device interface */ 111 DEVMETHOD(device_probe, mxge_probe), 112 DEVMETHOD(device_attach, mxge_attach), 113 DEVMETHOD(device_detach, mxge_detach), 114 DEVMETHOD(device_shutdown, mxge_shutdown), 115 {0, 0} 116 }; 117 118 static driver_t mxge_driver = 119 { 120 "mxge", 121 mxge_methods, 122 sizeof(mxge_softc_t), 123 }; 124 125 static devclass_t mxge_devclass; 126 127 /* Declare ourselves to be a child of the PCI bus.*/ 128 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 129 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 130 131 static int mxge_load_firmware(mxge_softc_t *sc); 132 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 133 134 static int 135 mxge_probe(device_t dev) 136 { 137 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 138 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E)) { 139 device_set_desc(dev, "Myri10G-PCIE-8A"); 140 return 0; 141 } 142 return ENXIO; 143 } 144 145 static void 146 mxge_enable_wc(mxge_softc_t *sc) 147 { 148 struct mem_range_desc mrdesc; 149 vm_paddr_t pa; 150 vm_offset_t len; 151 int err, action; 152 153 len = rman_get_size(sc->mem_res); 154 #if defined(__i386) || defined(__amd64) 155 err = pmap_change_attr((vm_offset_t) sc->sram, 156 len, PAT_WRITE_COMBINING); 157 if (err == 0) 158 return; 159 else 160 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 161 err); 162 #endif 163 pa = rman_get_start(sc->mem_res); 164 mrdesc.mr_base = pa; 165 mrdesc.mr_len = len; 166 mrdesc.mr_flags = MDF_WRITECOMBINE; 167 action = MEMRANGE_SET_UPDATE; 168 strcpy((char *)&mrdesc.mr_owner, "mxge"); 169 err = mem_range_attr_set(&mrdesc, &action); 170 if (err != 0) { 171 device_printf(sc->dev, 172 "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n", 173 (unsigned long)pa, (unsigned long)len, err); 174 } else { 175 sc->wc = 1; 176 } 177 } 178 179 180 /* callback to get our DMA address */ 181 static void 182 mxge_dmamap_callback(void *arg, 
bus_dma_segment_t *segs, int nsegs, 183 int error) 184 { 185 if (error == 0) { 186 *(bus_addr_t *) arg = segs->ds_addr; 187 } 188 } 189 190 static int 191 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 192 bus_size_t alignment) 193 { 194 int err; 195 device_t dev = sc->dev; 196 197 /* allocate DMAable memory tags */ 198 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 199 alignment, /* alignment */ 200 4096, /* boundary */ 201 BUS_SPACE_MAXADDR, /* low */ 202 BUS_SPACE_MAXADDR, /* high */ 203 NULL, NULL, /* filter */ 204 bytes, /* maxsize */ 205 1, /* num segs */ 206 4096, /* maxsegsize */ 207 BUS_DMA_COHERENT, /* flags */ 208 NULL, NULL, /* lock */ 209 &dma->dmat); /* tag */ 210 if (err != 0) { 211 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 212 return err; 213 } 214 215 /* allocate DMAable memory & map */ 216 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 217 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 218 | BUS_DMA_ZERO), &dma->map); 219 if (err != 0) { 220 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 221 goto abort_with_dmat; 222 } 223 224 /* load the memory */ 225 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 226 mxge_dmamap_callback, 227 (void *)&dma->bus_addr, 0); 228 if (err != 0) { 229 device_printf(dev, "couldn't load map (err = %d)\n", err); 230 goto abort_with_mem; 231 } 232 return 0; 233 234 abort_with_mem: 235 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 236 abort_with_dmat: 237 (void)bus_dma_tag_destroy(dma->dmat); 238 return err; 239 } 240 241 242 static void 243 mxge_dma_free(mxge_dma_t *dma) 244 { 245 bus_dmamap_unload(dma->dmat, dma->map); 246 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 247 (void)bus_dma_tag_destroy(dma->dmat); 248 } 249 250 /* 251 * The eeprom strings on the lanaiX have the format 252 * SN=x\0 253 * MAC=x:x:x:x:x:x\0 254 * PC=text\0 255 */ 256 257 static int 258 mxge_parse_strings(mxge_softc_t *sc) 259 { 260 #define MXGE_NEXT_STRING(p) while(ptr < limit && 
*ptr++) 261 262 char *ptr, *limit; 263 int i, found_mac; 264 265 ptr = sc->eeprom_strings; 266 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 267 found_mac = 0; 268 while (ptr < limit && *ptr != '\0') { 269 if (memcmp(ptr, "MAC=", 4) == 0) { 270 ptr += 1; 271 sc->mac_addr_string = ptr; 272 for (i = 0; i < 6; i++) { 273 ptr += 3; 274 if ((ptr + 2) > limit) 275 goto abort; 276 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 277 found_mac = 1; 278 } 279 } else if (memcmp(ptr, "PC=", 3) == 0) { 280 ptr += 3; 281 strncpy(sc->product_code_string, ptr, 282 sizeof (sc->product_code_string) - 1); 283 } else if (memcmp(ptr, "SN=", 3) == 0) { 284 ptr += 3; 285 strncpy(sc->serial_number_string, ptr, 286 sizeof (sc->serial_number_string) - 1); 287 } 288 MXGE_NEXT_STRING(ptr); 289 } 290 291 if (found_mac) 292 return 0; 293 294 abort: 295 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 296 297 return ENXIO; 298 } 299 300 #if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__ 301 static void 302 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 303 { 304 uint32_t val; 305 unsigned long base, off; 306 char *va, *cfgptr; 307 device_t pdev, mcp55; 308 uint16_t vendor_id, device_id, word; 309 uintptr_t bus, slot, func, ivend, idev; 310 uint32_t *ptr32; 311 312 313 if (!mxge_nvidia_ecrc_enable) 314 return; 315 316 pdev = device_get_parent(device_get_parent(sc->dev)); 317 if (pdev == NULL) { 318 device_printf(sc->dev, "could not find parent?\n"); 319 return; 320 } 321 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 322 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 323 324 if (vendor_id != 0x10de) 325 return; 326 327 base = 0; 328 329 if (device_id == 0x005d) { 330 /* ck804, base address is magic */ 331 base = 0xe0000000UL; 332 } else if (device_id >= 0x0374 && device_id <= 0x378) { 333 /* mcp55, base address stored in chipset */ 334 mcp55 = pci_find_bsf(0, 0, 0); 335 if (mcp55 && 336 0x10de == pci_read_config(mcp55, 
PCIR_VENDOR, 2) && 337 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 338 word = pci_read_config(mcp55, 0x90, 2); 339 base = ((unsigned long)word & 0x7ffeU) << 25; 340 } 341 } 342 if (!base) 343 return; 344 345 /* XXXX 346 Test below is commented because it is believed that doing 347 config read/write beyond 0xff will access the config space 348 for the next larger function. Uncomment this and remove 349 the hacky pmap_mapdev() way of accessing config space when 350 FreeBSD grows support for extended pcie config space access 351 */ 352 #if 0 353 /* See if we can, by some miracle, access the extended 354 config space */ 355 val = pci_read_config(pdev, 0x178, 4); 356 if (val != 0xffffffff) { 357 val |= 0x40; 358 pci_write_config(pdev, 0x178, val, 4); 359 return; 360 } 361 #endif 362 /* Rather than using normal pci config space writes, we must 363 * map the Nvidia config space ourselves. This is because on 364 * opteron/nvidia class machine the 0xe000000 mapping is 365 * handled by the nvidia chipset, that means the internal PCI 366 * device (the on-chip northbridge), or the amd-8131 bridge 367 * and things behind them are not visible by this method. 
368 */ 369 370 BUS_READ_IVAR(device_get_parent(pdev), pdev, 371 PCI_IVAR_BUS, &bus); 372 BUS_READ_IVAR(device_get_parent(pdev), pdev, 373 PCI_IVAR_SLOT, &slot); 374 BUS_READ_IVAR(device_get_parent(pdev), pdev, 375 PCI_IVAR_FUNCTION, &func); 376 BUS_READ_IVAR(device_get_parent(pdev), pdev, 377 PCI_IVAR_VENDOR, &ivend); 378 BUS_READ_IVAR(device_get_parent(pdev), pdev, 379 PCI_IVAR_DEVICE, &idev); 380 381 off = base 382 + 0x00100000UL * (unsigned long)bus 383 + 0x00001000UL * (unsigned long)(func 384 + 8 * slot); 385 386 /* map it into the kernel */ 387 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 388 389 390 if (va == NULL) { 391 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 392 return; 393 } 394 /* get a pointer to the config space mapped into the kernel */ 395 cfgptr = va + (off & PAGE_MASK); 396 397 /* make sure that we can really access it */ 398 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 399 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 400 if (! (vendor_id == ivend && device_id == idev)) { 401 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 402 vendor_id, device_id); 403 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 404 return; 405 } 406 407 ptr32 = (uint32_t*)(cfgptr + 0x178); 408 val = *ptr32; 409 410 if (val == 0xffffffff) { 411 device_printf(sc->dev, "extended mapping failed\n"); 412 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 413 return; 414 } 415 *ptr32 = val | 0x40; 416 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 417 if (mxge_verbose) 418 device_printf(sc->dev, 419 "Enabled ECRC on upstream Nvidia bridge " 420 "at %d:%d:%d\n", 421 (int)bus, (int)slot, (int)func); 422 return; 423 } 424 #else 425 static void 426 mxge_enable_nvidia_ecrc(mxge_softc_t *sc, device_t pdev) 427 { 428 device_printf(sc->dev, 429 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 430 return; 431 } 432 #endif 433 434 435 static int 436 mxge_dma_test(mxge_softc_t *sc, int test_type) 437 { 438 mxge_cmd_t cmd; 439 bus_addr_t dmatest_bus = 
sc->dmabench_dma.bus_addr; 440 int status; 441 uint32_t len; 442 char *test = " "; 443 444 445 /* Run a small DMA test. 446 * The magic multipliers to the length tell the firmware 447 * to do DMA read, write, or read+write tests. The 448 * results are returned in cmd.data0. The upper 16 449 * bits of the return is the number of transfers completed. 450 * The lower 16 bits is the time in 0.5us ticks that the 451 * transfers took to complete. 452 */ 453 454 len = sc->tx.boundary; 455 456 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 457 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 458 cmd.data2 = len * 0x10000; 459 status = mxge_send_cmd(sc, test_type, &cmd); 460 if (status != 0) { 461 test = "read"; 462 goto abort; 463 } 464 sc->read_dma = ((cmd.data0>>16) * len * 2) / 465 (cmd.data0 & 0xffff); 466 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 467 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 468 cmd.data2 = len * 0x1; 469 status = mxge_send_cmd(sc, test_type, &cmd); 470 if (status != 0) { 471 test = "write"; 472 goto abort; 473 } 474 sc->write_dma = ((cmd.data0>>16) * len * 2) / 475 (cmd.data0 & 0xffff); 476 477 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 478 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 479 cmd.data2 = len * 0x10001; 480 status = mxge_send_cmd(sc, test_type, &cmd); 481 if (status != 0) { 482 test = "read/write"; 483 goto abort; 484 } 485 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 486 (cmd.data0 & 0xffff); 487 488 abort: 489 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 490 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 491 test, status); 492 493 return status; 494 } 495 496 /* 497 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 498 * when the PCI-E Completion packets are aligned on an 8-byte 499 * boundary. Some PCI-E chip sets always align Completion packets; on 500 * the ones that do not, the alignment can be enforced by enabling 501 * ECRC generation (if supported). 
502 * 503 * When PCI-E Completion packets are not aligned, it is actually more 504 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 505 * 506 * If the driver can neither enable ECRC nor verify that it has 507 * already been enabled, then it must use a firmware image which works 508 * around unaligned completion packets (ethp_z8e.dat), and it should 509 * also ensure that it never gives the device a Read-DMA which is 510 * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is 511 * enabled, then the driver should use the aligned (eth_z8e.dat) 512 * firmware image, and set tx.boundary to 4KB. 513 */ 514 515 static int 516 mxge_firmware_probe(mxge_softc_t *sc) 517 { 518 device_t dev = sc->dev; 519 int reg, status; 520 uint16_t pectl; 521 522 sc->tx.boundary = 4096; 523 /* 524 * Verify the max read request size was set to 4KB 525 * before trying the test with 4KB. 526 */ 527 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 528 pectl = pci_read_config(dev, reg + 0x8, 2); 529 if ((pectl & (5 << 12)) != (5 << 12)) { 530 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 531 pectl); 532 sc->tx.boundary = 2048; 533 } 534 } 535 536 /* 537 * load the optimized firmware (which assumes aligned PCIe 538 * completions) in order to see if it works on this host. 539 */ 540 sc->fw_name = mxge_fw_aligned; 541 status = mxge_load_firmware(sc); 542 if (status != 0) { 543 return status; 544 } 545 546 /* 547 * Enable ECRC if possible 548 */ 549 mxge_enable_nvidia_ecrc(sc); 550 551 /* 552 * Run a DMA test which watches for unaligned completions and 553 * aborts on the first one seen. 554 */ 555 556 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 557 if (status == 0) 558 return 0; /* keep the aligned firmware */ 559 560 if (status != E2BIG) 561 device_printf(dev, "DMA test failed: %d\n", status); 562 if (status == ENOSYS) 563 device_printf(dev, "Falling back to ethp! 
" 564 "Please install up to date fw\n"); 565 return status; 566 } 567 568 static int 569 mxge_select_firmware(mxge_softc_t *sc) 570 { 571 int aligned = 0; 572 573 574 if (mxge_force_firmware != 0) { 575 if (mxge_force_firmware == 1) 576 aligned = 1; 577 else 578 aligned = 0; 579 if (mxge_verbose) 580 device_printf(sc->dev, 581 "Assuming %s completions (forced)\n", 582 aligned ? "aligned" : "unaligned"); 583 goto abort; 584 } 585 586 /* if the PCIe link width is 4 or less, we can use the aligned 587 firmware and skip any checks */ 588 if (sc->link_width != 0 && sc->link_width <= 4) { 589 device_printf(sc->dev, 590 "PCIe x%d Link, expect reduced performance\n", 591 sc->link_width); 592 aligned = 1; 593 goto abort; 594 } 595 596 if (0 == mxge_firmware_probe(sc)) 597 return 0; 598 599 abort: 600 if (aligned) { 601 sc->fw_name = mxge_fw_aligned; 602 sc->tx.boundary = 4096; 603 } else { 604 sc->fw_name = mxge_fw_unaligned; 605 sc->tx.boundary = 2048; 606 } 607 return (mxge_load_firmware(sc)); 608 } 609 610 union qualhack 611 { 612 const char *ro_char; 613 char *rw_char; 614 }; 615 616 static int 617 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 618 { 619 620 621 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 622 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 623 be32toh(hdr->mcp_type)); 624 return EIO; 625 } 626 627 /* save firmware version for sysctl */ 628 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 629 if (mxge_verbose) 630 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 631 632 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 633 &sc->fw_ver_minor, &sc->fw_ver_tiny); 634 635 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 636 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 637 device_printf(sc->dev, "Found firmware version %s\n", 638 sc->fw_version); 639 device_printf(sc->dev, "Driver needs %d.%d\n", 640 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 641 return EINVAL; 642 } 643 return 0; 644 645 } 646 647 
static int 648 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 649 { 650 const struct firmware *fw; 651 const mcp_gen_header_t *hdr; 652 unsigned hdr_offset; 653 const char *fw_data; 654 union qualhack hack; 655 int status; 656 unsigned int i; 657 char dummy; 658 659 660 fw = firmware_get(sc->fw_name); 661 662 if (fw == NULL) { 663 device_printf(sc->dev, "Could not find firmware image %s\n", 664 sc->fw_name); 665 return ENOENT; 666 } 667 if (fw->datasize > *limit || 668 fw->datasize < MCP_HEADER_PTR_OFFSET + 4) { 669 device_printf(sc->dev, "Firmware image %s too large (%d/%d)\n", 670 sc->fw_name, (int)fw->datasize, (int) *limit); 671 status = ENOSPC; 672 goto abort_with_fw; 673 } 674 *limit = fw->datasize; 675 676 /* check id */ 677 fw_data = (const char *)fw->data; 678 hdr_offset = htobe32(*(const uint32_t *) 679 (fw_data + MCP_HEADER_PTR_OFFSET)); 680 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->datasize) { 681 device_printf(sc->dev, "Bad firmware file"); 682 status = EIO; 683 goto abort_with_fw; 684 } 685 hdr = (const void*)(fw_data + hdr_offset); 686 687 status = mxge_validate_firmware(sc, hdr); 688 if (status != 0) 689 goto abort_with_fw; 690 691 hack.ro_char = fw_data; 692 /* Copy the inflated firmware to NIC SRAM. 
*/ 693 for (i = 0; i < *limit; i += 256) { 694 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 695 hack.rw_char + i, 696 min(256U, (unsigned)(*limit - i))); 697 mb(); 698 dummy = *sc->sram; 699 mb(); 700 } 701 702 status = 0; 703 abort_with_fw: 704 firmware_put(fw, FIRMWARE_UNLOAD); 705 return status; 706 } 707 708 /* 709 * Enable or disable periodic RDMAs from the host to make certain 710 * chipsets resend dropped PCIe messages 711 */ 712 713 static void 714 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 715 { 716 char buf_bytes[72]; 717 volatile uint32_t *confirm; 718 volatile char *submit; 719 uint32_t *buf, dma_low, dma_high; 720 int i; 721 722 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 723 724 /* clear confirmation addr */ 725 confirm = (volatile uint32_t *)sc->cmd; 726 *confirm = 0; 727 mb(); 728 729 /* send an rdma command to the PCIe engine, and wait for the 730 response in the confirmation address. The firmware should 731 write a -1 there to indicate it is alive and well 732 */ 733 734 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 735 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 736 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 737 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 738 buf[2] = htobe32(0xffffffff); /* confirm data */ 739 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 740 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 741 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 742 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 743 buf[5] = htobe32(enable); /* enable? */ 744 745 746 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 747 748 mxge_pio_copy(submit, buf, 64); 749 mb(); 750 DELAY(1000); 751 mb(); 752 i = 0; 753 while (*confirm != 0xffffffff && i < 20) { 754 DELAY(1000); 755 i++; 756 } 757 if (*confirm != 0xffffffff) { 758 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 759 (enable ? 
"enable" : "disable"), confirm, 760 *confirm); 761 } 762 return; 763 } 764 765 static int 766 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 767 { 768 mcp_cmd_t *buf; 769 char buf_bytes[sizeof(*buf) + 8]; 770 volatile mcp_cmd_response_t *response = sc->cmd; 771 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 772 uint32_t dma_low, dma_high; 773 int err, sleep_total = 0; 774 775 /* ensure buf is aligned to 8 bytes */ 776 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 777 778 buf->data0 = htobe32(data->data0); 779 buf->data1 = htobe32(data->data1); 780 buf->data2 = htobe32(data->data2); 781 buf->cmd = htobe32(cmd); 782 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 783 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 784 785 buf->response_addr.low = htobe32(dma_low); 786 buf->response_addr.high = htobe32(dma_high); 787 mtx_lock(&sc->cmd_mtx); 788 response->result = 0xffffffff; 789 mb(); 790 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 791 792 /* wait up to 20ms */ 793 err = EAGAIN; 794 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 795 bus_dmamap_sync(sc->cmd_dma.dmat, 796 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 797 mb(); 798 switch (be32toh(response->result)) { 799 case 0: 800 data->data0 = be32toh(response->data); 801 err = 0; 802 break; 803 case 0xffffffff: 804 DELAY(1000); 805 break; 806 case MXGEFW_CMD_UNKNOWN: 807 err = ENOSYS; 808 break; 809 case MXGEFW_CMD_ERROR_UNALIGNED: 810 err = E2BIG; 811 break; 812 default: 813 device_printf(sc->dev, 814 "mxge: command %d " 815 "failed, result = %d\n", 816 cmd, be32toh(response->result)); 817 err = ENXIO; 818 break; 819 } 820 if (err != EAGAIN) 821 break; 822 } 823 if (err == EAGAIN) 824 device_printf(sc->dev, "mxge: command %d timed out" 825 "result = %d\n", 826 cmd, be32toh(response->result)); 827 mtx_unlock(&sc->cmd_mtx); 828 return err; 829 } 830 831 static int 832 mxge_adopt_running_firmware(mxge_softc_t *sc) 833 { 834 struct mcp_gen_header 
*hdr; 835 const size_t bytes = sizeof (struct mcp_gen_header); 836 size_t hdr_offset; 837 int status; 838 839 /* find running firmware header */ 840 hdr_offset = htobe32(*(volatile uint32_t *) 841 (sc->sram + MCP_HEADER_PTR_OFFSET)); 842 843 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 844 device_printf(sc->dev, 845 "Running firmware has bad header offset (%d)\n", 846 (int)hdr_offset); 847 return EIO; 848 } 849 850 /* copy header of running firmware from SRAM to host memory to 851 * validate firmware */ 852 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 853 if (hdr == NULL) { 854 device_printf(sc->dev, "could not malloc firmware hdr\n"); 855 return ENOMEM; 856 } 857 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 858 rman_get_bushandle(sc->mem_res), 859 hdr_offset, (char *)hdr, bytes); 860 status = mxge_validate_firmware(sc, hdr); 861 free(hdr, M_DEVBUF); 862 863 /* 864 * check to see if adopted firmware has bug where adopting 865 * it will cause broadcasts to be filtered unless the NIC 866 * is kept in ALLMULTI mode 867 */ 868 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 869 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 870 sc->adopted_rx_filter_bug = 1; 871 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 872 "working around rx filter bug\n", 873 sc->fw_ver_major, sc->fw_ver_minor, 874 sc->fw_ver_tiny); 875 } 876 877 return status; 878 } 879 880 881 static int 882 mxge_load_firmware(mxge_softc_t *sc) 883 { 884 volatile uint32_t *confirm; 885 volatile char *submit; 886 char buf_bytes[72]; 887 uint32_t *buf, size, dma_low, dma_high; 888 int status, i; 889 890 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 891 892 size = sc->sram_size; 893 status = mxge_load_firmware_helper(sc, &size); 894 if (status) { 895 /* Try to use the currently running firmware, if 896 it is new enough */ 897 status = mxge_adopt_running_firmware(sc); 898 if (status) { 899 device_printf(sc->dev, 900 "failed to adopt running firmware\n"); 901 
return status; 902 } 903 device_printf(sc->dev, 904 "Successfully adopted running firmware\n"); 905 if (sc->tx.boundary == 4096) { 906 device_printf(sc->dev, 907 "Using firmware currently running on NIC" 908 ". For optimal\n"); 909 device_printf(sc->dev, 910 "performance consider loading optimized " 911 "firmware\n"); 912 } 913 sc->fw_name = mxge_fw_unaligned; 914 sc->tx.boundary = 2048; 915 return 0; 916 } 917 /* clear confirmation addr */ 918 confirm = (volatile uint32_t *)sc->cmd; 919 *confirm = 0; 920 mb(); 921 /* send a reload command to the bootstrap MCP, and wait for the 922 response in the confirmation address. The firmware should 923 write a -1 there to indicate it is alive and well 924 */ 925 926 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 927 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 928 929 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 930 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 931 buf[2] = htobe32(0xffffffff); /* confirm data */ 932 933 /* FIX: All newest firmware should un-protect the bottom of 934 the sram before handoff. However, the very first interfaces 935 do not. 
Therefore the handoff copy must skip the first 8 bytes 936 */ 937 /* where the code starts*/ 938 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 939 buf[4] = htobe32(size - 8); /* length of code */ 940 buf[5] = htobe32(8); /* where to copy to */ 941 buf[6] = htobe32(0); /* where to jump to */ 942 943 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 944 mxge_pio_copy(submit, buf, 64); 945 mb(); 946 DELAY(1000); 947 mb(); 948 i = 0; 949 while (*confirm != 0xffffffff && i < 20) { 950 DELAY(1000*10); 951 i++; 952 bus_dmamap_sync(sc->cmd_dma.dmat, 953 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 954 } 955 if (*confirm != 0xffffffff) { 956 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 957 confirm, *confirm); 958 959 return ENXIO; 960 } 961 return 0; 962 } 963 964 static int 965 mxge_update_mac_address(mxge_softc_t *sc) 966 { 967 mxge_cmd_t cmd; 968 uint8_t *addr = sc->mac_addr; 969 int status; 970 971 972 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 973 | (addr[2] << 8) | addr[3]); 974 975 cmd.data1 = ((addr[4] << 8) | (addr[5])); 976 977 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 978 return status; 979 } 980 981 static int 982 mxge_change_pause(mxge_softc_t *sc, int pause) 983 { 984 mxge_cmd_t cmd; 985 int status; 986 987 if (pause) 988 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 989 &cmd); 990 else 991 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 992 &cmd); 993 994 if (status) { 995 device_printf(sc->dev, "Failed to set flow control mode\n"); 996 return ENXIO; 997 } 998 sc->pause = pause; 999 return 0; 1000 } 1001 1002 static void 1003 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1004 { 1005 mxge_cmd_t cmd; 1006 int status; 1007 1008 if (promisc) 1009 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1010 &cmd); 1011 else 1012 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1013 &cmd); 1014 1015 if (status) { 1016 device_printf(sc->dev, "Failed to set promisc mode\n"); 1017 } 1018 } 1019 1020 static void 1021 
mxge_set_multicast_list(mxge_softc_t *sc) 1022 { 1023 mxge_cmd_t cmd; 1024 struct ifmultiaddr *ifma; 1025 struct ifnet *ifp = sc->ifp; 1026 int err; 1027 1028 /* This firmware is known to not support multicast */ 1029 if (!sc->fw_multicast_support) 1030 return; 1031 1032 /* Disable multicast filtering while we play with the lists*/ 1033 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1034 if (err != 0) { 1035 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1036 " error status: %d\n", err); 1037 return; 1038 } 1039 1040 if (sc->adopted_rx_filter_bug) 1041 return; 1042 1043 if (ifp->if_flags & IFF_ALLMULTI) 1044 /* request to disable multicast filtering, so quit here */ 1045 return; 1046 1047 /* Flush all the filters */ 1048 1049 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1050 if (err != 0) { 1051 device_printf(sc->dev, 1052 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1053 ", error status: %d\n", err); 1054 return; 1055 } 1056 1057 /* Walk the multicast list, and add each address */ 1058 1059 IF_ADDR_LOCK(ifp); 1060 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1061 if (ifma->ifma_addr->sa_family != AF_LINK) 1062 continue; 1063 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1064 &cmd.data0, 4); 1065 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1066 &cmd.data1, 2); 1067 cmd.data0 = htonl(cmd.data0); 1068 cmd.data1 = htonl(cmd.data1); 1069 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1070 if (err != 0) { 1071 device_printf(sc->dev, "Failed " 1072 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1073 "%d\t", err); 1074 /* abort, leaving multicast filtering off */ 1075 IF_ADDR_UNLOCK(ifp); 1076 return; 1077 } 1078 } 1079 IF_ADDR_UNLOCK(ifp); 1080 /* Enable multicast filtering */ 1081 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1082 if (err != 0) { 1083 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1084 ", error status: %d\n", err); 1085 } 1086 } 1087 1088 static int 1089 
/*
 * Return the largest MTU the firmware/buffer scheme supports.
 * If a page-sized jumbo cluster already covers MXGEFW_MAX_MTU we can
 * always use the firmware maximum; otherwise probe whether the firmware
 * accepts virtually-contiguous big buffers, and if not, fall back to
 * one MJUMPAGESIZE cluster per frame.
 */
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if it we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

/*
 * Reset the NIC firmware and re-establish driver/firmware shared state:
 * interrupt queue location, interrupt-coalescing/claim/deassert register
 * pointers in SRAM, and all host-side ring counters.  Errors from the
 * individual offset queries are OR-ed together and checked once.
 *
 * interrupts_setup: when non-zero, also (re)program the interrupt
 * receive-done queue size and DMA address.
 */
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{

	mxge_cmd_t cmd;
	size_t bytes;
	int status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry);
		memset(sc->rx_done.entry, 0, bytes);
		cmd.data0 = (uint32_t)bytes;
		status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
		cmd.data0 = MXGE_LOWPART_TO_U32(sc->rx_done.dma.bus_addr);
		cmd.data1 = MXGE_HIGHPART_TO_U32(sc->rx_done.dma.bus_addr);
		status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd);
	}

	/* each GET_*_OFFSET command returns an offset into NIC SRAM in
	   cmd.data0; the corresponding register pointer is derived from it */
	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	/* reset mcp/driver shared state back to 0 */
	sc->rx_done.idx = 0;
	sc->rx_done.cnt = 0;
	sc->tx.req = 0;
	sc->tx.done = 0;
	sc->tx.pkt_done = 0;
	sc->tx.wake = 0;
	sc->tx_defrag = 0;
	sc->tx.stall = 0;
	sc->rx_big.cnt = 0;
	sc->rx_small.cnt = 0;
	/* 15 is the firmware's initial RDMA tag count — TODO confirm
	   against the MCP firmware definition */
	sc->rdma_tags_available = 15;
	sc->fw_stats->valid = 0;
	sc->fw_stats->send_done_count = 0;
	sc->lro_bad_csum = 0;
	sc->lro_queued = 0;
	sc->lro_flushed = 0;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, 0);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}

/*
 * Sysctl handler: read/write the interrupt coalescing delay (usecs).
 * Validates the new value, then writes it both to the softc and to the
 * firmware's SRAM register under the driver lock.
 */
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	/* reject zero and anything above one second */
	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

/*
 * Sysctl handler: enable/disable link-level flow control (pause frames).
 * Delegates the firmware update to mxge_change_pause() under the
 * driver lock.
 */
static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
1233 mtx_unlock(&sc->driver_mtx); 1234 return err; 1235 } 1236 1237 static int 1238 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1239 { 1240 int err; 1241 1242 if (arg1 == NULL) 1243 return EFAULT; 1244 arg2 = be32toh(*(int *)arg1); 1245 arg1 = NULL; 1246 err = sysctl_handle_int(oidp, arg1, arg2, req); 1247 1248 return err; 1249 } 1250 1251 static void 1252 mxge_add_sysctls(mxge_softc_t *sc) 1253 { 1254 struct sysctl_ctx_list *ctx; 1255 struct sysctl_oid_list *children; 1256 mcp_irq_data_t *fw; 1257 1258 ctx = device_get_sysctl_ctx(sc->dev); 1259 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1260 fw = sc->fw_stats; 1261 1262 /* random information */ 1263 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1264 "firmware_version", 1265 CTLFLAG_RD, &sc->fw_version, 1266 0, "firmware version"); 1267 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1268 "serial_number", 1269 CTLFLAG_RD, &sc->serial_number_string, 1270 0, "serial number"); 1271 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1272 "product_code", 1273 CTLFLAG_RD, &sc->product_code_string, 1274 0, "product_code"); 1275 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1276 "pcie_link_width", 1277 CTLFLAG_RD, &sc->link_width, 1278 0, "tx_boundary"); 1279 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1280 "tx_boundary", 1281 CTLFLAG_RD, &sc->tx.boundary, 1282 0, "tx_boundary"); 1283 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1284 "write_combine", 1285 CTLFLAG_RD, &sc->wc, 1286 0, "write combining PIO?"); 1287 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1288 "read_dma_MBs", 1289 CTLFLAG_RD, &sc->read_dma, 1290 0, "DMA Read speed in MB/s"); 1291 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1292 "write_dma_MBs", 1293 CTLFLAG_RD, &sc->write_dma, 1294 0, "DMA Write speed in MB/s"); 1295 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1296 "read_write_dma_MBs", 1297 CTLFLAG_RD, &sc->read_write_dma, 1298 0, "DMA concurrent Read/Write speed in MB/s"); 1299 1300 1301 /* performance related tunables */ 1302 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1303 
"intr_coal_delay", 1304 CTLTYPE_INT|CTLFLAG_RW, sc, 1305 0, mxge_change_intr_coal, 1306 "I", "interrupt coalescing delay in usecs"); 1307 1308 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1309 "flow_control_enabled", 1310 CTLTYPE_INT|CTLFLAG_RW, sc, 1311 0, mxge_change_flow_control, 1312 "I", "interrupt coalescing delay in usecs"); 1313 1314 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1315 "deassert_wait", 1316 CTLFLAG_RW, &mxge_deassert_wait, 1317 0, "Wait for IRQ line to go low in ihandler"); 1318 1319 /* stats block from firmware is in network byte order. 1320 Need to swap it */ 1321 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1322 "link_up", 1323 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1324 0, mxge_handle_be32, 1325 "I", "link up"); 1326 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1327 "rdma_tags_available", 1328 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1329 0, mxge_handle_be32, 1330 "I", "rdma_tags_available"); 1331 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1332 "dropped_bad_crc32", 1333 CTLTYPE_INT|CTLFLAG_RD, 1334 &fw->dropped_bad_crc32, 1335 0, mxge_handle_be32, 1336 "I", "dropped_bad_crc32"); 1337 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1338 "dropped_bad_phy", 1339 CTLTYPE_INT|CTLFLAG_RD, 1340 &fw->dropped_bad_phy, 1341 0, mxge_handle_be32, 1342 "I", "dropped_bad_phy"); 1343 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1344 "dropped_link_error_or_filtered", 1345 CTLTYPE_INT|CTLFLAG_RD, 1346 &fw->dropped_link_error_or_filtered, 1347 0, mxge_handle_be32, 1348 "I", "dropped_link_error_or_filtered"); 1349 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1350 "dropped_link_overflow", 1351 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1352 0, mxge_handle_be32, 1353 "I", "dropped_link_overflow"); 1354 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1355 "dropped_multicast_filtered", 1356 CTLTYPE_INT|CTLFLAG_RD, 1357 &fw->dropped_multicast_filtered, 1358 0, mxge_handle_be32, 1359 "I", "dropped_multicast_filtered"); 1360 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1361 
"dropped_no_big_buffer", 1362 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1363 0, mxge_handle_be32, 1364 "I", "dropped_no_big_buffer"); 1365 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1366 "dropped_no_small_buffer", 1367 CTLTYPE_INT|CTLFLAG_RD, 1368 &fw->dropped_no_small_buffer, 1369 0, mxge_handle_be32, 1370 "I", "dropped_no_small_buffer"); 1371 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1372 "dropped_overrun", 1373 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1374 0, mxge_handle_be32, 1375 "I", "dropped_overrun"); 1376 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1377 "dropped_pause", 1378 CTLTYPE_INT|CTLFLAG_RD, 1379 &fw->dropped_pause, 1380 0, mxge_handle_be32, 1381 "I", "dropped_pause"); 1382 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1383 "dropped_runt", 1384 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1385 0, mxge_handle_be32, 1386 "I", "dropped_runt"); 1387 1388 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1389 "dropped_unicast_filtered", 1390 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1391 0, mxge_handle_be32, 1392 "I", "dropped_unicast_filtered"); 1393 1394 /* host counters exported for debugging */ 1395 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1396 "rx_small_cnt", 1397 CTLFLAG_RD, &sc->rx_small.cnt, 1398 0, "rx_small_cnt"); 1399 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1400 "rx_big_cnt", 1401 CTLFLAG_RD, &sc->rx_big.cnt, 1402 0, "rx_small_cnt"); 1403 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1404 "tx_req", 1405 CTLFLAG_RD, &sc->tx.req, 1406 0, "tx_req"); 1407 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1408 "tx_done", 1409 CTLFLAG_RD, &sc->tx.done, 1410 0, "tx_done"); 1411 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1412 "tx_pkt_done", 1413 CTLFLAG_RD, &sc->tx.pkt_done, 1414 0, "tx_done"); 1415 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1416 "tx_stall", 1417 CTLFLAG_RD, &sc->tx.stall, 1418 0, "tx_stall"); 1419 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1420 "tx_wake", 1421 CTLFLAG_RD, &sc->tx.wake, 1422 0, "tx_wake"); 1423 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
1424 "tx_defrag", 1425 CTLFLAG_RD, &sc->tx_defrag, 1426 0, "tx_defrag"); 1427 1428 /* verbose printing? */ 1429 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1430 "verbose", 1431 CTLFLAG_RW, &mxge_verbose, 1432 0, "verbose printing"); 1433 1434 /* lro */ 1435 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1436 "lro_cnt", CTLFLAG_RD, &sc->lro_cnt, 1437 0, "number of lro merge queues"); 1438 1439 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1440 "lro_flushed", CTLFLAG_RD, &sc->lro_flushed, 1441 0, "number of lro merge queues flushed"); 1442 1443 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1444 "lro_queued", CTLFLAG_RD, &sc->lro_queued, 1445 0, "number of frames appended to lro merge queues"); 1446 1447 } 1448 1449 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1450 backwards one at a time and handle ring wraps */ 1451 1452 static inline void 1453 mxge_submit_req_backwards(mxge_tx_buf_t *tx, 1454 mcp_kreq_ether_send_t *src, int cnt) 1455 { 1456 int idx, starting_slot; 1457 starting_slot = tx->req; 1458 while (cnt > 1) { 1459 cnt--; 1460 idx = (starting_slot + cnt) & tx->mask; 1461 mxge_pio_copy(&tx->lanai[idx], 1462 &src[cnt], sizeof(*src)); 1463 mb(); 1464 } 1465 } 1466 1467 /* 1468 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1469 * at most 32 bytes at a time, so as to avoid involving the software 1470 * pio handler in the nic. 
We re-write the first segment's flags 1471 * to mark them valid only after writing the entire chain 1472 */ 1473 1474 static inline void 1475 mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src, 1476 int cnt) 1477 { 1478 int idx, i; 1479 uint32_t *src_ints; 1480 volatile uint32_t *dst_ints; 1481 mcp_kreq_ether_send_t *srcp; 1482 volatile mcp_kreq_ether_send_t *dstp, *dst; 1483 uint8_t last_flags; 1484 1485 idx = tx->req & tx->mask; 1486 1487 last_flags = src->flags; 1488 src->flags = 0; 1489 mb(); 1490 dst = dstp = &tx->lanai[idx]; 1491 srcp = src; 1492 1493 if ((idx + cnt) < tx->mask) { 1494 for (i = 0; i < (cnt - 1); i += 2) { 1495 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1496 mb(); /* force write every 32 bytes */ 1497 srcp += 2; 1498 dstp += 2; 1499 } 1500 } else { 1501 /* submit all but the first request, and ensure 1502 that it is submitted below */ 1503 mxge_submit_req_backwards(tx, src, cnt); 1504 i = 0; 1505 } 1506 if (i < cnt) { 1507 /* submit the first request */ 1508 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1509 mb(); /* barrier before setting valid flag */ 1510 } 1511 1512 /* re-write the last 32-bits with the valid flags */ 1513 src->flags = last_flags; 1514 src_ints = (uint32_t *)src; 1515 src_ints+=3; 1516 dst_ints = (volatile uint32_t *)dst; 1517 dst_ints+=3; 1518 *dst_ints = *src_ints; 1519 tx->req += cnt; 1520 mb(); 1521 } 1522 1523 static void 1524 mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt, 1525 int ip_off) 1526 { 1527 mxge_tx_buf_t *tx; 1528 mcp_kreq_ether_send_t *req; 1529 bus_dma_segment_t *seg; 1530 struct ip *ip; 1531 struct tcphdr *tcp; 1532 uint32_t low, high_swapped; 1533 int len, seglen, cum_len, cum_len_next; 1534 int next_is_first, chop, cnt, rdma_count, small; 1535 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1536 uint8_t flags, flags_next; 1537 static int once; 1538 1539 mss = m->m_pkthdr.tso_segsz; 1540 1541 /* negative cum_len signifies to the 1542 * send loop that we are still in the 1543 
* header portion of the TSO packet. 1544 */ 1545 1546 /* ensure we have the ethernet, IP and TCP 1547 header together in the first mbuf, copy 1548 it to a scratch buffer if not */ 1549 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1550 m_copydata(m, 0, ip_off + sizeof (*ip), 1551 sc->scratch); 1552 ip = (struct ip *)(sc->scratch + ip_off); 1553 } else { 1554 ip = (struct ip *)(mtod(m, char *) + ip_off); 1555 } 1556 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1557 + sizeof (*tcp))) { 1558 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1559 + sizeof (*tcp), sc->scratch); 1560 ip = (struct ip *)(mtod(m, char *) + ip_off); 1561 } 1562 1563 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1564 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1565 1566 /* TSO implies checksum offload on this hardware */ 1567 cksum_offset = ip_off + (ip->ip_hl << 2); 1568 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1569 1570 1571 /* for TSO, pseudo_hdr_offset holds mss. 1572 * The firmware figures out where to put 1573 * the checksum by parsing the header. */ 1574 pseudo_hdr_offset = htobe16(mss); 1575 1576 tx = &sc->tx; 1577 req = tx->req_list; 1578 seg = tx->seg_list; 1579 cnt = 0; 1580 rdma_count = 0; 1581 /* "rdma_count" is the number of RDMAs belonging to the 1582 * current packet BEFORE the current send request. For 1583 * non-TSO packets, this is equal to "count". 1584 * For TSO packets, rdma_count needs to be reset 1585 * to 0 after a segment cut. 1586 * 1587 * The rdma_count field of the send request is 1588 * the number of RDMAs of the packet starting at 1589 * that request. For TSO send requests with one ore more cuts 1590 * in the middle, this is the number of RDMAs starting 1591 * after the last cut in the request. All previous 1592 * segments before the last cut implicitly have 1 RDMA. 
1593 * 1594 * Since the number of RDMAs is not known beforehand, 1595 * it must be filled-in retroactively - after each 1596 * segmentation cut or at the end of the entire packet. 1597 */ 1598 1599 while (busdma_seg_cnt) { 1600 /* Break the busdma segment up into pieces*/ 1601 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1602 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1603 len = seg->ds_len; 1604 1605 while (len) { 1606 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1607 seglen = len; 1608 cum_len_next = cum_len + seglen; 1609 (req-rdma_count)->rdma_count = rdma_count + 1; 1610 if (__predict_true(cum_len >= 0)) { 1611 /* payload */ 1612 chop = (cum_len_next > mss); 1613 cum_len_next = cum_len_next % mss; 1614 next_is_first = (cum_len_next == 0); 1615 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1616 flags_next |= next_is_first * 1617 MXGEFW_FLAGS_FIRST; 1618 rdma_count |= -(chop | next_is_first); 1619 rdma_count += chop & !next_is_first; 1620 } else if (cum_len_next >= 0) { 1621 /* header ends */ 1622 rdma_count = -1; 1623 cum_len_next = 0; 1624 seglen = -cum_len; 1625 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1626 flags_next = MXGEFW_FLAGS_TSO_PLD | 1627 MXGEFW_FLAGS_FIRST | 1628 (small * MXGEFW_FLAGS_SMALL); 1629 } 1630 1631 req->addr_high = high_swapped; 1632 req->addr_low = htobe32(low); 1633 req->pseudo_hdr_offset = pseudo_hdr_offset; 1634 req->pad = 0; 1635 req->rdma_count = 1; 1636 req->length = htobe16(seglen); 1637 req->cksum_offset = cksum_offset; 1638 req->flags = flags | ((cum_len & 1) * 1639 MXGEFW_FLAGS_ALIGN_ODD); 1640 low += seglen; 1641 len -= seglen; 1642 cum_len = cum_len_next; 1643 flags = flags_next; 1644 req++; 1645 cnt++; 1646 rdma_count++; 1647 if (__predict_false(cksum_offset > seglen)) 1648 cksum_offset -= seglen; 1649 else 1650 cksum_offset = 0; 1651 if (__predict_false(cnt > tx->max_desc)) 1652 goto drop; 1653 } 1654 busdma_seg_cnt--; 1655 seg++; 1656 } 1657 (req-rdma_count)->rdma_count = rdma_count; 1658 1659 do { 1660 req--; 1661 
req->flags |= MXGEFW_FLAGS_TSO_LAST; 1662 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1663 1664 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1665 mxge_submit_req(tx, tx->req_list, cnt); 1666 return; 1667 1668 drop: 1669 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1670 m_freem(m); 1671 sc->ifp->if_oerrors++; 1672 if (!once) { 1673 printf("tx->max_desc exceeded via TSO!\n"); 1674 printf("mss = %d, %ld, %d!\n", mss, 1675 (long)seg - (long)tx->seg_list, tx->max_desc); 1676 once = 1; 1677 } 1678 return; 1679 1680 } 1681 1682 /* 1683 * We reproduce the software vlan tag insertion from 1684 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1685 * vlan tag insertion. We need to advertise this in order to have the 1686 * vlan interface respect our csum offload flags. 1687 */ 1688 static struct mbuf * 1689 mxge_vlan_tag_insert(struct mbuf *m) 1690 { 1691 struct ether_vlan_header *evl; 1692 1693 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1694 if (__predict_false(m == NULL)) 1695 return NULL; 1696 if (m->m_len < sizeof(*evl)) { 1697 m = m_pullup(m, sizeof(*evl)); 1698 if (__predict_false(m == NULL)) 1699 return NULL; 1700 } 1701 /* 1702 * Transform the Ethernet header into an Ethernet header 1703 * with 802.1Q encapsulation. 
1704 */ 1705 evl = mtod(m, struct ether_vlan_header *); 1706 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 1707 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 1708 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 1709 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 1710 m->m_flags &= ~M_VLANTAG; 1711 return m; 1712 } 1713 1714 static void 1715 mxge_encap(mxge_softc_t *sc, struct mbuf *m) 1716 { 1717 mcp_kreq_ether_send_t *req; 1718 bus_dma_segment_t *seg; 1719 struct mbuf *m_tmp; 1720 struct ifnet *ifp; 1721 mxge_tx_buf_t *tx; 1722 struct ip *ip; 1723 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 1724 uint16_t pseudo_hdr_offset; 1725 uint8_t flags, cksum_offset; 1726 1727 1728 1729 ifp = sc->ifp; 1730 tx = &sc->tx; 1731 1732 ip_off = sizeof (struct ether_header); 1733 if (m->m_flags & M_VLANTAG) { 1734 m = mxge_vlan_tag_insert(m); 1735 if (__predict_false(m == NULL)) 1736 goto drop; 1737 ip_off += ETHER_VLAN_ENCAP_LEN; 1738 } 1739 1740 /* (try to) map the frame for DMA */ 1741 idx = tx->req & tx->mask; 1742 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 1743 m, tx->seg_list, &cnt, 1744 BUS_DMA_NOWAIT); 1745 if (__predict_false(err == EFBIG)) { 1746 /* Too many segments in the chain. 
Try 1747 to defrag */ 1748 m_tmp = m_defrag(m, M_NOWAIT); 1749 if (m_tmp == NULL) { 1750 goto drop; 1751 } 1752 sc->tx_defrag++; 1753 m = m_tmp; 1754 err = bus_dmamap_load_mbuf_sg(tx->dmat, 1755 tx->info[idx].map, 1756 m, tx->seg_list, &cnt, 1757 BUS_DMA_NOWAIT); 1758 } 1759 if (__predict_false(err != 0)) { 1760 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 1761 " packet len = %d\n", err, m->m_pkthdr.len); 1762 goto drop; 1763 } 1764 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 1765 BUS_DMASYNC_PREWRITE); 1766 tx->info[idx].m = m; 1767 1768 1769 /* TSO is different enough, we handle it in another routine */ 1770 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 1771 mxge_encap_tso(sc, m, cnt, ip_off); 1772 return; 1773 } 1774 1775 req = tx->req_list; 1776 cksum_offset = 0; 1777 pseudo_hdr_offset = 0; 1778 flags = MXGEFW_FLAGS_NO_TSO; 1779 1780 /* checksum offloading? */ 1781 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 1782 /* ensure ip header is in first mbuf, copy 1783 it to a scratch buffer if not */ 1784 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1785 m_copydata(m, 0, ip_off + sizeof (*ip), 1786 sc->scratch); 1787 ip = (struct ip *)(sc->scratch + ip_off); 1788 } else { 1789 ip = (struct ip *)(mtod(m, char *) + ip_off); 1790 } 1791 cksum_offset = ip_off + (ip->ip_hl << 2); 1792 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 1793 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 1794 req->cksum_offset = cksum_offset; 1795 flags |= MXGEFW_FLAGS_CKSUM; 1796 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 1797 } else { 1798 odd_flag = 0; 1799 } 1800 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 1801 flags |= MXGEFW_FLAGS_SMALL; 1802 1803 /* convert segments into a request list */ 1804 cum_len = 0; 1805 seg = tx->seg_list; 1806 req->flags = MXGEFW_FLAGS_FIRST; 1807 for (i = 0; i < cnt; i++) { 1808 req->addr_low = 1809 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 1810 req->addr_high = 1811 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1812 
req->length = htobe16(seg->ds_len); 1813 req->cksum_offset = cksum_offset; 1814 if (cksum_offset > seg->ds_len) 1815 cksum_offset -= seg->ds_len; 1816 else 1817 cksum_offset = 0; 1818 req->pseudo_hdr_offset = pseudo_hdr_offset; 1819 req->pad = 0; /* complete solid 16-byte block */ 1820 req->rdma_count = 1; 1821 req->flags |= flags | ((cum_len & 1) * odd_flag); 1822 cum_len += seg->ds_len; 1823 seg++; 1824 req++; 1825 req->flags = 0; 1826 } 1827 req--; 1828 /* pad runts to 60 bytes */ 1829 if (cum_len < 60) { 1830 req++; 1831 req->addr_low = 1832 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 1833 req->addr_high = 1834 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 1835 req->length = htobe16(60 - cum_len); 1836 req->cksum_offset = 0; 1837 req->pseudo_hdr_offset = pseudo_hdr_offset; 1838 req->pad = 0; /* complete solid 16-byte block */ 1839 req->rdma_count = 1; 1840 req->flags |= flags | ((cum_len & 1) * odd_flag); 1841 cnt++; 1842 } 1843 1844 tx->req_list[0].rdma_count = cnt; 1845 #if 0 1846 /* print what the firmware will see */ 1847 for (i = 0; i < cnt; i++) { 1848 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 1849 "cso:%d, flags:0x%x, rdma:%d\n", 1850 i, (int)ntohl(tx->req_list[i].addr_high), 1851 (int)ntohl(tx->req_list[i].addr_low), 1852 (int)ntohs(tx->req_list[i].length), 1853 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 1854 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 1855 tx->req_list[i].rdma_count); 1856 } 1857 printf("--------------\n"); 1858 #endif 1859 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1860 mxge_submit_req(tx, tx->req_list, cnt); 1861 return; 1862 1863 drop: 1864 m_freem(m); 1865 ifp->if_oerrors++; 1866 return; 1867 } 1868 1869 1870 1871 1872 static inline void 1873 mxge_start_locked(mxge_softc_t *sc) 1874 { 1875 struct mbuf *m; 1876 struct ifnet *ifp; 1877 mxge_tx_buf_t *tx; 1878 1879 ifp = sc->ifp; 1880 tx = &sc->tx; 1881 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 1882 
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(sc, m);
	}
	/* ran out of transmit slots */
	if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
		sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
		tx->stall++;
	}
}

/* ifnet if_start entry point: take the tx lock and drain the queue */
static void
mxge_start(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;


	mtx_lock(&sc->tx_mtx);
	mxge_start_locked(sc);
	mtx_unlock(&sc->tx_mtx);
}

/*
 * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.   We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	/* poison the first slot's address until the whole burst lands */
	low = src->addr_low;
	src->addr_low = 0xffffffff;
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	mb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	mb();
	src->addr_low = low;
	dst->addr_low = low;
	mb();
}

/*
 * Allocate and DMA-map a small (MHLEN) receive mbuf for ring slot idx.
 * Every 8th slot, the accumulated group of 8 shadow descriptors is
 * pushed to the NIC — this happens even on allocation failure, so the
 * NIC keeps seeing the previous (recycled) buffer addresses.
 * Returns 0 or an errno (ENOBUFS / busdma error).
 */
static int
mxge_get_buf_small(mxge_softc_t *sc, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_buf_t *rx = &sc->rx_small;
	int cnt, err;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = MHLEN;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));


done:
	if ((idx & 7) == 7)
		mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
	return err;
}

/*
 * Allocate and DMA-map a big (cluster/jumbo) receive mbuf starting at
 * ring slot idx; a jumbo may occupy up to rx->nbufs consecutive slots
 * (at most 3 busdma segments).  As above, any group of 8 descriptors
 * completed while advancing is pushed to the NIC even on failure.
 */
static int
mxge_get_buf_big(mxge_softc_t *sc, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg[3];
	struct mbuf *m;
	mxge_rx_buf_t *rx = &sc->rx_big;
	int cnt, err, i;

	if (rx->cl_size == MCLBYTES)
		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	else
		m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = rx->cl_size;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;

	for (i = 0; i < cnt; i++) {
		rx->shadow[idx + i].addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
		rx->shadow[idx + i].addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
	}


done:
	for (i = 0; i < rx->nbufs; i++) {
		if ((idx & 7) == 7) {
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		}
		idx++;
	}
	return err;
}

/*
 *  Myri10GE hardware checksums are not valid if the sender
 *  padded the frame with non-zero padding.  This is because
 *  the firmware just does a simple 16-bit 1s complement
 *  checksum across the entire frame, excluding the first 14
 *  bytes.  It is best to simply to check the checksum and
 *  tell the stack about it only if the checksum is good
 */
static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eh;
	struct ip *ip;
	uint16_t c;

	eh = mtod(m, struct ether_header *);

	/* only deal with IPv4 TCP & UDP for now */
	if (__predict_false(eh->ether_type !=  htons(ETHERTYPE_IP)))
		return 1;
	ip = (struct ip *)(eh + 1);
	if (__predict_false(ip->ip_p != IPPROTO_TCP &&
			    ip->ip_p != IPPROTO_UDP))
		return 1;

	/* fold the pseudo-header into the firmware's raw sum; a good
	   checksum yields 0xffff, so c == 0 after the xor means valid */
	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		      htonl(ntohs(csum) + ntohs(ip->ip_len) +
			    - (ip->ip_hl << 2) + ip->ip_p));
	c ^= 0xffff;
	return (c);
}

/*
 * Strip an 802.1Q header from a received frame, adjusting the
 * firmware-computed partial checksum (in/out via *csum, network order)
 * to exclude the 4 removed bytes, and record the tag in the pkthdr.
 */
static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	struct ether_header *eh;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);
	eh = mtod(m, struct ether_header *);

	/*
	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
	 * after what the firmware thought was the end of the ethernet
	 * header.
	 */

	/* put checksum into host byte order */
	*csum = ntohs(*csum);
	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	/* one's-complement subtraction of the 4 VLAN bytes, with
	   end-around carry folds */
	(*csum) += ~partial;
	(*csum) +=  ((*csum) < ~partial);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);

	/* restore checksum to network byte order;
	   later consumers expect this */
	*csum = htons(*csum);

	/* save the tag */
	m->m_flags |= M_VLANTAG;
	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
	 */
	bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
	      ETHER_HDR_LEN - ETHER_TYPE_LEN);
	m_adj(m, ETHER_VLAN_ENCAP_LEN);
}


/*
 * Complete one received frame held in big buffers: replace the mbuf(s)
 * in the ring, strip any VLAN tag, validate the firmware checksum, try
 * LRO, and pass the frame to the stack.  On buffer-replacement failure
 * the frame is dropped and the old mbuf recycled.
 */
static inline void
mxge_rx_done_big(mxge_softc_t *sc, uint32_t len, uint32_t csum)
{
	struct ifnet *ifp;
	struct mbuf *m;
	struct ether_header *eh;
	mxge_rx_buf_t *rx;
	bus_dmamap_t old_map;
	int idx;
	uint16_t tcpudp_csum;

	ifp = sc->ifp;
	rx = &sc->rx_big;
	idx = rx->cnt & rx->mask;
	rx->cnt += rx->nbufs;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_big(sc, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ifp->if_ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
		if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum)))
			return;
		/* otherwise, it was a UDP frame, or a TCP frame which
		   we could not do LRO on.  Tell the stack that the
		   checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
	}
	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}

/*
 * Same as mxge_rx_done_big(), but for frames that fit in a small
 * (MHLEN) buffer; the ring advances one slot per frame.
 */
static inline void
mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum)
{
	struct ifnet *ifp;
	struct ether_header *eh;
	struct mbuf *m;
	mxge_rx_buf_t *rx;
	bus_dmamap_t old_map;
	int idx;
	uint16_t tcpudp_csum;

	ifp = sc->ifp;
	rx = &sc->rx_small;
	idx = rx->cnt & rx->mask;
	rx->cnt++;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_small(sc, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ifp->if_ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
		if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum)))
			return;
		/* otherwise, it was a UDP frame, or a TCP frame which
		   we could not do LRO on.  Tell the stack that the
		   checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
	}

	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}

/*
 * Process the receive-done queue written by the firmware: dispatch
 * each entry to the small/big completion path (a non-zero length marks
 * a valid entry; we zero it to hand the slot back), then flush any
 * active LRO sessions.  Bounded to ~2 ring traversals per call to
 * limit livelock under load.
 */
static inline void
mxge_clean_rx_done(mxge_softc_t *sc)
{
	mxge_rx_done_t *rx_done = &sc->rx_done;
	struct lro_entry *lro;
	int limit = 0;
	uint16_t length;
	uint16_t checksum;


	while (rx_done->entry[rx_done->idx].length != 0) {
		length = ntohs(rx_done->entry[rx_done->idx].length);
		rx_done->entry[rx_done->idx].length = 0;
		checksum = rx_done->entry[rx_done->idx].checksum;
		if (length <= (MHLEN - MXGEFW_PAD))
			mxge_rx_done_small(sc, length, checksum);
		else
			mxge_rx_done_big(sc, length, checksum);
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & rx_done->mask;

		/* limit potential for livelock */
		if (__predict_false(++limit > 2 * rx_done->mask))
			break;
	}
	while (!SLIST_EMPTY(&sc->lro_active)) {
		lro = SLIST_FIRST(&sc->lro_active);
		SLIST_REMOVE_HEAD(&sc->lro_active, next);
		mxge_lro_flush(sc, lro);
	}
}


/*
 * Reclaim transmit descriptors up to the firmware's completion index
 * (mcp_idx): unload DMA maps and free mbufs for completed sends, and
 * advance pkt_done per flagged (last-descriptor) slot.  Restarts the
 * transmit queue if it was flow-controlled and is now 3/4 empty.
 */
static inline void
mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx)
{
	struct ifnet *ifp;
	mxge_tx_buf_t *tx;
	struct mbuf *m;
	bus_dmamap_t map;
	int idx, limit;

	limit = 0;
	tx = &sc->tx;
	ifp = sc->ifp;
	while (tx->pkt_done != mcp_idx) {
		idx = tx->done & tx->mask;
		tx->done++;
		m = tx->info[idx].m;
		/* mbuf and DMA map only attached to the first
		   segment per-mbuf */
		if (m != NULL) {
			ifp->if_opackets++;
			tx->info[idx].m = NULL;
			map = tx->info[idx].map;
			bus_dmamap_unload(tx->dmat, map);
			m_freem(m);
		}
		if (tx->info[idx].flag) {
			tx->info[idx].flag = 0;
			tx->pkt_done++;
		}
		/* limit potential for livelock by only handling
		   2 full tx rings per call */
if (__predict_false(++limit > 2 * tx->mask)) 2271 break; 2272 } 2273 2274 /* If we have space, clear IFF_OACTIVE to tell the stack that 2275 its OK to send packets */ 2276 2277 if (ifp->if_drv_flags & IFF_DRV_OACTIVE && 2278 tx->req - tx->done < (tx->mask + 1)/4) { 2279 mtx_lock(&sc->tx_mtx); 2280 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 2281 sc->tx.wake++; 2282 mxge_start_locked(sc); 2283 mtx_unlock(&sc->tx_mtx); 2284 } 2285 } 2286 2287 static void 2288 mxge_intr(void *arg) 2289 { 2290 mxge_softc_t *sc = arg; 2291 mcp_irq_data_t *stats = sc->fw_stats; 2292 mxge_tx_buf_t *tx = &sc->tx; 2293 mxge_rx_done_t *rx_done = &sc->rx_done; 2294 uint32_t send_done_count; 2295 uint8_t valid; 2296 2297 2298 /* make sure the DMA has finished */ 2299 if (!stats->valid) { 2300 return; 2301 } 2302 valid = stats->valid; 2303 2304 if (!sc->msi_enabled) { 2305 /* lower legacy IRQ */ 2306 *sc->irq_deassert = 0; 2307 if (!mxge_deassert_wait) 2308 /* don't wait for conf. that irq is low */ 2309 stats->valid = 0; 2310 } else { 2311 stats->valid = 0; 2312 } 2313 2314 /* loop while waiting for legacy irq deassertion */ 2315 do { 2316 /* check for transmit completes and receives */ 2317 send_done_count = be32toh(stats->send_done_count); 2318 while ((send_done_count != tx->pkt_done) || 2319 (rx_done->entry[rx_done->idx].length != 0)) { 2320 mxge_tx_done(sc, (int)send_done_count); 2321 mxge_clean_rx_done(sc); 2322 send_done_count = be32toh(stats->send_done_count); 2323 } 2324 } while (*((volatile uint8_t *) &stats->valid)); 2325 2326 if (__predict_false(stats->stats_updated)) { 2327 if (sc->link_state != stats->link_up) { 2328 sc->link_state = stats->link_up; 2329 if (sc->link_state) { 2330 if_link_state_change(sc->ifp, LINK_STATE_UP); 2331 if (mxge_verbose) 2332 device_printf(sc->dev, "link up\n"); 2333 } else { 2334 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2335 if (mxge_verbose) 2336 device_printf(sc->dev, "link down\n"); 2337 } 2338 } 2339 if (sc->rdma_tags_available != 2340 
		    be32toh(sc->fw_stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(sc->fw_stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}
		sc->down_cnt += stats->link_down;
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
		*sc->irq_claim = be32toh(3);
	*(sc->irq_claim + 1) = be32toh(3);
}

/*
 * No-op if_init handler; the interface is actually brought up via
 * mxge_open() from the ioctl path (see mxge_ioctl/SIOCSIFFLAGS).
 */
static void
mxge_init(void *arg)
{
}



/*
 * Free every mbuf still attached to the rx_big, rx_small and tx
 * rings, unloading each one's busdma map first.  Called on close and
 * on open failure; safe to call when slots are already empty (NULL
 * entries are skipped).
 */
static void
mxge_free_mbufs(mxge_softc_t *sc)
{
	int i;

	for (i = 0; i <= sc->rx_big.mask; i++) {
		if (sc->rx_big.info[i].m == NULL)
			continue;
		bus_dmamap_unload(sc->rx_big.dmat,
				  sc->rx_big.info[i].map);
		m_freem(sc->rx_big.info[i].m);
		sc->rx_big.info[i].m = NULL;
	}

	for (i = 0; i <= sc->rx_small.mask; i++) {
		if (sc->rx_small.info[i].m == NULL)
			continue;
		bus_dmamap_unload(sc->rx_small.dmat,
				  sc->rx_small.info[i].map);
		m_freem(sc->rx_small.info[i].m);
		sc->rx_small.info[i].m = NULL;
	}

	for (i = 0; i <= sc->tx.mask; i++) {
		/* also clear the last-descriptor flag for each tx slot */
		sc->tx.info[i].flag = 0;
		if (sc->tx.info[i].m == NULL)
			continue;
		bus_dmamap_unload(sc->tx.dmat,
				  sc->tx.info[i].map);
		m_freem(sc->tx.info[i].m);
		sc->tx.info[i].m = NULL;
	}
}

/*
 * Release everything mxge_alloc_rings() allocated: the rx completion
 * DMA block, the host-side bookkeeping arrays, and the per-slot
 * dmamaps plus their tags.  All frees are guarded by NULL checks so
 * this can be used from partial-failure paths.
 */
static void
mxge_free_rings(mxge_softc_t *sc)
{
	int i;

	if (sc->rx_done.entry != NULL)
		mxge_dma_free(&sc->rx_done.dma);
	sc->rx_done.entry = NULL;
	if (sc->tx.req_bytes != NULL)
		free(sc->tx.req_bytes, M_DEVBUF);
	if (sc->tx.seg_list != NULL)
		free(sc->tx.seg_list, M_DEVBUF);
	if (sc->rx_small.shadow != NULL)
		free(sc->rx_small.shadow, M_DEVBUF);
	if (sc->rx_big.shadow != NULL)
		free(sc->rx_big.shadow, M_DEVBUF);
	if (sc->tx.info != NULL) {
		if (sc->tx.dmat != NULL) {
			for (i = 0; i <= sc->tx.mask; i++) {
				bus_dmamap_destroy(sc->tx.dmat,
						   sc->tx.info[i].map);
			}
			bus_dma_tag_destroy(sc->tx.dmat);
		}
		free(sc->tx.info, M_DEVBUF);
	}
	if (sc->rx_small.info != NULL) {
		if (sc->rx_small.dmat != NULL) {
			for (i = 0; i <= sc->rx_small.mask; i++) {
				bus_dmamap_destroy(sc->rx_small.dmat,
						   sc->rx_small.info[i].map);
			}
			bus_dmamap_destroy(sc->rx_small.dmat,
					   sc->rx_small.extra_map);
			bus_dma_tag_destroy(sc->rx_small.dmat);
		}
		free(sc->rx_small.info, M_DEVBUF);
	}
	if (sc->rx_big.info != NULL) {
		if (sc->rx_big.dmat != NULL) {
			for (i = 0; i <= sc->rx_big.mask; i++) {
				bus_dmamap_destroy(sc->rx_big.dmat,
						   sc->rx_big.info[i].map);
			}
			bus_dmamap_destroy(sc->rx_big.dmat,
					   sc->rx_big.extra_map);
			bus_dma_tag_destroy(sc->rx_big.dmat);
		}
		free(sc->rx_big.info, M_DEVBUF);
	}
}

/*
 * Ask the firmware for its ring sizes, then allocate the rx
 * completion queue, the host bookkeeping arrays, the busdma tags and
 * the per-slot dmamaps.  On any failure everything already allocated
 * is torn down via mxge_free_rings().
 */
static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int i, err;
	unsigned long bytes;

	/* get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine ring sizes\n");
		goto abort_with_nothing;
	}

	rx_ring_size = cmd.data0;

	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
	rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t);
	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
	IFQ_SET_READY(&sc->ifp->if_snd);

	/* ring sizes are powers of two, so (entries - 1) is the index mask */
	sc->tx.mask = tx_ring_entries - 1;
	sc->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
	sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1;
	/* completion ring covers both rx rings, hence twice the entries */
	sc->rx_done.mask = (2 * rx_ring_entries) - 1;
	err = ENOMEM;

	/* allocate interrupt queues */
	bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry);
	err = mxge_dma_alloc(sc, &sc->rx_done.dma, bytes, 4096);
	if (err != 0)
		goto abort_with_nothing;
	sc->rx_done.entry = sc->rx_done.dma.addr;
	bzero(sc->rx_done.entry, bytes);

	/* allocate the tx request copy block */
	bytes = 8 +
		sizeof (*sc->tx.req_list) * (sc->tx.max_desc + 4);
	sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	if (sc->tx.req_bytes == NULL)
		goto abort_with_alloc;
	/* ensure req_list entries are aligned to 8 bytes */
	sc->tx.req_list = (mcp_kreq_ether_send_t *)
		((unsigned long)(sc->tx.req_bytes + 7) & ~7UL);

	/* allocate the tx busdma segment list */
	bytes = sizeof (*sc->tx.seg_list) * sc->tx.max_desc;
	sc->tx.seg_list = (bus_dma_segment_t *)
		malloc(bytes, M_DEVBUF, M_WAITOK);
	if (sc->tx.seg_list == NULL)
		goto abort_with_alloc;

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow);
	sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.shadow == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow);
	sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.shadow == NULL)
		goto abort_with_alloc;

	/* allocate the host info rings */
	bytes = tx_ring_entries * sizeof (*sc->tx.info);
	sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->tx.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_small.info);
	sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.info);
	sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.info == NULL)
		goto abort_with_alloc;

	/* allocate the busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx.boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 sc->tx.max_desc - 2,	/* num segs */
				 sc->tx.boundary,	/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 3*4096,		/* maxsize */
				 3,			/* num segs */
				 4096,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		goto abort_with_alloc;
	}

	/* now use these tags to setup dmamaps for each slot
	   in each ring */
	for (i = 0; i <= sc->tx.mask; i++) {
		err = bus_dmamap_create(sc->tx.dmat, 0,
					&sc->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d tx dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	for (i = 0; i <= sc->rx_small.mask; i++) {
		err = bus_dmamap_create(sc->rx_small.dmat, 0,
					&sc->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_small dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	/* spare map used when swapping a newly-loaded buffer in */
	err = bus_dmamap_create(sc->rx_small.dmat, 0,
				&sc->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
		goto abort_with_alloc;
	}

	for (i = 0; i <= sc->rx_big.mask; i++) {
		err = bus_dmamap_create(sc->rx_big.dmat, 0,
					&sc->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_big dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	err = bus_dmamap_create(sc->rx_big.dmat, 0,
				&sc->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
		goto abort_with_alloc;
	}
	return 0;

abort_with_alloc:
	mxge_free_rings(sc);

abort_with_nothing:
	return err;
}

/*
 * Pick the big-rx buffer layout for a given MTU: the cluster size to
 * allocate (*cl_size), the buffer size advertised to the firmware
 * (*big_buf_size), and how many firmware buffers each cluster is
 * carved into (*nbufs).  The firmware wants a power-of-two buffer
 * count, hence the 3 -> 4 round up.
 */
static void
mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
{
	int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;

	if (bufsize < MCLBYTES) {
		/* easy, everything fits in a single buffer */
		*big_buf_size = MCLBYTES;
		*cl_size = MCLBYTES;
		*nbufs = 1;
		return;
	}

	if (bufsize < MJUMPAGESIZE) {
		/* still easy, everything still fits in a single buffer */
		*big_buf_size = MJUMPAGESIZE;
		*cl_size = MJUMPAGESIZE;
		*nbufs = 1;
		return;
	}
	/* now we need to use virtually contiguous buffers */
	*cl_size = MJUM9BYTES;
	*big_buf_size = 4096;
	*nbufs = mtu / 4096 + 1;
	/* needs to be a power of two, so round up */
	if (*nbufs == 3)
		*nbufs = 4;
}

static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int i, err, big_bytes;
	bus_dmamap_t
map; 2677 bus_addr_t bus; 2678 struct lro_entry *lro_entry; 2679 2680 SLIST_INIT(&sc->lro_free); 2681 SLIST_INIT(&sc->lro_active); 2682 2683 for (i = 0; i < sc->lro_cnt; i++) { 2684 lro_entry = (struct lro_entry *) 2685 malloc(sizeof (*lro_entry), M_DEVBUF, M_NOWAIT | M_ZERO); 2686 if (lro_entry == NULL) { 2687 sc->lro_cnt = i; 2688 break; 2689 } 2690 SLIST_INSERT_HEAD(&sc->lro_free, lro_entry, next); 2691 } 2692 2693 /* Copy the MAC address in case it was overridden */ 2694 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 2695 2696 err = mxge_reset(sc, 1); 2697 if (err != 0) { 2698 device_printf(sc->dev, "failed to reset\n"); 2699 return EIO; 2700 } 2701 2702 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, 2703 &sc->rx_big.cl_size, &sc->rx_big.nbufs); 2704 2705 cmd.data0 = sc->rx_big.nbufs; 2706 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 2707 &cmd); 2708 /* error is only meaningful if we're trying to set 2709 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 2710 if (err && sc->rx_big.nbufs > 1) { 2711 device_printf(sc->dev, 2712 "Failed to set alway-use-n to %d\n", 2713 sc->rx_big.nbufs); 2714 return EIO; 2715 } 2716 /* get the lanai pointers to the send and receive rings */ 2717 2718 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 2719 sc->tx.lanai = 2720 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 2721 err |= mxge_send_cmd(sc, 2722 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 2723 sc->rx_small.lanai = 2724 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 2725 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 2726 sc->rx_big.lanai = 2727 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 2728 2729 if (err != 0) { 2730 device_printf(sc->dev, 2731 "failed to get ring sizes or locations\n"); 2732 return EIO; 2733 } 2734 2735 /* stock receive rings */ 2736 for (i = 0; i <= sc->rx_small.mask; i++) { 2737 map = sc->rx_small.info[i].map; 2738 err = mxge_get_buf_small(sc, map, i); 2739 if (err) { 2740 
device_printf(sc->dev, "alloced %d/%d smalls\n", 2741 i, sc->rx_small.mask + 1); 2742 goto abort; 2743 } 2744 } 2745 for (i = 0; i <= sc->rx_big.mask; i++) { 2746 sc->rx_big.shadow[i].addr_low = 0xffffffff; 2747 sc->rx_big.shadow[i].addr_high = 0xffffffff; 2748 } 2749 for (i = 0; i <= sc->rx_big.mask; i += sc->rx_big.nbufs) { 2750 map = sc->rx_big.info[i].map; 2751 err = mxge_get_buf_big(sc, map, i); 2752 if (err) { 2753 device_printf(sc->dev, "alloced %d/%d bigs\n", 2754 i, sc->rx_big.mask + 1); 2755 goto abort; 2756 } 2757 } 2758 2759 /* Give the firmware the mtu and the big and small buffer 2760 sizes. The firmware wants the big buf size to be a power 2761 of two. Luckily, FreeBSD's clusters are powers of two */ 2762 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 2763 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 2764 cmd.data0 = MHLEN - MXGEFW_PAD; 2765 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 2766 &cmd); 2767 cmd.data0 = big_bytes; 2768 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 2769 2770 if (err != 0) { 2771 device_printf(sc->dev, "failed to setup params\n"); 2772 goto abort; 2773 } 2774 2775 /* Now give him the pointer to the stats block */ 2776 cmd.data0 = MXGE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr); 2777 cmd.data1 = MXGE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr); 2778 cmd.data2 = sizeof(struct mcp_irq_data); 2779 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 2780 2781 if (err != 0) { 2782 bus = sc->fw_stats_dma.bus_addr; 2783 bus += offsetof(struct mcp_irq_data, send_done_count); 2784 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 2785 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 2786 err = mxge_send_cmd(sc, 2787 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 2788 &cmd); 2789 /* Firmware cannot support multicast without STATS_DMA_V2 */ 2790 sc->fw_multicast_support = 0; 2791 } else { 2792 sc->fw_multicast_support = 1; 2793 } 2794 2795 if (err != 0) { 2796 device_printf(sc->dev, "failed to setup 
params\n"); 2797 goto abort; 2798 } 2799 2800 /* Finally, start the firmware running */ 2801 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 2802 if (err) { 2803 device_printf(sc->dev, "Couldn't bring up link\n"); 2804 goto abort; 2805 } 2806 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 2807 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 2808 2809 return 0; 2810 2811 2812 abort: 2813 mxge_free_mbufs(sc); 2814 2815 return err; 2816 } 2817 2818 static int 2819 mxge_close(mxge_softc_t *sc) 2820 { 2821 struct lro_entry *lro_entry; 2822 mxge_cmd_t cmd; 2823 int err, old_down_cnt; 2824 2825 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 2826 old_down_cnt = sc->down_cnt; 2827 mb(); 2828 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 2829 if (err) { 2830 device_printf(sc->dev, "Couldn't bring down link\n"); 2831 } 2832 if (old_down_cnt == sc->down_cnt) { 2833 /* wait for down irq */ 2834 DELAY(10 * sc->intr_coal_delay); 2835 } 2836 if (old_down_cnt == sc->down_cnt) { 2837 device_printf(sc->dev, "never got down irq\n"); 2838 } 2839 2840 mxge_free_mbufs(sc); 2841 2842 while (!SLIST_EMPTY(&sc->lro_free)) { 2843 lro_entry = SLIST_FIRST(&sc->lro_free); 2844 SLIST_REMOVE_HEAD(&sc->lro_free, next); 2845 } 2846 return 0; 2847 } 2848 2849 static void 2850 mxge_setup_cfg_space(mxge_softc_t *sc) 2851 { 2852 device_t dev = sc->dev; 2853 int reg; 2854 uint16_t cmd, lnk, pectl; 2855 2856 /* find the PCIe link width and set max read request to 4KB*/ 2857 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 2858 lnk = pci_read_config(dev, reg + 0x12, 2); 2859 sc->link_width = (lnk >> 4) & 0x3f; 2860 2861 pectl = pci_read_config(dev, reg + 0x8, 2); 2862 pectl = (pectl & ~0x7000) | (5 << 12); 2863 pci_write_config(dev, reg + 0x8, pectl, 2); 2864 } 2865 2866 /* Enable DMA and Memory space access */ 2867 pci_enable_busmaster(dev); 2868 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 2869 cmd |= PCIM_CMD_MEMEN; 2870 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 2871 } 2872 2873 static uint32_t 
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;

	/* find the vendor specific offset */
	if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
		device_printf(sc->dev,
			      "could not find vendor specific offset\n");
		return (uint32_t)-1;
	}
	/* enable read32 mode */
	pci_write_config(dev, vs + 0x10, 0x3, 1);
	/* tell NIC which register to read */
	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
	return (pci_read_config(dev, vs + 0x14, 4));
}

/*
 * Recover from a transmit hang detected by mxge_watchdog().  If the
 * NIC rebooted (PCI config space wiped) we currently just give up and
 * stop the watchdog; otherwise dump ring state and bounce the
 * interface via close/open.
 */
static void
mxge_watchdog_reset(mxge_softc_t *sc)
{
	int err;
	uint32_t reboot;
	uint16_t cmd;

	err = ENXIO;

	device_printf(sc->dev, "Watchdog reset!\n");

	/*
	 * check to see if the NIC rebooted. If it did, then all of
	 * PCI config space has been reset, and things like the
	 * busmaster bit will be zero. If this is the case, then we
	 * must restore PCI config space before the NIC can be used
	 * again
	 */
	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
	if (cmd == 0xffff) {
		/*
		 * maybe the watchdog caught the NIC rebooting; wait
		 * up to 100ms for it to finish.  If it does not come
		 * back, then give up
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff) {
			device_printf(sc->dev, "NIC disappeared!\n");
			goto abort;
		}
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* print the reboot status */
		reboot = mxge_read_reboot(sc);
		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
			      reboot);
		/* restore PCI configuration space */

		/* XXXX waiting for pci_cfg_restore() to be exported */
		goto abort; /* just abort for now */

		/* and redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);
	} else {
		device_printf(sc->dev, "NIC did not reboot, ring state:\n");
		device_printf(sc->dev, "tx.req=%d tx.done=%d\n",
			      sc->tx.req, sc->tx.done);
		device_printf(sc->dev, "pkt_done=%d fw=%d\n",
			      sc->tx.pkt_done,
			      be32toh(sc->fw_stats->send_done_count));
	}

	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}

abort:
	/*
	 * stop the watchdog if the nic is dead, to avoid spamming the
	 * console
	 */
	if (err != 0) {
		callout_stop(&sc->co_hdl);
	}
}

/*
 * Periodic hang check driven by mxge_tick(): if transmits have been
 * pending for a whole tick with no completion progress, trigger a
 * watchdog reset.
 */
static void
mxge_watchdog(mxge_softc_t *sc)
{
	mxge_tx_buf_t *tx = &sc->tx;

	/* see if we have outstanding transmits, which
	   have been pending for more than mxge_ticks */
	if (tx->req != tx->done &&
	    tx->watchdog_req != tx->watchdog_done &&
	    tx->done == tx->watchdog_done)
		mxge_watchdog_reset(sc);

	tx->watchdog_req = tx->req;
	tx->watchdog_done = tx->done;
}

/*
 * Periodic timer callback (runs with driver_mtx held via
 * callout_init_mtx); reschedules itself and runs the watchdog.
 */
static void
mxge_tick(void *arg)
{
	mxge_softc_t *sc = arg;


	/* Synchronize with possible callout reset/stop. */
	if (callout_pending(&sc->co_hdl) ||
	    !callout_active(&sc->co_hdl)) {
		mtx_unlock(&sc->driver_mtx);
		return;
	}

	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	mxge_watchdog(sc);
}

/* media cannot be changed on this NIC; always reject */
static int
mxge_media_change(struct ifnet *ifp)
{
	return EINVAL;
}

/*
 * Validate and apply a new MTU.  If the interface is running it must
 * be bounced (close/open) so the firmware buffer sizes get
 * reprogrammed; on open failure we roll back to the old MTU.
 */
static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;


	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
		return EINVAL;
	mtx_lock(&sc->driver_mtx);
	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		callout_stop(&sc->co_hdl);
		mxge_close(sc);
		err = mxge_open(sc);
		if (err != 0) {
			/* open failed: restore the old MTU and retry */
			ifp->if_mtu = old_mtu;
			mxge_close(sc);
			(void) mxge_open(sc);
		}
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	}
	mtx_unlock(&sc->driver_mtx);
	return err;
}

/*
 * Report media status: always autoselect ethernet; active/full-duplex
 * bits mirror the firmware's link_up flag.
 */
static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = ifp->if_softc;


	if (sc == NULL)
		return;
	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_status |= sc->fw_stats->link_up ? IFM_ACTIVE : 0;
	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
	ifmr->ifm_active |= sc->fw_stats->link_up ?
IFM_FDX : 0; 3040 } 3041 3042 static int 3043 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 3044 { 3045 mxge_softc_t *sc = ifp->if_softc; 3046 struct ifreq *ifr = (struct ifreq *)data; 3047 int err, mask; 3048 3049 err = 0; 3050 switch (command) { 3051 case SIOCSIFADDR: 3052 case SIOCGIFADDR: 3053 err = ether_ioctl(ifp, command, data); 3054 break; 3055 3056 case SIOCSIFMTU: 3057 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3058 break; 3059 3060 case SIOCSIFFLAGS: 3061 mtx_lock(&sc->driver_mtx); 3062 if (ifp->if_flags & IFF_UP) { 3063 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 3064 err = mxge_open(sc); 3065 callout_reset(&sc->co_hdl, mxge_ticks, 3066 mxge_tick, sc); 3067 } else { 3068 /* take care of promis can allmulti 3069 flag chages */ 3070 mxge_change_promisc(sc, 3071 ifp->if_flags & IFF_PROMISC); 3072 mxge_set_multicast_list(sc); 3073 } 3074 } else { 3075 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3076 mxge_close(sc); 3077 callout_stop(&sc->co_hdl); 3078 } 3079 } 3080 mtx_unlock(&sc->driver_mtx); 3081 break; 3082 3083 case SIOCADDMULTI: 3084 case SIOCDELMULTI: 3085 mtx_lock(&sc->driver_mtx); 3086 mxge_set_multicast_list(sc); 3087 mtx_unlock(&sc->driver_mtx); 3088 break; 3089 3090 case SIOCSIFCAP: 3091 mtx_lock(&sc->driver_mtx); 3092 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3093 if (mask & IFCAP_TXCSUM) { 3094 if (IFCAP_TXCSUM & ifp->if_capenable) { 3095 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 3096 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 3097 | CSUM_TSO); 3098 } else { 3099 ifp->if_capenable |= IFCAP_TXCSUM; 3100 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 3101 } 3102 } else if (mask & IFCAP_RXCSUM) { 3103 if (IFCAP_RXCSUM & ifp->if_capenable) { 3104 ifp->if_capenable &= ~IFCAP_RXCSUM; 3105 sc->csum_flag = 0; 3106 } else { 3107 ifp->if_capenable |= IFCAP_RXCSUM; 3108 sc->csum_flag = 1; 3109 } 3110 } 3111 if (mask & IFCAP_TSO4) { 3112 if (IFCAP_TSO4 & ifp->if_capenable) { 3113 ifp->if_capenable &= ~IFCAP_TSO4; 3114 ifp->if_hwassist &= 
				    ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				/* TSO depends on tx checksum offload */
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}

		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		mtx_unlock(&sc->driver_mtx);
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
		break;

	default:
		err = ENOTTY;
	}
	return err;
}

/*
 * Pull the hw.mxge.* loader tunables into the driver's module-level
 * knobs and clamp them to sane values.
 */
static void
mxge_fetch_tunables(mxge_softc_t *sc)
{

	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
			  &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
			  &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
			  &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
			  &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
			  &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
			  &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
	TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);

	if (bootverbose)
		mxge_verbose = 1;
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	if (mxge_ticks == 0)
		mxge_ticks = hz;
	sc->pause = mxge_flow_control;

}

/*
 * Device attach: create the parent DMA tag, map the NIC's SRAM BAR,
 * parse the EEPROM strings, allocate the out-of-band DMA blocks,
 * set up the interrupt (MSI if available), load firmware, allocate
 * rings, and finally register the ifnet with the network stack.
 * The abort_with_* ladder unwinds exactly what was set up so far.
 */
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int count, rid, err;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 4096,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_nothing;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
		 device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->tx_mtx_name, sizeof(sc->tx_mtx_name), "%s:tx",
		 device_get_nameunit(dev));
	mtx_init(&sc->tx_mtx, sc->tx_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
		 "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
		 MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->fw_stats_dma,
			     sizeof (*sc->fw_stats), 64);
	if (err != 0)
		goto abort_with_zeropad_dma;
	sc->fw_stats = (mcp_irq_data_t *)sc->fw_stats_dma.addr;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_fw_stats;

	/* Add our ithread */
	count = pci_msi_count(dev);
	if (count == 1 && pci_alloc_msi(dev, &count) == 0) {
		rid = 1;
		sc->msi_enabled = 1;
	} else {
		rid = 0;
	}
	sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(dev, "could not alloc interrupt\n");
		goto abort_with_dmabench;
	}
	if (mxge_verbose)
		device_printf(dev, "using %s irq %ld\n",
			      sc->msi_enabled ? "MSI" : "INTx",
			      rman_get_start(sc->irq_res));
	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_irq_res;
	sc->intr_coal_delay = mxge_intr_coal_delay;
	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_irq_res;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_irq_res;
	}

	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
			     NULL, mxge_intr, sc, &sc->ih);
	if (err != 0) {
		goto abort_with_rings;
	}
	/* hook into the network stack */
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_baudrate = 100000000;
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;

	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d.  Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_capenable = ifp->if_capabilities;
	sc->csum_flag = 1;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to 1500 */
	if (ifp->if_capabilities & IFCAP_JUMBO_MTU)
		ifp->if_mtu = 9000;

	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL);
	mxge_add_sysctls(sc);
	return 0;

abort_with_rings:
	mxge_free_rings(sc);
abort_with_irq_res:
	bus_release_resource(dev, SYS_RES_IRQ,
			     sc->msi_enabled ? 1 : 0, sc->irq_res);
	if (sc->msi_enabled)
		pci_release_msi(dev);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_fw_stats:
	mxge_dma_free(&sc->fw_stats_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->tx_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);

abort_with_nothing:
	return err;
}

/*
 * Device detach: refuse while vlans are attached, bring the interface
 * down, then release every resource mxge_attach() acquired (reverse
 * order of attach).
 */
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (sc->ifp->if_vlantrunk != NULL) {
		device_printf(sc->dev,
			      "Detach vlans before removing module\n");
		return EBUSY;
	}
	mtx_lock(&sc->driver_mtx);
	if (sc->ifp->if_drv_flags &
IFF_DRV_RUNNING) 3389 mxge_close(sc); 3390 callout_stop(&sc->co_hdl); 3391 mtx_unlock(&sc->driver_mtx); 3392 ether_ifdetach(sc->ifp); 3393 ifmedia_removeall(&sc->media); 3394 mxge_dummy_rdma(sc, 0); 3395 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 3396 mxge_free_rings(sc); 3397 bus_release_resource(dev, SYS_RES_IRQ, 3398 sc->msi_enabled ? 1 : 0, sc->irq_res); 3399 if (sc->msi_enabled) 3400 pci_release_msi(dev); 3401 3402 sc->rx_done.entry = NULL; 3403 mxge_dma_free(&sc->rx_done.dma); 3404 mxge_dma_free(&sc->fw_stats_dma); 3405 mxge_dma_free(&sc->dmabench_dma); 3406 mxge_dma_free(&sc->zeropad_dma); 3407 mxge_dma_free(&sc->cmd_dma); 3408 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 3409 pci_disable_busmaster(dev); 3410 mtx_destroy(&sc->cmd_mtx); 3411 mtx_destroy(&sc->tx_mtx); 3412 mtx_destroy(&sc->driver_mtx); 3413 if_free(sc->ifp); 3414 bus_dma_tag_destroy(sc->parent_dmat); 3415 return 0; 3416 } 3417 3418 static int 3419 mxge_shutdown(device_t dev) 3420 { 3421 return 0; 3422 } 3423 3424 /* 3425 This file uses Myri10GE driver indentation. 3426 3427 Local Variables: 3428 c-file-style:"linux" 3429 tab-width:8 3430 End: 3431 */ 3432