1 /****************************************************************************** 2 3 Copyright (c) 2006, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 3. Neither the name of the Myricom Inc, nor the names of its 17 contributors may be used to endorse or promote products derived from 18 this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 31 32 ***************************************************************************/ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/linker.h> 40 #include <sys/firmware.h> 41 #include <sys/endian.h> 42 #include <sys/sockio.h> 43 #include <sys/mbuf.h> 44 #include <sys/malloc.h> 45 #include <sys/kdb.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/module.h> 49 #include <sys/memrange.h> 50 #include <sys/socket.h> 51 #include <sys/sysctl.h> 52 #include <sys/sx.h> 53 54 #include <net/if.h> 55 #include <net/if_arp.h> 56 #include <net/ethernet.h> 57 #include <net/if_dl.h> 58 #include <net/if_media.h> 59 60 #include <net/bpf.h> 61 62 #include <net/if_types.h> 63 #include <net/if_vlan_var.h> 64 #include <net/zlib.h> 65 66 #include <netinet/in_systm.h> 67 #include <netinet/in.h> 68 #include <netinet/ip.h> 69 #include <netinet/tcp.h> 70 71 #include <machine/bus.h> 72 #include <machine/in_cksum.h> 73 #include <machine/resource.h> 74 #include <sys/bus.h> 75 #include <sys/rman.h> 76 77 #include <dev/pci/pcireg.h> 78 #include <dev/pci/pcivar.h> 79 80 #include <vm/vm.h> /* for pmap_mapdev() */ 81 #include <vm/pmap.h> 82 83 #if defined(__i386) || defined(__amd64) 84 #include <machine/specialreg.h> 85 #endif 86 87 #include <dev/mxge/mxge_mcp.h> 88 #include <dev/mxge/mcp_gen_header.h> 89 #include <dev/mxge/if_mxge_var.h> 90 91 /* tunable params */ 92 static int mxge_nvidia_ecrc_enable = 1; 93 static int mxge_force_firmware = 0; 94 static int mxge_intr_coal_delay = 30; 95 static int mxge_deassert_wait = 1; 96 static int mxge_flow_control = 1; 97 static int mxge_verbose = 0; 98 static int mxge_lro_cnt = 8; 99 static int mxge_ticks; 100 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 101 static char *mxge_fw_aligned = "mxge_eth_z8e"; 102 103 static int mxge_probe(device_t dev); 104 static int mxge_attach(device_t dev); 105 static int mxge_detach(device_t dev); 106 static int mxge_shutdown(device_t dev); 107 static void mxge_intr(void *arg); 108 109 static device_method_t mxge_methods[] = 110 { 111 /* Device interface */ 112 DEVMETHOD(device_probe, mxge_probe), 113 DEVMETHOD(device_attach, mxge_attach), 114 DEVMETHOD(device_detach, mxge_detach), 115 DEVMETHOD(device_shutdown, mxge_shutdown), 116 {0, 0} 117 }; 118 119 static driver_t mxge_driver = 120 { 121 "mxge", 122 mxge_methods, 123 sizeof(mxge_softc_t), 124 }; 125 126 static devclass_t mxge_devclass; 127 128 /* Declare ourselves to be a child of the PCI bus.*/ 129 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 130 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 131 132 static int mxge_load_firmware(mxge_softc_t *sc); 133 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 134 static int mxge_close(mxge_softc_t *sc); 135 static int mxge_open(mxge_softc_t *sc); 136 static void mxge_tick(void *arg); 137 138 static int 139 mxge_probe(device_t dev) 140 { 141 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 142 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E)) { 143 device_set_desc(dev, "Myri10G-PCIE-8A"); 144 return 0; 145 } 146 return ENXIO; 147 } 148 149 static void 150 mxge_enable_wc(mxge_softc_t *sc) 151 { 152 struct mem_range_desc mrdesc; 153 vm_paddr_t pa; 154 vm_offset_t len; 155 int err, action; 156 157 len = rman_get_size(sc->mem_res); 158 #if defined(__i386) || defined(__amd64) 159 err = pmap_change_attr((vm_offset_t) sc->sram, 160 len, PAT_WRITE_COMBINING); 161 if (err == 0) 162 return; 163 else 164 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 165 err); 166 #endif 167 pa = rman_get_start(sc->mem_res); 168 mrdesc.mr_base = pa; 169 mrdesc.mr_len = len; 170 mrdesc.mr_flags = MDF_WRITECOMBINE; 171 action = MEMRANGE_SET_UPDATE; 172 strcpy((char *)&mrdesc.mr_owner, "mxge"); 173 err = mem_range_attr_set(&mrdesc, &action); 174 if (err != 0) { 175 device_printf(sc->dev, 176 "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n", 177 (unsigned long)pa, (unsigned long)len, err); 178 } else { 179 sc->wc = 1; 180 } 181 } 182 183 184 /* callback to get our DMA address */ 185 static void 186 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 187 int error) 188 { 189 if (error == 0) { 190 *(bus_addr_t *) arg = segs->ds_addr; 191 } 192 } 193 194 static int 195 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 196 bus_size_t alignment) 197 { 198 int err; 199 device_t dev = sc->dev; 200 201 /* allocate DMAable memory tags */ 202 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 203 alignment, /* alignment */ 204 4096, /* boundary */ 205 BUS_SPACE_MAXADDR, /* low */ 206 BUS_SPACE_MAXADDR, /* high */ 207 NULL, NULL, /* filter */ 208 bytes, /* maxsize */ 209 1, /* num segs */ 210 4096, /* maxsegsize */ 211 BUS_DMA_COHERENT, /* flags */ 212 NULL, NULL, /* lock */ 213 &dma->dmat); /* tag */ 214 if (err != 0) { 215 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 216 return err; 217 } 218 219 /* allocate DMAable memory & map */ 220 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 221 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 222 | BUS_DMA_ZERO), &dma->map); 223 if (err != 0) { 224 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 225 goto abort_with_dmat; 226 } 227 228 /* load the memory */ 229 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 230 mxge_dmamap_callback, 231 (void *)&dma->bus_addr, 0); 232 if (err != 0) { 233 device_printf(dev, "couldn't load map (err = %d)\n", err); 234 goto abort_with_mem; 235 } 236 return 0; 237 238 abort_with_mem: 239 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 240 abort_with_dmat: 241 (void)bus_dma_tag_destroy(dma->dmat); 242 return err; 243 } 244 245 246 static void 247 mxge_dma_free(mxge_dma_t *dma) 248 { 249 bus_dmamap_unload(dma->dmat, dma->map); 250 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 251 (void)bus_dma_tag_destroy(dma->dmat); 252 } 253 254 /* 255 * The eeprom strings on the lanaiX have the format 256 * SN=x\0 257 * MAC=x:x:x:x:x:x\0 258 * PC=text\0 259 */ 260 261 static int 262 mxge_parse_strings(mxge_softc_t *sc) 263 { 264 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 265 266 char *ptr, *limit; 267 int i, found_mac; 268 269 ptr = sc->eeprom_strings; 270 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 271 found_mac = 0; 272 while (ptr < limit && *ptr != '\0') { 273 if (memcmp(ptr, "MAC=", 4) == 0) { 274 ptr += 1; 275 sc->mac_addr_string = ptr; 276 for (i = 0; i < 6; i++) { 277 ptr += 3; 278 if ((ptr + 2) > limit) 279 goto abort; 280 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 281 found_mac = 1; 282 } 283 } else if (memcmp(ptr, "PC=", 3) == 0) { 284 ptr += 3; 285 strncpy(sc->product_code_string, ptr, 286 sizeof (sc->product_code_string) - 1); 287 } else if (memcmp(ptr, "SN=", 3) == 0) { 288 ptr += 3; 289 strncpy(sc->serial_number_string, ptr, 290 sizeof (sc->serial_number_string) - 1); 291 } 292 MXGE_NEXT_STRING(ptr); 293 } 294 295 if (found_mac) 296 return 0; 297 298 abort: 299 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 300 301 return ENXIO; 302 } 303 304 #if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__ 305 static void 306 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 307 { 308 uint32_t val; 309 unsigned long base, off; 310 char *va, *cfgptr; 311 device_t pdev, mcp55; 312 uint16_t vendor_id, device_id, word; 313 uintptr_t bus, slot, func, ivend, idev; 314 uint32_t *ptr32; 315 316 317 if (!mxge_nvidia_ecrc_enable) 318 return; 319 320 pdev = device_get_parent(device_get_parent(sc->dev)); 321 if (pdev == NULL) { 322 device_printf(sc->dev, "could not find parent?\n"); 323 return; 324 } 325 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 326 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 327 328 if (vendor_id != 0x10de) 329 return; 330 331 base = 0; 332 333 if (device_id == 0x005d) { 334 /* ck804, base address is magic */ 335 base = 0xe0000000UL; 336 } else if (device_id >= 0x0374 && device_id <= 0x378) { 337 /* mcp55, base address stored in chipset */ 338 mcp55 = pci_find_bsf(0, 0, 0); 339 if (mcp55 && 340 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 341 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 342 word = pci_read_config(mcp55, 0x90, 2); 343 base = ((unsigned long)word & 0x7ffeU) << 25; 344 } 345 } 346 if (!base) 347 return; 348 349 /* XXXX 350 Test below is commented because it is believed that doing 351 config read/write beyond 0xff will access the config space 352 for the next larger function. Uncomment this and remove 353 the hacky pmap_mapdev() way of accessing config space when 354 FreeBSD grows support for extended pcie config space access 355 */ 356 #if 0 357 /* See if we can, by some miracle, access the extended 358 config space */ 359 val = pci_read_config(pdev, 0x178, 4); 360 if (val != 0xffffffff) { 361 val |= 0x40; 362 pci_write_config(pdev, 0x178, val, 4); 363 return; 364 } 365 #endif 366 /* Rather than using normal pci config space writes, we must 367 * map the Nvidia config space ourselves. This is because on 368 * opteron/nvidia class machine the 0xe000000 mapping is 369 * handled by the nvidia chipset, that means the internal PCI 370 * device (the on-chip northbridge), or the amd-8131 bridge 371 * and things behind them are not visible by this method. 372 */ 373 374 BUS_READ_IVAR(device_get_parent(pdev), pdev, 375 PCI_IVAR_BUS, &bus); 376 BUS_READ_IVAR(device_get_parent(pdev), pdev, 377 PCI_IVAR_SLOT, &slot); 378 BUS_READ_IVAR(device_get_parent(pdev), pdev, 379 PCI_IVAR_FUNCTION, &func); 380 BUS_READ_IVAR(device_get_parent(pdev), pdev, 381 PCI_IVAR_VENDOR, &ivend); 382 BUS_READ_IVAR(device_get_parent(pdev), pdev, 383 PCI_IVAR_DEVICE, &idev); 384 385 off = base 386 + 0x00100000UL * (unsigned long)bus 387 + 0x00001000UL * (unsigned long)(func 388 + 8 * slot); 389 390 /* map it into the kernel */ 391 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 392 393 394 if (va == NULL) { 395 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 396 return; 397 } 398 /* get a pointer to the config space mapped into the kernel */ 399 cfgptr = va + (off & PAGE_MASK); 400 401 /* make sure that we can really access it */ 402 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 403 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 404 if (! (vendor_id == ivend && device_id == idev)) { 405 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 406 vendor_id, device_id); 407 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 408 return; 409 } 410 411 ptr32 = (uint32_t*)(cfgptr + 0x178); 412 val = *ptr32; 413 414 if (val == 0xffffffff) { 415 device_printf(sc->dev, "extended mapping failed\n"); 416 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 417 return; 418 } 419 *ptr32 = val | 0x40; 420 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 421 if (mxge_verbose) 422 device_printf(sc->dev, 423 "Enabled ECRC on upstream Nvidia bridge " 424 "at %d:%d:%d\n", 425 (int)bus, (int)slot, (int)func); 426 return; 427 } 428 #else 429 static void 430 mxge_enable_nvidia_ecrc(mxge_softc_t *sc, device_t pdev) 431 { 432 device_printf(sc->dev, 433 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 434 return; 435 } 436 #endif 437 438 439 static int 440 mxge_dma_test(mxge_softc_t *sc, int test_type) 441 { 442 mxge_cmd_t cmd; 443 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 444 int status; 445 uint32_t len; 446 char *test = " "; 447 448 449 /* Run a small DMA test. 450 * The magic multipliers to the length tell the firmware 451 * to do DMA read, write, or read+write tests. The 452 * results are returned in cmd.data0. The upper 16 453 * bits of the return is the number of transfers completed. 454 * The lower 16 bits is the time in 0.5us ticks that the 455 * transfers took to complete. 456 */ 457 458 len = sc->tx.boundary; 459 460 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 461 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 462 cmd.data2 = len * 0x10000; 463 status = mxge_send_cmd(sc, test_type, &cmd); 464 if (status != 0) { 465 test = "read"; 466 goto abort; 467 } 468 sc->read_dma = ((cmd.data0>>16) * len * 2) / 469 (cmd.data0 & 0xffff); 470 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 471 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 472 cmd.data2 = len * 0x1; 473 status = mxge_send_cmd(sc, test_type, &cmd); 474 if (status != 0) { 475 test = "write"; 476 goto abort; 477 } 478 sc->write_dma = ((cmd.data0>>16) * len * 2) / 479 (cmd.data0 & 0xffff); 480 481 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 482 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 483 cmd.data2 = len * 0x10001; 484 status = mxge_send_cmd(sc, test_type, &cmd); 485 if (status != 0) { 486 test = "read/write"; 487 goto abort; 488 } 489 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 490 (cmd.data0 & 0xffff); 491 492 abort: 493 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 494 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 495 test, status); 496 497 return status; 498 } 499 500 /* 501 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 502 * when the PCI-E Completion packets are aligned on an 8-byte 503 * boundary. Some PCI-E chip sets always align Completion packets; on 504 * the ones that do not, the alignment can be enforced by enabling 505 * ECRC generation (if supported). 506 * 507 * When PCI-E Completion packets are not aligned, it is actually more 508 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 509 * 510 * If the driver can neither enable ECRC nor verify that it has 511 * already been enabled, then it must use a firmware image which works 512 * around unaligned completion packets (ethp_z8e.dat), and it should 513 * also ensure that it never gives the device a Read-DMA which is 514 * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is 515 * enabled, then the driver should use the aligned (eth_z8e.dat) 516 * firmware image, and set tx.boundary to 4KB. 517 */ 518 519 static int 520 mxge_firmware_probe(mxge_softc_t *sc) 521 { 522 device_t dev = sc->dev; 523 int reg, status; 524 uint16_t pectl; 525 526 sc->tx.boundary = 4096; 527 /* 528 * Verify the max read request size was set to 4KB 529 * before trying the test with 4KB. 530 */ 531 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 532 pectl = pci_read_config(dev, reg + 0x8, 2); 533 if ((pectl & (5 << 12)) != (5 << 12)) { 534 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 535 pectl); 536 sc->tx.boundary = 2048; 537 } 538 } 539 540 /* 541 * load the optimized firmware (which assumes aligned PCIe 542 * completions) in order to see if it works on this host. 543 */ 544 sc->fw_name = mxge_fw_aligned; 545 status = mxge_load_firmware(sc); 546 if (status != 0) { 547 return status; 548 } 549 550 /* 551 * Enable ECRC if possible 552 */ 553 mxge_enable_nvidia_ecrc(sc); 554 555 /* 556 * Run a DMA test which watches for unaligned completions and 557 * aborts on the first one seen. 558 */ 559 560 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 561 if (status == 0) 562 return 0; /* keep the aligned firmware */ 563 564 if (status != E2BIG) 565 device_printf(dev, "DMA test failed: %d\n", status); 566 if (status == ENOSYS) 567 device_printf(dev, "Falling back to ethp! " 568 "Please install up to date fw\n"); 569 return status; 570 } 571 572 static int 573 mxge_select_firmware(mxge_softc_t *sc) 574 { 575 int aligned = 0; 576 577 578 if (mxge_force_firmware != 0) { 579 if (mxge_force_firmware == 1) 580 aligned = 1; 581 else 582 aligned = 0; 583 if (mxge_verbose) 584 device_printf(sc->dev, 585 "Assuming %s completions (forced)\n", 586 aligned ? "aligned" : "unaligned"); 587 goto abort; 588 } 589 590 /* if the PCIe link width is 4 or less, we can use the aligned 591 firmware and skip any checks */ 592 if (sc->link_width != 0 && sc->link_width <= 4) { 593 device_printf(sc->dev, 594 "PCIe x%d Link, expect reduced performance\n", 595 sc->link_width); 596 aligned = 1; 597 goto abort; 598 } 599 600 if (0 == mxge_firmware_probe(sc)) 601 return 0; 602 603 abort: 604 if (aligned) { 605 sc->fw_name = mxge_fw_aligned; 606 sc->tx.boundary = 4096; 607 } else { 608 sc->fw_name = mxge_fw_unaligned; 609 sc->tx.boundary = 2048; 610 } 611 return (mxge_load_firmware(sc)); 612 } 613 614 union qualhack 615 { 616 const char *ro_char; 617 char *rw_char; 618 }; 619 620 static int 621 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 622 { 623 624 625 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 626 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 627 be32toh(hdr->mcp_type)); 628 return EIO; 629 } 630 631 /* save firmware version for sysctl */ 632 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 633 if (mxge_verbose) 634 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 635 636 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 637 &sc->fw_ver_minor, &sc->fw_ver_tiny); 638 639 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 640 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 641 device_printf(sc->dev, "Found firmware version %s\n", 642 sc->fw_version); 643 device_printf(sc->dev, "Driver needs %d.%d\n", 644 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 645 return EINVAL; 646 } 647 return 0; 648 649 } 650 651 static int 652 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 653 { 654 const struct firmware *fw; 655 const mcp_gen_header_t *hdr; 656 unsigned hdr_offset; 657 const char *fw_data; 658 union qualhack hack; 659 int status; 660 unsigned int i; 661 char dummy; 662 663 664 fw = firmware_get(sc->fw_name); 665 666 if (fw == NULL) { 667 device_printf(sc->dev, "Could not find firmware image %s\n", 668 sc->fw_name); 669 return ENOENT; 670 } 671 if (fw->datasize > *limit || 672 fw->datasize < MCP_HEADER_PTR_OFFSET + 4) { 673 device_printf(sc->dev, "Firmware image %s too large (%d/%d)\n", 674 sc->fw_name, (int)fw->datasize, (int) *limit); 675 status = ENOSPC; 676 goto abort_with_fw; 677 } 678 *limit = fw->datasize; 679 680 /* check id */ 681 fw_data = (const char *)fw->data; 682 hdr_offset = htobe32(*(const uint32_t *) 683 (fw_data + MCP_HEADER_PTR_OFFSET)); 684 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->datasize) { 685 device_printf(sc->dev, "Bad firmware file"); 686 status = EIO; 687 goto abort_with_fw; 688 } 689 hdr = (const void*)(fw_data + hdr_offset); 690 691 status = mxge_validate_firmware(sc, hdr); 692 if (status != 0) 693 goto abort_with_fw; 694 695 hack.ro_char = fw_data; 696 /* Copy the inflated firmware to NIC SRAM. */ 697 for (i = 0; i < *limit; i += 256) { 698 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 699 hack.rw_char + i, 700 min(256U, (unsigned)(*limit - i))); 701 mb(); 702 dummy = *sc->sram; 703 mb(); 704 } 705 706 status = 0; 707 abort_with_fw: 708 firmware_put(fw, FIRMWARE_UNLOAD); 709 return status; 710 } 711 712 /* 713 * Enable or disable periodic RDMAs from the host to make certain 714 * chipsets resend dropped PCIe messages 715 */ 716 717 static void 718 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 719 { 720 char buf_bytes[72]; 721 volatile uint32_t *confirm; 722 volatile char *submit; 723 uint32_t *buf, dma_low, dma_high; 724 int i; 725 726 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 727 728 /* clear confirmation addr */ 729 confirm = (volatile uint32_t *)sc->cmd; 730 *confirm = 0; 731 mb(); 732 733 /* send an rdma command to the PCIe engine, and wait for the 734 response in the confirmation address. The firmware should 735 write a -1 there to indicate it is alive and well 736 */ 737 738 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 739 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 740 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 741 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 742 buf[2] = htobe32(0xffffffff); /* confirm data */ 743 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 744 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 745 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 746 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 747 buf[5] = htobe32(enable); /* enable? */ 748 749 750 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 751 752 mxge_pio_copy(submit, buf, 64); 753 mb(); 754 DELAY(1000); 755 mb(); 756 i = 0; 757 while (*confirm != 0xffffffff && i < 20) { 758 DELAY(1000); 759 i++; 760 } 761 if (*confirm != 0xffffffff) { 762 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 763 (enable ? "enable" : "disable"), confirm, 764 *confirm); 765 } 766 return; 767 } 768 769 static int 770 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 771 { 772 mcp_cmd_t *buf; 773 char buf_bytes[sizeof(*buf) + 8]; 774 volatile mcp_cmd_response_t *response = sc->cmd; 775 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 776 uint32_t dma_low, dma_high; 777 int err, sleep_total = 0; 778 779 /* ensure buf is aligned to 8 bytes */ 780 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 781 782 buf->data0 = htobe32(data->data0); 783 buf->data1 = htobe32(data->data1); 784 buf->data2 = htobe32(data->data2); 785 buf->cmd = htobe32(cmd); 786 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 787 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 788 789 buf->response_addr.low = htobe32(dma_low); 790 buf->response_addr.high = htobe32(dma_high); 791 mtx_lock(&sc->cmd_mtx); 792 response->result = 0xffffffff; 793 mb(); 794 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 795 796 /* wait up to 20ms */ 797 err = EAGAIN; 798 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 799 bus_dmamap_sync(sc->cmd_dma.dmat, 800 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 801 mb(); 802 switch (be32toh(response->result)) { 803 case 0: 804 data->data0 = be32toh(response->data); 805 err = 0; 806 break; 807 case 0xffffffff: 808 DELAY(1000); 809 break; 810 case MXGEFW_CMD_UNKNOWN: 811 err = ENOSYS; 812 break; 813 case MXGEFW_CMD_ERROR_UNALIGNED: 814 err = E2BIG; 815 break; 816 default: 817 device_printf(sc->dev, 818 "mxge: command %d " 819 "failed, result = %d\n", 820 cmd, be32toh(response->result)); 821 err = ENXIO; 822 break; 823 } 824 if (err != EAGAIN) 825 break; 826 } 827 if (err == EAGAIN) 828 device_printf(sc->dev, "mxge: command %d timed out" 829 "result = %d\n", 830 cmd, be32toh(response->result)); 831 mtx_unlock(&sc->cmd_mtx); 832 return err; 833 } 834 835 static int 836 mxge_adopt_running_firmware(mxge_softc_t *sc) 837 { 838 struct mcp_gen_header *hdr; 839 const size_t bytes = sizeof (struct mcp_gen_header); 840 size_t hdr_offset; 841 int status; 842 843 /* find running firmware header */ 844 hdr_offset = htobe32(*(volatile uint32_t *) 845 (sc->sram + MCP_HEADER_PTR_OFFSET)); 846 847 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 848 device_printf(sc->dev, 849 "Running firmware has bad header offset (%d)\n", 850 (int)hdr_offset); 851 return EIO; 852 } 853 854 /* copy header of running firmware from SRAM to host memory to 855 * validate firmware */ 856 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 857 if (hdr == NULL) { 858 device_printf(sc->dev, "could not malloc firmware hdr\n"); 859 return ENOMEM; 860 } 861 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 862 rman_get_bushandle(sc->mem_res), 863 hdr_offset, (char *)hdr, bytes); 864 status = mxge_validate_firmware(sc, hdr); 865 free(hdr, M_DEVBUF); 866 867 /* 868 * check to see if adopted firmware has bug where adopting 869 * it will cause broadcasts to be filtered unless the NIC 870 * is kept in ALLMULTI mode 871 */ 872 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 873 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 874 sc->adopted_rx_filter_bug = 1; 875 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 876 "working around rx filter bug\n", 877 sc->fw_ver_major, sc->fw_ver_minor, 878 sc->fw_ver_tiny); 879 } 880 881 return status; 882 } 883 884 885 static int 886 mxge_load_firmware(mxge_softc_t *sc) 887 { 888 volatile uint32_t *confirm; 889 volatile char *submit; 890 char buf_bytes[72]; 891 uint32_t *buf, size, dma_low, dma_high; 892 int status, i; 893 894 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 895 896 size = sc->sram_size; 897 status = mxge_load_firmware_helper(sc, &size); 898 if (status) { 899 /* Try to use the currently running firmware, if 900 it is new enough */ 901 status = mxge_adopt_running_firmware(sc); 902 if (status) { 903 device_printf(sc->dev, 904 "failed to adopt running firmware\n"); 905 return status; 906 } 907 device_printf(sc->dev, 908 "Successfully adopted running firmware\n"); 909 if (sc->tx.boundary == 4096) { 910 device_printf(sc->dev, 911 "Using firmware currently running on NIC" 912 ". For optimal\n"); 913 device_printf(sc->dev, 914 "performance consider loading optimized " 915 "firmware\n"); 916 } 917 sc->fw_name = mxge_fw_unaligned; 918 sc->tx.boundary = 2048; 919 return 0; 920 } 921 /* clear confirmation addr */ 922 confirm = (volatile uint32_t *)sc->cmd; 923 *confirm = 0; 924 mb(); 925 /* send a reload command to the bootstrap MCP, and wait for the 926 response in the confirmation address. The firmware should 927 write a -1 there to indicate it is alive and well 928 */ 929 930 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 931 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 932 933 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 934 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 935 buf[2] = htobe32(0xffffffff); /* confirm data */ 936 937 /* FIX: All newest firmware should un-protect the bottom of 938 the sram before handoff. However, the very first interfaces 939 do not. Therefore the handoff copy must skip the first 8 bytes 940 */ 941 /* where the code starts*/ 942 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 943 buf[4] = htobe32(size - 8); /* length of code */ 944 buf[5] = htobe32(8); /* where to copy to */ 945 buf[6] = htobe32(0); /* where to jump to */ 946 947 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 948 mxge_pio_copy(submit, buf, 64); 949 mb(); 950 DELAY(1000); 951 mb(); 952 i = 0; 953 while (*confirm != 0xffffffff && i < 20) { 954 DELAY(1000*10); 955 i++; 956 bus_dmamap_sync(sc->cmd_dma.dmat, 957 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 958 } 959 if (*confirm != 0xffffffff) { 960 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 961 confirm, *confirm); 962 963 return ENXIO; 964 } 965 return 0; 966 } 967 968 static int 969 mxge_update_mac_address(mxge_softc_t *sc) 970 { 971 mxge_cmd_t cmd; 972 uint8_t *addr = sc->mac_addr; 973 int status; 974 975 976 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 977 | (addr[2] << 8) | addr[3]); 978 979 cmd.data1 = ((addr[4] << 8) | (addr[5])); 980 981 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 982 return status; 983 } 984 985 static int 986 mxge_change_pause(mxge_softc_t *sc, int pause) 987 { 988 mxge_cmd_t cmd; 989 int status; 990 991 if (pause) 992 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 993 &cmd); 994 else 995 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 996 &cmd); 997 998 if (status) { 999 device_printf(sc->dev, "Failed to set flow control mode\n"); 1000 return ENXIO; 1001 } 1002 sc->pause = pause; 1003 return 0; 1004 } 1005 1006 static void 1007 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1008 { 1009 mxge_cmd_t cmd; 1010 int status; 1011 1012 if (promisc) 1013 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1014 &cmd); 1015 else 1016 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1017 &cmd); 1018 1019 if (status) { 1020 device_printf(sc->dev, "Failed to set promisc mode\n"); 1021 } 1022 } 1023 1024 static void 1025 mxge_set_multicast_list(mxge_softc_t *sc) 1026 { 1027 mxge_cmd_t cmd; 1028 struct ifmultiaddr *ifma; 1029 struct ifnet *ifp = sc->ifp; 1030 int err; 1031 1032 /* This firmware is known to not support multicast */ 1033 if (!sc->fw_multicast_support) 1034 return; 1035 1036 /* Disable multicast filtering while we play with the lists*/ 1037 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1038 if (err != 0) { 1039 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1040 " error status: %d\n", err); 1041 return; 1042 } 1043 1044 if (sc->adopted_rx_filter_bug) 1045 return; 1046 1047 if (ifp->if_flags & IFF_ALLMULTI) 1048 /* request to disable multicast filtering, so quit here */ 1049 return; 1050 1051 /* Flush all the filters */ 1052 1053 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1054 if (err != 0) { 1055 device_printf(sc->dev, 1056 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1057 ", error status: %d\n", err); 1058 return; 1059 } 1060 1061 /* Walk the multicast list, and add each address */ 1062 1063 IF_ADDR_LOCK(ifp); 1064 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1065 if (ifma->ifma_addr->sa_family != AF_LINK) 1066 continue; 1067 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1068 &cmd.data0, 4); 1069 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1070 &cmd.data1, 2); 1071 cmd.data0 = htonl(cmd.data0); 1072 cmd.data1 = htonl(cmd.data1); 1073 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1074 if (err != 0) { 1075 device_printf(sc->dev, "Failed " 1076 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1077 "%d\t", err); 1078 /* abort, leaving multicast filtering off */ 1079 IF_ADDR_UNLOCK(ifp); 1080 return; 1081 } 1082 } 1083 IF_ADDR_UNLOCK(ifp); 1084 /* Enable multicast filtering */ 1085 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1086 if (err != 0) { 1087 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1088 ", error status: %d\n", err); 1089 } 1090 } 1091 1092 static int 1093 mxge_max_mtu(mxge_softc_t *sc) 1094 { 1095 mxge_cmd_t cmd; 1096 int status; 1097 1098 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1099 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1100 1101 /* try to set nbufs to see if it we can 1102 use virtually contiguous jumbos */ 1103 cmd.data0 = 0; 1104 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1105 &cmd); 1106 if (status == 0) 1107 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1108 1109 /* otherwise, we're limited to MJUMPAGESIZE */ 1110 return MJUMPAGESIZE - MXGEFW_PAD; 1111 } 1112 1113 static int 1114 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1115 { 1116 1117 mxge_cmd_t cmd; 1118 size_t bytes; 1119 int status; 1120 1121 /* try to send a reset command to the card to see if it 1122 is alive */ 1123 memset(&cmd, 0, sizeof (cmd)); 1124 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1125 if (status != 0) { 1126 device_printf(sc->dev, "failed reset\n"); 1127 return ENXIO; 1128 } 1129 1130 mxge_dummy_rdma(sc, 1); 1131 1132 if (interrupts_setup) { 1133 /* Now exchange information about interrupts */ 1134 bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry); 1135 memset(sc->rx_done.entry, 0, bytes); 1136 cmd.data0 = (uint32_t)bytes; 1137 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1138 cmd.data0 = MXGE_LOWPART_TO_U32(sc->rx_done.dma.bus_addr); 1139 cmd.data1 = MXGE_HIGHPART_TO_U32(sc->rx_done.dma.bus_addr); 1140 status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd); 1141 } 1142 1143 status |= mxge_send_cmd(sc, 1144 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1145 1146 1147 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1148 1149 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1150 sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1151 1152 1153 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1154 &cmd); 1155 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1156 if (status != 0) { 1157 device_printf(sc->dev, "failed set interrupt parameters\n"); 1158 return status; 1159 } 1160 1161 1162 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1163 1164 1165 /* run a DMA benchmark */ 1166 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1167 1168 /* reset mcp/driver shared state back to 0 */ 1169 sc->rx_done.idx = 0; 1170 sc->rx_done.cnt = 0; 1171 sc->tx.req = 0; 1172 sc->tx.done = 0; 1173 sc->tx.pkt_done = 0; 1174 sc->tx.wake = 0; 1175 sc->tx_defrag = 0; 1176 sc->tx.stall = 0; 1177 sc->rx_big.cnt = 0; 1178 sc->rx_small.cnt = 0; 1179 sc->rdma_tags_available = 15; 1180 sc->fw_stats->valid = 0; 1181 sc->fw_stats->send_done_count = 0; 1182 sc->lro_bad_csum = 0; 1183 sc->lro_queued = 0; 1184 sc->lro_flushed = 0; 1185 status = mxge_update_mac_address(sc); 1186 mxge_change_promisc(sc, 0); 1187 mxge_change_pause(sc, sc->pause); 1188 mxge_set_multicast_list(sc); 1189 return status; 1190 } 1191 1192 static int 1193 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1194 { 1195 mxge_softc_t *sc; 1196 unsigned int intr_coal_delay; 1197 int err; 1198 1199 sc = arg1; 1200 intr_coal_delay = sc->intr_coal_delay; 1201 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1202 if (err != 0) { 1203 return err; 1204 } 1205 if (intr_coal_delay == sc->intr_coal_delay) 1206 return 0; 1207 1208 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1209 return EINVAL; 1210 1211 mtx_lock(&sc->driver_mtx); 1212 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1213 sc->intr_coal_delay = intr_coal_delay; 1214 1215 mtx_unlock(&sc->driver_mtx); 1216 return err; 1217 } 1218 1219 static int 1220 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1221 { 1222 mxge_softc_t *sc; 1223 unsigned int enabled; 1224 int err; 1225 1226 sc = arg1; 1227 enabled = sc->pause; 1228 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1229 if (err != 0) { 1230 return err; 1231 } 1232 if (enabled == sc->pause) 1233 return 0; 1234 1235 mtx_lock(&sc->driver_mtx); 1236 err = mxge_change_pause(sc, enabled); 1237 mtx_unlock(&sc->driver_mtx); 1238 return err; 1239 } 1240 1241 static int 1242 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1243 { 1244 struct ifnet *ifp; 1245 int err; 1246 1247 ifp = sc->ifp; 1248 if (lro_cnt == 0) 1249 ifp->if_capenable &= ~IFCAP_LRO; 1250 else 1251 ifp->if_capenable |= IFCAP_LRO; 1252 sc->lro_cnt = lro_cnt; 1253 callout_stop(&sc->co_hdl); 1254 mxge_close(sc); 1255 err = mxge_open(sc); 1256 if (err == 0) 1257 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 1258 return err; 1259 } 1260 1261 static int 1262 mxge_change_lro(SYSCTL_HANDLER_ARGS) 1263 { 1264 mxge_softc_t *sc; 1265 unsigned int lro_cnt; 1266 int err; 1267 1268 sc = arg1; 1269 lro_cnt = sc->lro_cnt; 1270 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1271 if (err != 0) 1272 return err; 1273 1274 if (lro_cnt == sc->lro_cnt) 1275 return 0; 1276 1277 if (lro_cnt > 128) 1278 return EINVAL; 1279 1280 mtx_lock(&sc->driver_mtx); 1281 err = mxge_change_lro_locked(sc, lro_cnt); 1282 mtx_unlock(&sc->driver_mtx); 1283 return err; 1284 } 1285 1286 static int 1287 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1288 { 1289 int err; 1290 1291 if (arg1 == NULL) 1292 return EFAULT; 1293 arg2 = be32toh(*(int *)arg1); 1294 arg1 = NULL; 1295 err = sysctl_handle_int(oidp, arg1, arg2, req); 1296 1297 return err; 1298 } 1299 1300 static void 1301 mxge_add_sysctls(mxge_softc_t *sc) 1302 { 1303 struct sysctl_ctx_list *ctx; 1304 struct sysctl_oid_list *children; 1305 mcp_irq_data_t *fw; 1306 1307 ctx = device_get_sysctl_ctx(sc->dev); 1308 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1309 fw = sc->fw_stats; 1310 1311 /* random information */ 1312 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1313 "firmware_version", 1314 CTLFLAG_RD, &sc->fw_version, 1315 0, "firmware version"); 1316 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1317 "serial_number", 1318 CTLFLAG_RD, &sc->serial_number_string, 1319 0, "serial number"); 1320 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1321 "product_code", 1322 CTLFLAG_RD, &sc->product_code_string, 1323 0, "product_code"); 1324 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1325 "pcie_link_width", 1326 CTLFLAG_RD, &sc->link_width, 1327 0, "tx_boundary"); 1328 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1329 "tx_boundary", 1330 CTLFLAG_RD, &sc->tx.boundary, 1331 0, "tx_boundary"); 1332 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1333 "write_combine", 1334 CTLFLAG_RD, &sc->wc, 1335 0, "write combining PIO?"); 1336 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1337 "read_dma_MBs", 1338 CTLFLAG_RD, &sc->read_dma, 1339 0, "DMA Read speed in MB/s"); 1340 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1341 "write_dma_MBs", 1342 CTLFLAG_RD, &sc->write_dma, 1343 0, "DMA Write speed in MB/s"); 1344 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1345 "read_write_dma_MBs", 1346 CTLFLAG_RD, &sc->read_write_dma, 1347 0, "DMA concurrent Read/Write speed in MB/s"); 1348 1349 1350 /* performance related tunables */ 1351 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1352 "intr_coal_delay", 1353 CTLTYPE_INT|CTLFLAG_RW, sc, 1354 0, mxge_change_intr_coal, 1355 "I", "interrupt coalescing delay in usecs"); 1356 1357 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1358 "flow_control_enabled", 1359 CTLTYPE_INT|CTLFLAG_RW, sc, 1360 0, mxge_change_flow_control, 1361 "I", "interrupt coalescing delay in usecs"); 1362 1363 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1364 "deassert_wait", 1365 CTLFLAG_RW, &mxge_deassert_wait, 1366 0, "Wait for IRQ line to go low in ihandler"); 1367 1368 /* stats block from firmware is in network byte order. 1369 Need to swap it */ 1370 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1371 "link_up", 1372 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1373 0, mxge_handle_be32, 1374 "I", "link up"); 1375 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1376 "rdma_tags_available", 1377 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1378 0, mxge_handle_be32, 1379 "I", "rdma_tags_available"); 1380 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1381 "dropped_bad_crc32", 1382 CTLTYPE_INT|CTLFLAG_RD, 1383 &fw->dropped_bad_crc32, 1384 0, mxge_handle_be32, 1385 "I", "dropped_bad_crc32"); 1386 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1387 "dropped_bad_phy", 1388 CTLTYPE_INT|CTLFLAG_RD, 1389 &fw->dropped_bad_phy, 1390 0, mxge_handle_be32, 1391 "I", "dropped_bad_phy"); 1392 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1393 "dropped_link_error_or_filtered", 1394 CTLTYPE_INT|CTLFLAG_RD, 1395 &fw->dropped_link_error_or_filtered, 1396 0, mxge_handle_be32, 1397 "I", "dropped_link_error_or_filtered"); 1398 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1399 "dropped_link_overflow", 1400 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1401 0, mxge_handle_be32, 1402 "I", "dropped_link_overflow"); 1403 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1404 "dropped_multicast_filtered", 1405 CTLTYPE_INT|CTLFLAG_RD, 1406 &fw->dropped_multicast_filtered, 1407 0, mxge_handle_be32, 1408 "I", "dropped_multicast_filtered"); 1409 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1410 "dropped_no_big_buffer", 1411 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1412 0, mxge_handle_be32, 1413 "I", "dropped_no_big_buffer"); 1414 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1415 "dropped_no_small_buffer", 1416 CTLTYPE_INT|CTLFLAG_RD, 1417 &fw->dropped_no_small_buffer, 1418 0, mxge_handle_be32, 1419 "I", "dropped_no_small_buffer"); 1420 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1421 "dropped_overrun", 1422 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1423 0, mxge_handle_be32, 1424 "I", "dropped_overrun"); 1425 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1426 "dropped_pause", 1427 CTLTYPE_INT|CTLFLAG_RD, 1428 &fw->dropped_pause, 1429 0, mxge_handle_be32, 1430 "I", "dropped_pause"); 1431 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1432 "dropped_runt", 1433 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1434 0, mxge_handle_be32, 1435 "I", "dropped_runt"); 1436 1437 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1438 "dropped_unicast_filtered", 1439 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1440 0, mxge_handle_be32, 1441 "I", "dropped_unicast_filtered"); 1442 1443 /* host counters exported for debugging */ 1444 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1445 "rx_small_cnt", 1446 CTLFLAG_RD, &sc->rx_small.cnt, 1447 0, "rx_small_cnt"); 1448 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1449 "rx_big_cnt", 1450 CTLFLAG_RD, &sc->rx_big.cnt, 1451 0, "rx_small_cnt"); 1452 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1453 "tx_req", 1454 CTLFLAG_RD, &sc->tx.req, 1455 0, "tx_req"); 1456 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1457 "tx_done", 1458 CTLFLAG_RD, &sc->tx.done, 1459 0, "tx_done"); 1460 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1461 "tx_pkt_done", 1462 CTLFLAG_RD, &sc->tx.pkt_done, 1463 0, "tx_done"); 1464 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1465 "tx_stall", 1466 CTLFLAG_RD, &sc->tx.stall, 1467 0, "tx_stall"); 1468 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1469 "tx_wake", 1470 CTLFLAG_RD, &sc->tx.wake, 1471 0, "tx_wake"); 1472 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1473 "tx_defrag", 1474 CTLFLAG_RD, &sc->tx_defrag, 1475 0, "tx_defrag"); 1476 1477 /* verbose printing? */ 1478 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1479 "verbose", 1480 CTLFLAG_RW, &mxge_verbose, 1481 0, "verbose printing"); 1482 1483 /* lro */ 1484 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1485 "lro_cnt", 1486 CTLTYPE_INT|CTLFLAG_RW, sc, 1487 0, mxge_change_lro, 1488 "I", "number of lro merge queues"); 1489 1490 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1491 "lro_flushed", CTLFLAG_RD, &sc->lro_flushed, 1492 0, "number of lro merge queues flushed"); 1493 1494 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1495 "lro_queued", CTLFLAG_RD, &sc->lro_queued, 1496 0, "number of frames appended to lro merge queues"); 1497 1498 } 1499 1500 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1501 backwards one at a time and handle ring wraps */ 1502 1503 static inline void 1504 mxge_submit_req_backwards(mxge_tx_buf_t *tx, 1505 mcp_kreq_ether_send_t *src, int cnt) 1506 { 1507 int idx, starting_slot; 1508 starting_slot = tx->req; 1509 while (cnt > 1) { 1510 cnt--; 1511 idx = (starting_slot + cnt) & tx->mask; 1512 mxge_pio_copy(&tx->lanai[idx], 1513 &src[cnt], sizeof(*src)); 1514 mb(); 1515 } 1516 } 1517 1518 /* 1519 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1520 * at most 32 bytes at a time, so as to avoid involving the software 1521 * pio handler in the nic. We re-write the first segment's flags 1522 * to mark them valid only after writing the entire chain 1523 */ 1524 1525 static inline void 1526 mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src, 1527 int cnt) 1528 { 1529 int idx, i; 1530 uint32_t *src_ints; 1531 volatile uint32_t *dst_ints; 1532 mcp_kreq_ether_send_t *srcp; 1533 volatile mcp_kreq_ether_send_t *dstp, *dst; 1534 uint8_t last_flags; 1535 1536 idx = tx->req & tx->mask; 1537 1538 last_flags = src->flags; 1539 src->flags = 0; 1540 mb(); 1541 dst = dstp = &tx->lanai[idx]; 1542 srcp = src; 1543 1544 if ((idx + cnt) < tx->mask) { 1545 for (i = 0; i < (cnt - 1); i += 2) { 1546 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1547 mb(); /* force write every 32 bytes */ 1548 srcp += 2; 1549 dstp += 2; 1550 } 1551 } else { 1552 /* submit all but the first request, and ensure 1553 that it is submitted below */ 1554 mxge_submit_req_backwards(tx, src, cnt); 1555 i = 0; 1556 } 1557 if (i < cnt) { 1558 /* submit the first request */ 1559 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1560 mb(); /* barrier before setting valid flag */ 1561 } 1562 1563 /* re-write the last 32-bits with the valid flags */ 1564 src->flags = last_flags; 1565 src_ints = (uint32_t *)src; 1566 src_ints+=3; 1567 dst_ints = (volatile uint32_t *)dst; 1568 dst_ints+=3; 1569 *dst_ints = *src_ints; 1570 tx->req += cnt; 1571 mb(); 1572 } 1573 1574 static void 1575 mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt, 1576 int ip_off) 1577 { 1578 mxge_tx_buf_t *tx; 1579 mcp_kreq_ether_send_t *req; 1580 bus_dma_segment_t *seg; 1581 struct ip *ip; 1582 struct tcphdr *tcp; 1583 uint32_t low, high_swapped; 1584 int len, seglen, cum_len, cum_len_next; 1585 int next_is_first, chop, cnt, rdma_count, small; 1586 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1587 uint8_t flags, flags_next; 1588 static int once; 1589 1590 mss = m->m_pkthdr.tso_segsz; 1591 1592 /* negative cum_len signifies to the 1593 * send loop that we are still in the 1594 * header portion of the TSO packet. 1595 */ 1596 1597 /* ensure we have the ethernet, IP and TCP 1598 header together in the first mbuf, copy 1599 it to a scratch buffer if not */ 1600 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1601 m_copydata(m, 0, ip_off + sizeof (*ip), 1602 sc->scratch); 1603 ip = (struct ip *)(sc->scratch + ip_off); 1604 } else { 1605 ip = (struct ip *)(mtod(m, char *) + ip_off); 1606 } 1607 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1608 + sizeof (*tcp))) { 1609 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1610 + sizeof (*tcp), sc->scratch); 1611 ip = (struct ip *)(mtod(m, char *) + ip_off); 1612 } 1613 1614 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1615 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1616 1617 /* TSO implies checksum offload on this hardware */ 1618 cksum_offset = ip_off + (ip->ip_hl << 2); 1619 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1620 1621 1622 /* for TSO, pseudo_hdr_offset holds mss. 1623 * The firmware figures out where to put 1624 * the checksum by parsing the header. */ 1625 pseudo_hdr_offset = htobe16(mss); 1626 1627 tx = &sc->tx; 1628 req = tx->req_list; 1629 seg = tx->seg_list; 1630 cnt = 0; 1631 rdma_count = 0; 1632 /* "rdma_count" is the number of RDMAs belonging to the 1633 * current packet BEFORE the current send request. For 1634 * non-TSO packets, this is equal to "count". 1635 * For TSO packets, rdma_count needs to be reset 1636 * to 0 after a segment cut. 1637 * 1638 * The rdma_count field of the send request is 1639 * the number of RDMAs of the packet starting at 1640 * that request. For TSO send requests with one ore more cuts 1641 * in the middle, this is the number of RDMAs starting 1642 * after the last cut in the request. All previous 1643 * segments before the last cut implicitly have 1 RDMA. 1644 * 1645 * Since the number of RDMAs is not known beforehand, 1646 * it must be filled-in retroactively - after each 1647 * segmentation cut or at the end of the entire packet. 1648 */ 1649 1650 while (busdma_seg_cnt) { 1651 /* Break the busdma segment up into pieces*/ 1652 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1653 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1654 len = seg->ds_len; 1655 1656 while (len) { 1657 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1658 seglen = len; 1659 cum_len_next = cum_len + seglen; 1660 (req-rdma_count)->rdma_count = rdma_count + 1; 1661 if (__predict_true(cum_len >= 0)) { 1662 /* payload */ 1663 chop = (cum_len_next > mss); 1664 cum_len_next = cum_len_next % mss; 1665 next_is_first = (cum_len_next == 0); 1666 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1667 flags_next |= next_is_first * 1668 MXGEFW_FLAGS_FIRST; 1669 rdma_count |= -(chop | next_is_first); 1670 rdma_count += chop & !next_is_first; 1671 } else if (cum_len_next >= 0) { 1672 /* header ends */ 1673 rdma_count = -1; 1674 cum_len_next = 0; 1675 seglen = -cum_len; 1676 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1677 flags_next = MXGEFW_FLAGS_TSO_PLD | 1678 MXGEFW_FLAGS_FIRST | 1679 (small * MXGEFW_FLAGS_SMALL); 1680 } 1681 1682 req->addr_high = high_swapped; 1683 req->addr_low = htobe32(low); 1684 req->pseudo_hdr_offset = pseudo_hdr_offset; 1685 req->pad = 0; 1686 req->rdma_count = 1; 1687 req->length = htobe16(seglen); 1688 req->cksum_offset = cksum_offset; 1689 req->flags = flags | ((cum_len & 1) * 1690 MXGEFW_FLAGS_ALIGN_ODD); 1691 low += seglen; 1692 len -= seglen; 1693 cum_len = cum_len_next; 1694 flags = flags_next; 1695 req++; 1696 cnt++; 1697 rdma_count++; 1698 if (__predict_false(cksum_offset > seglen)) 1699 cksum_offset -= seglen; 1700 else 1701 cksum_offset = 0; 1702 if (__predict_false(cnt > tx->max_desc)) 1703 goto drop; 1704 } 1705 busdma_seg_cnt--; 1706 seg++; 1707 } 1708 (req-rdma_count)->rdma_count = rdma_count; 1709 1710 do { 1711 req--; 1712 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1713 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1714 1715 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1716 mxge_submit_req(tx, tx->req_list, cnt); 1717 return; 1718 1719 drop: 1720 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1721 m_freem(m); 1722 sc->ifp->if_oerrors++; 1723 if (!once) { 1724 printf("tx->max_desc exceeded via TSO!\n"); 1725 printf("mss = %d, %ld, %d!\n", mss, 1726 (long)seg - (long)tx->seg_list, tx->max_desc); 1727 once = 1; 1728 } 1729 return; 1730 1731 } 1732 1733 /* 1734 * We reproduce the software vlan tag insertion from 1735 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1736 * vlan tag insertion. We need to advertise this in order to have the 1737 * vlan interface respect our csum offload flags. 1738 */ 1739 static struct mbuf * 1740 mxge_vlan_tag_insert(struct mbuf *m) 1741 { 1742 struct ether_vlan_header *evl; 1743 1744 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1745 if (__predict_false(m == NULL)) 1746 return NULL; 1747 if (m->m_len < sizeof(*evl)) { 1748 m = m_pullup(m, sizeof(*evl)); 1749 if (__predict_false(m == NULL)) 1750 return NULL; 1751 } 1752 /* 1753 * Transform the Ethernet header into an Ethernet header 1754 * with 802.1Q encapsulation. 1755 */ 1756 evl = mtod(m, struct ether_vlan_header *); 1757 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 1758 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 1759 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 1760 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 1761 m->m_flags &= ~M_VLANTAG; 1762 return m; 1763 } 1764 1765 static void 1766 mxge_encap(mxge_softc_t *sc, struct mbuf *m) 1767 { 1768 mcp_kreq_ether_send_t *req; 1769 bus_dma_segment_t *seg; 1770 struct mbuf *m_tmp; 1771 struct ifnet *ifp; 1772 mxge_tx_buf_t *tx; 1773 struct ip *ip; 1774 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 1775 uint16_t pseudo_hdr_offset; 1776 uint8_t flags, cksum_offset; 1777 1778 1779 1780 ifp = sc->ifp; 1781 tx = &sc->tx; 1782 1783 ip_off = sizeof (struct ether_header); 1784 if (m->m_flags & M_VLANTAG) { 1785 m = mxge_vlan_tag_insert(m); 1786 if (__predict_false(m == NULL)) 1787 goto drop; 1788 ip_off += ETHER_VLAN_ENCAP_LEN; 1789 } 1790 1791 /* (try to) map the frame for DMA */ 1792 idx = tx->req & tx->mask; 1793 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 1794 m, tx->seg_list, &cnt, 1795 BUS_DMA_NOWAIT); 1796 if (__predict_false(err == EFBIG)) { 1797 /* Too many segments in the chain. Try 1798 to defrag */ 1799 m_tmp = m_defrag(m, M_NOWAIT); 1800 if (m_tmp == NULL) { 1801 goto drop; 1802 } 1803 sc->tx_defrag++; 1804 m = m_tmp; 1805 err = bus_dmamap_load_mbuf_sg(tx->dmat, 1806 tx->info[idx].map, 1807 m, tx->seg_list, &cnt, 1808 BUS_DMA_NOWAIT); 1809 } 1810 if (__predict_false(err != 0)) { 1811 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 1812 " packet len = %d\n", err, m->m_pkthdr.len); 1813 goto drop; 1814 } 1815 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 1816 BUS_DMASYNC_PREWRITE); 1817 tx->info[idx].m = m; 1818 1819 1820 /* TSO is different enough, we handle it in another routine */ 1821 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 1822 mxge_encap_tso(sc, m, cnt, ip_off); 1823 return; 1824 } 1825 1826 req = tx->req_list; 1827 cksum_offset = 0; 1828 pseudo_hdr_offset = 0; 1829 flags = MXGEFW_FLAGS_NO_TSO; 1830 1831 /* checksum offloading? */ 1832 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 1833 /* ensure ip header is in first mbuf, copy 1834 it to a scratch buffer if not */ 1835 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1836 m_copydata(m, 0, ip_off + sizeof (*ip), 1837 sc->scratch); 1838 ip = (struct ip *)(sc->scratch + ip_off); 1839 } else { 1840 ip = (struct ip *)(mtod(m, char *) + ip_off); 1841 } 1842 cksum_offset = ip_off + (ip->ip_hl << 2); 1843 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 1844 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 1845 req->cksum_offset = cksum_offset; 1846 flags |= MXGEFW_FLAGS_CKSUM; 1847 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 1848 } else { 1849 odd_flag = 0; 1850 } 1851 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 1852 flags |= MXGEFW_FLAGS_SMALL; 1853 1854 /* convert segments into a request list */ 1855 cum_len = 0; 1856 seg = tx->seg_list; 1857 req->flags = MXGEFW_FLAGS_FIRST; 1858 for (i = 0; i < cnt; i++) { 1859 req->addr_low = 1860 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 1861 req->addr_high = 1862 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1863 req->length = htobe16(seg->ds_len); 1864 req->cksum_offset = cksum_offset; 1865 if (cksum_offset > seg->ds_len) 1866 cksum_offset -= seg->ds_len; 1867 else 1868 cksum_offset = 0; 1869 req->pseudo_hdr_offset = pseudo_hdr_offset; 1870 req->pad = 0; /* complete solid 16-byte block */ 1871 req->rdma_count = 1; 1872 req->flags |= flags | ((cum_len & 1) * odd_flag); 1873 cum_len += seg->ds_len; 1874 seg++; 1875 req++; 1876 req->flags = 0; 1877 } 1878 req--; 1879 /* pad runts to 60 bytes */ 1880 if (cum_len < 60) { 1881 req++; 1882 req->addr_low = 1883 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 1884 req->addr_high = 1885 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 1886 req->length = htobe16(60 - cum_len); 1887 req->cksum_offset = 0; 1888 req->pseudo_hdr_offset = pseudo_hdr_offset; 1889 req->pad = 0; /* complete solid 16-byte block */ 1890 req->rdma_count = 1; 1891 req->flags |= flags | ((cum_len & 1) * odd_flag); 1892 cnt++; 1893 } 1894 1895 tx->req_list[0].rdma_count = cnt; 1896 #if 0 1897 /* print what the firmware will see */ 1898 for (i = 0; i < cnt; i++) { 1899 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 1900 "cso:%d, flags:0x%x, rdma:%d\n", 1901 i, (int)ntohl(tx->req_list[i].addr_high), 1902 (int)ntohl(tx->req_list[i].addr_low), 1903 (int)ntohs(tx->req_list[i].length), 1904 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 1905 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 1906 tx->req_list[i].rdma_count); 1907 } 1908 printf("--------------\n"); 1909 #endif 1910 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1911 mxge_submit_req(tx, tx->req_list, cnt); 1912 return; 1913 1914 drop: 1915 m_freem(m); 1916 ifp->if_oerrors++; 1917 return; 1918 } 1919 1920 1921 1922 1923 static inline void 1924 mxge_start_locked(mxge_softc_t *sc) 1925 { 1926 struct mbuf *m; 1927 struct ifnet *ifp; 1928 mxge_tx_buf_t *tx; 1929 1930 ifp = sc->ifp; 1931 tx = &sc->tx; 1932 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 1933 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 1934 if (m == NULL) { 1935 return; 1936 } 1937 /* let BPF see it */ 1938 BPF_MTAP(ifp, m); 1939 1940 /* give it to the nic */ 1941 mxge_encap(sc, m); 1942 } 1943 /* ran out of transmit slots */ 1944 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 1945 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 1946 tx->stall++; 1947 } 1948 } 1949 1950 static void 1951 mxge_start(struct ifnet *ifp) 1952 { 1953 mxge_softc_t *sc = ifp->if_softc; 1954 1955 1956 mtx_lock(&sc->tx_mtx); 1957 mxge_start_locked(sc); 1958 mtx_unlock(&sc->tx_mtx); 1959 } 1960 1961 /* 1962 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 1963 * at most 32 bytes at a time, so as to avoid involving the software 1964 * pio handler in the nic. We re-write the first segment's low 1965 * DMA address to mark it valid only after we write the entire chunk 1966 * in a burst 1967 */ 1968 static inline void 1969 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 1970 mcp_kreq_ether_recv_t *src) 1971 { 1972 uint32_t low; 1973 1974 low = src->addr_low; 1975 src->addr_low = 0xffffffff; 1976 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 1977 mb(); 1978 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 1979 mb(); 1980 src->addr_low = low; 1981 dst->addr_low = low; 1982 mb(); 1983 } 1984 1985 static int 1986 mxge_get_buf_small(mxge_softc_t *sc, bus_dmamap_t map, int idx) 1987 { 1988 bus_dma_segment_t seg; 1989 struct mbuf *m; 1990 mxge_rx_buf_t *rx = &sc->rx_small; 1991 int cnt, err; 1992 1993 m = m_gethdr(M_DONTWAIT, MT_DATA); 1994 if (m == NULL) { 1995 rx->alloc_fail++; 1996 err = ENOBUFS; 1997 goto done; 1998 } 1999 m->m_len = MHLEN; 2000 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2001 &seg, &cnt, BUS_DMA_NOWAIT); 2002 if (err != 0) { 2003 m_free(m); 2004 goto done; 2005 } 2006 rx->info[idx].m = m; 2007 rx->shadow[idx].addr_low = 2008 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2009 rx->shadow[idx].addr_high = 2010 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2011 2012 done: 2013 if ((idx & 7) == 7) 2014 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2015 return err; 2016 } 2017 2018 static int 2019 mxge_get_buf_big(mxge_softc_t *sc, bus_dmamap_t map, int idx) 2020 { 2021 bus_dma_segment_t seg[3]; 2022 struct mbuf *m; 2023 mxge_rx_buf_t *rx = &sc->rx_big; 2024 int cnt, err, i; 2025 2026 if (rx->cl_size == MCLBYTES) 2027 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2028 else 2029 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2030 if (m == NULL) { 2031 rx->alloc_fail++; 2032 err = ENOBUFS; 2033 goto done; 2034 } 2035 m->m_len = rx->cl_size; 2036 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2037 seg, &cnt, BUS_DMA_NOWAIT); 2038 if (err != 0) { 2039 m_free(m); 2040 goto done; 2041 } 2042 rx->info[idx].m = m; 2043 2044 for (i = 0; i < cnt; i++) { 2045 rx->shadow[idx + i].addr_low = 2046 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2047 rx->shadow[idx + i].addr_high = 2048 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2049 } 2050 2051 2052 done: 2053 for (i = 0; i < rx->nbufs; i++) { 2054 if ((idx & 7) == 7) { 2055 mxge_submit_8rx(&rx->lanai[idx - 7], 2056 &rx->shadow[idx - 7]); 2057 } 2058 idx++; 2059 } 2060 return err; 2061 } 2062 2063 /* 2064 * Myri10GE hardware checksums are not valid if the sender 2065 * padded the frame with non-zero padding. This is because 2066 * the firmware just does a simple 16-bit 1s complement 2067 * checksum across the entire frame, excluding the first 14 2068 * bytes. It is best to simply to check the checksum and 2069 * tell the stack about it only if the checksum is good 2070 */ 2071 2072 static inline uint16_t 2073 mxge_rx_csum(struct mbuf *m, int csum) 2074 { 2075 struct ether_header *eh; 2076 struct ip *ip; 2077 uint16_t c; 2078 2079 eh = mtod(m, struct ether_header *); 2080 2081 /* only deal with IPv4 TCP & UDP for now */ 2082 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2083 return 1; 2084 ip = (struct ip *)(eh + 1); 2085 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2086 ip->ip_p != IPPROTO_UDP)) 2087 return 1; 2088 2089 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2090 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2091 - (ip->ip_hl << 2) + ip->ip_p)); 2092 c ^= 0xffff; 2093 return (c); 2094 } 2095 2096 static void 2097 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2098 { 2099 struct ether_vlan_header *evl; 2100 struct ether_header *eh; 2101 uint32_t partial; 2102 2103 evl = mtod(m, struct ether_vlan_header *); 2104 eh = mtod(m, struct ether_header *); 2105 2106 /* 2107 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2108 * after what the firmware thought was the end of the ethernet 2109 * header. 2110 */ 2111 2112 /* put checksum into host byte order */ 2113 *csum = ntohs(*csum); 2114 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2115 (*csum) += ~partial; 2116 (*csum) += ((*csum) < ~partial); 2117 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2118 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2119 2120 /* restore checksum to network byte order; 2121 later consumers expect this */ 2122 *csum = htons(*csum); 2123 2124 /* save the tag */ 2125 m->m_flags |= M_VLANTAG; 2126 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2127 2128 /* 2129 * Remove the 802.1q header by copying the Ethernet 2130 * addresses over it and adjusting the beginning of 2131 * the data in the mbuf. The encapsulated Ethernet 2132 * type field is already in place. 2133 */ 2134 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2135 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2136 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2137 } 2138 2139 2140 static inline void 2141 mxge_rx_done_big(mxge_softc_t *sc, uint32_t len, uint32_t csum) 2142 { 2143 struct ifnet *ifp; 2144 struct mbuf *m; 2145 struct ether_header *eh; 2146 mxge_rx_buf_t *rx; 2147 bus_dmamap_t old_map; 2148 int idx; 2149 uint16_t tcpudp_csum; 2150 2151 ifp = sc->ifp; 2152 rx = &sc->rx_big; 2153 idx = rx->cnt & rx->mask; 2154 rx->cnt += rx->nbufs; 2155 /* save a pointer to the received mbuf */ 2156 m = rx->info[idx].m; 2157 /* try to replace the received mbuf */ 2158 if (mxge_get_buf_big(sc, rx->extra_map, idx)) { 2159 /* drop the frame -- the old mbuf is re-cycled */ 2160 ifp->if_ierrors++; 2161 return; 2162 } 2163 2164 /* unmap the received buffer */ 2165 old_map = rx->info[idx].map; 2166 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2167 bus_dmamap_unload(rx->dmat, old_map); 2168 2169 /* swap the bus_dmamap_t's */ 2170 rx->info[idx].map = rx->extra_map; 2171 rx->extra_map = old_map; 2172 2173 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2174 * aligned */ 2175 m->m_data += MXGEFW_PAD; 2176 2177 m->m_pkthdr.rcvif = ifp; 2178 m->m_len = m->m_pkthdr.len = len; 2179 ifp->if_ipackets++; 2180 eh = mtod(m, struct ether_header *); 2181 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2182 mxge_vlan_tag_remove(m, &csum); 2183 } 2184 /* if the checksum is valid, mark it in the mbuf header */ 2185 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2186 if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum))) 2187 return; 2188 /* otherwise, it was a UDP frame, or a TCP frame which 2189 we could not do LRO on. Tell the stack that the 2190 checksum is good */ 2191 m->m_pkthdr.csum_data = 0xffff; 2192 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2193 } 2194 /* pass the frame up the stack */ 2195 (*ifp->if_input)(ifp, m); 2196 } 2197 2198 static inline void 2199 mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum) 2200 { 2201 struct ifnet *ifp; 2202 struct ether_header *eh; 2203 struct mbuf *m; 2204 mxge_rx_buf_t *rx; 2205 bus_dmamap_t old_map; 2206 int idx; 2207 uint16_t tcpudp_csum; 2208 2209 ifp = sc->ifp; 2210 rx = &sc->rx_small; 2211 idx = rx->cnt & rx->mask; 2212 rx->cnt++; 2213 /* save a pointer to the received mbuf */ 2214 m = rx->info[idx].m; 2215 /* try to replace the received mbuf */ 2216 if (mxge_get_buf_small(sc, rx->extra_map, idx)) { 2217 /* drop the frame -- the old mbuf is re-cycled */ 2218 ifp->if_ierrors++; 2219 return; 2220 } 2221 2222 /* unmap the received buffer */ 2223 old_map = rx->info[idx].map; 2224 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2225 bus_dmamap_unload(rx->dmat, old_map); 2226 2227 /* swap the bus_dmamap_t's */ 2228 rx->info[idx].map = rx->extra_map; 2229 rx->extra_map = old_map; 2230 2231 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2232 * aligned */ 2233 m->m_data += MXGEFW_PAD; 2234 2235 m->m_pkthdr.rcvif = ifp; 2236 m->m_len = m->m_pkthdr.len = len; 2237 ifp->if_ipackets++; 2238 eh = mtod(m, struct ether_header *); 2239 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2240 mxge_vlan_tag_remove(m, &csum); 2241 } 2242 /* if the checksum is valid, mark it in the mbuf header */ 2243 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2244 if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum))) 2245 return; 2246 /* otherwise, it was a UDP frame, or a TCP frame which 2247 we could not do LRO on. Tell the stack that the 2248 checksum is good */ 2249 m->m_pkthdr.csum_data = 0xffff; 2250 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2251 } 2252 2253 /* pass the frame up the stack */ 2254 (*ifp->if_input)(ifp, m); 2255 } 2256 2257 static inline void 2258 mxge_clean_rx_done(mxge_softc_t *sc) 2259 { 2260 mxge_rx_done_t *rx_done = &sc->rx_done; 2261 struct lro_entry *lro; 2262 int limit = 0; 2263 uint16_t length; 2264 uint16_t checksum; 2265 2266 2267 while (rx_done->entry[rx_done->idx].length != 0) { 2268 length = ntohs(rx_done->entry[rx_done->idx].length); 2269 rx_done->entry[rx_done->idx].length = 0; 2270 checksum = rx_done->entry[rx_done->idx].checksum; 2271 if (length <= (MHLEN - MXGEFW_PAD)) 2272 mxge_rx_done_small(sc, length, checksum); 2273 else 2274 mxge_rx_done_big(sc, length, checksum); 2275 rx_done->cnt++; 2276 rx_done->idx = rx_done->cnt & rx_done->mask; 2277 2278 /* limit potential for livelock */ 2279 if (__predict_false(++limit > 2 * rx_done->mask)) 2280 break; 2281 } 2282 while(!SLIST_EMPTY(&sc->lro_active)) { 2283 lro = SLIST_FIRST(&sc->lro_active); 2284 SLIST_REMOVE_HEAD(&sc->lro_active, next); 2285 mxge_lro_flush(sc, lro); 2286 } 2287 } 2288 2289 2290 static inline void 2291 mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx) 2292 { 2293 struct ifnet *ifp; 2294 mxge_tx_buf_t *tx; 2295 struct mbuf *m; 2296 bus_dmamap_t map; 2297 int idx, limit; 2298 2299 limit = 0; 2300 tx = &sc->tx; 2301 ifp = sc->ifp; 2302 while (tx->pkt_done != mcp_idx) { 2303 idx = tx->done & tx->mask; 2304 tx->done++; 2305 m = tx->info[idx].m; 2306 /* mbuf and DMA map only attached to the first 2307 segment per-mbuf */ 2308 if (m != NULL) { 2309 ifp->if_opackets++; 2310 tx->info[idx].m = NULL; 2311 map = tx->info[idx].map; 2312 bus_dmamap_unload(tx->dmat, map); 2313 m_freem(m); 2314 } 2315 if (tx->info[idx].flag) { 2316 tx->info[idx].flag = 0; 2317 tx->pkt_done++; 2318 } 2319 /* limit potential for livelock by only handling 2320 2 full tx rings per call */ 2321 if (__predict_false(++limit > 2 * tx->mask)) 2322 break; 2323 } 2324 2325 /* If we have space, clear IFF_OACTIVE to tell the stack that 2326 its OK to send packets */ 2327 2328 if (ifp->if_drv_flags & IFF_DRV_OACTIVE && 2329 tx->req - tx->done < (tx->mask + 1)/4) { 2330 mtx_lock(&sc->tx_mtx); 2331 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 2332 sc->tx.wake++; 2333 mxge_start_locked(sc); 2334 mtx_unlock(&sc->tx_mtx); 2335 } 2336 } 2337 2338 static void 2339 mxge_intr(void *arg) 2340 { 2341 mxge_softc_t *sc = arg; 2342 mcp_irq_data_t *stats = sc->fw_stats; 2343 mxge_tx_buf_t *tx = &sc->tx; 2344 mxge_rx_done_t *rx_done = &sc->rx_done; 2345 uint32_t send_done_count; 2346 uint8_t valid; 2347 2348 2349 /* make sure the DMA has finished */ 2350 if (!stats->valid) { 2351 return; 2352 } 2353 valid = stats->valid; 2354 2355 if (!sc->msi_enabled) { 2356 /* lower legacy IRQ */ 2357 *sc->irq_deassert = 0; 2358 if (!mxge_deassert_wait) 2359 /* don't wait for conf. that irq is low */ 2360 stats->valid = 0; 2361 } else { 2362 stats->valid = 0; 2363 } 2364 2365 /* loop while waiting for legacy irq deassertion */ 2366 do { 2367 /* check for transmit completes and receives */ 2368 send_done_count = be32toh(stats->send_done_count); 2369 while ((send_done_count != tx->pkt_done) || 2370 (rx_done->entry[rx_done->idx].length != 0)) { 2371 mxge_tx_done(sc, (int)send_done_count); 2372 mxge_clean_rx_done(sc); 2373 send_done_count = be32toh(stats->send_done_count); 2374 } 2375 } while (*((volatile uint8_t *) &stats->valid)); 2376 2377 if (__predict_false(stats->stats_updated)) { 2378 if (sc->link_state != stats->link_up) { 2379 sc->link_state = stats->link_up; 2380 if (sc->link_state) { 2381 if_link_state_change(sc->ifp, LINK_STATE_UP); 2382 if (mxge_verbose) 2383 device_printf(sc->dev, "link up\n"); 2384 } else { 2385 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2386 if (mxge_verbose) 2387 device_printf(sc->dev, "link down\n"); 2388 } 2389 } 2390 if (sc->rdma_tags_available != 2391 be32toh(sc->fw_stats->rdma_tags_available)) { 2392 sc->rdma_tags_available = 2393 be32toh(sc->fw_stats->rdma_tags_available); 2394 device_printf(sc->dev, "RDMA timed out! %d tags " 2395 "left\n", sc->rdma_tags_available); 2396 } 2397 sc->down_cnt += stats->link_down; 2398 } 2399 2400 /* check to see if we have rx token to pass back */ 2401 if (valid & 0x1) 2402 *sc->irq_claim = be32toh(3); 2403 *(sc->irq_claim + 1) = be32toh(3); 2404 } 2405 2406 static void 2407 mxge_init(void *arg) 2408 { 2409 } 2410 2411 2412 2413 static void 2414 mxge_free_mbufs(mxge_softc_t *sc) 2415 { 2416 int i; 2417 2418 for (i = 0; i <= sc->rx_big.mask; i++) { 2419 if (sc->rx_big.info[i].m == NULL) 2420 continue; 2421 bus_dmamap_unload(sc->rx_big.dmat, 2422 sc->rx_big.info[i].map); 2423 m_freem(sc->rx_big.info[i].m); 2424 sc->rx_big.info[i].m = NULL; 2425 } 2426 2427 for (i = 0; i <= sc->rx_small.mask; i++) { 2428 if (sc->rx_small.info[i].m == NULL) 2429 continue; 2430 bus_dmamap_unload(sc->rx_small.dmat, 2431 sc->rx_small.info[i].map); 2432 m_freem(sc->rx_small.info[i].m); 2433 sc->rx_small.info[i].m = NULL; 2434 } 2435 2436 for (i = 0; i <= sc->tx.mask; i++) { 2437 sc->tx.info[i].flag = 0; 2438 if (sc->tx.info[i].m == NULL) 2439 continue; 2440 bus_dmamap_unload(sc->tx.dmat, 2441 sc->tx.info[i].map); 2442 m_freem(sc->tx.info[i].m); 2443 sc->tx.info[i].m = NULL; 2444 } 2445 } 2446 2447 static void 2448 mxge_free_rings(mxge_softc_t *sc) 2449 { 2450 int i; 2451 2452 if (sc->rx_done.entry != NULL) 2453 mxge_dma_free(&sc->rx_done.dma); 2454 sc->rx_done.entry = NULL; 2455 if (sc->tx.req_bytes != NULL) 2456 free(sc->tx.req_bytes, M_DEVBUF); 2457 if (sc->tx.seg_list != NULL) 2458 free(sc->tx.seg_list, M_DEVBUF); 2459 if (sc->rx_small.shadow != NULL) 2460 free(sc->rx_small.shadow, M_DEVBUF); 2461 if (sc->rx_big.shadow != NULL) 2462 free(sc->rx_big.shadow, M_DEVBUF); 2463 if (sc->tx.info != NULL) { 2464 if (sc->tx.dmat != NULL) { 2465 for (i = 0; i <= sc->tx.mask; i++) { 2466 bus_dmamap_destroy(sc->tx.dmat, 2467 sc->tx.info[i].map); 2468 } 2469 bus_dma_tag_destroy(sc->tx.dmat); 2470 } 2471 free(sc->tx.info, M_DEVBUF); 2472 } 2473 if (sc->rx_small.info != NULL) { 2474 if (sc->rx_small.dmat != NULL) { 2475 for (i = 0; i <= sc->rx_small.mask; i++) { 2476 bus_dmamap_destroy(sc->rx_small.dmat, 2477 sc->rx_small.info[i].map); 2478 } 2479 bus_dmamap_destroy(sc->rx_small.dmat, 2480 sc->rx_small.extra_map); 2481 bus_dma_tag_destroy(sc->rx_small.dmat); 2482 } 2483 free(sc->rx_small.info, M_DEVBUF); 2484 } 2485 if (sc->rx_big.info != NULL) { 2486 if (sc->rx_big.dmat != NULL) { 2487 for (i = 0; i <= sc->rx_big.mask; i++) { 2488 bus_dmamap_destroy(sc->rx_big.dmat, 2489 sc->rx_big.info[i].map); 2490 } 2491 bus_dmamap_destroy(sc->rx_big.dmat, 2492 sc->rx_big.extra_map); 2493 bus_dma_tag_destroy(sc->rx_big.dmat); 2494 } 2495 free(sc->rx_big.info, M_DEVBUF); 2496 } 2497 } 2498 2499 static int 2500 mxge_alloc_rings(mxge_softc_t *sc) 2501 { 2502 mxge_cmd_t cmd; 2503 int tx_ring_size, rx_ring_size; 2504 int tx_ring_entries, rx_ring_entries; 2505 int i, err; 2506 unsigned long bytes; 2507 2508 /* get ring sizes */ 2509 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 2510 tx_ring_size = cmd.data0; 2511 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 2512 if (err != 0) { 2513 device_printf(sc->dev, "Cannot determine ring sizes\n"); 2514 goto abort_with_nothing; 2515 } 2516 2517 rx_ring_size = cmd.data0; 2518 2519 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 2520 rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t); 2521 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 2522 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 2523 IFQ_SET_READY(&sc->ifp->if_snd); 2524 2525 sc->tx.mask = tx_ring_entries - 1; 2526 sc->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 2527 sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1; 2528 sc->rx_done.mask = (2 * rx_ring_entries) - 1; 2529 2530 err = ENOMEM; 2531 2532 /* allocate interrupt queues */ 2533 bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry); 2534 err = mxge_dma_alloc(sc, &sc->rx_done.dma, bytes, 4096); 2535 if (err != 0) 2536 goto abort_with_nothing; 2537 sc->rx_done.entry = sc->rx_done.dma.addr; 2538 bzero(sc->rx_done.entry, bytes); 2539 2540 /* allocate the tx request copy block */ 2541 bytes = 8 + 2542 sizeof (*sc->tx.req_list) * (sc->tx.max_desc + 4); 2543 sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 2544 if (sc->tx.req_bytes == NULL) 2545 goto abort_with_alloc; 2546 /* ensure req_list entries are aligned to 8 bytes */ 2547 sc->tx.req_list = (mcp_kreq_ether_send_t *) 2548 ((unsigned long)(sc->tx.req_bytes + 7) & ~7UL); 2549 2550 /* allocate the tx busdma segment list */ 2551 bytes = sizeof (*sc->tx.seg_list) * sc->tx.max_desc; 2552 sc->tx.seg_list = (bus_dma_segment_t *) 2553 malloc(bytes, M_DEVBUF, M_WAITOK); 2554 if (sc->tx.seg_list == NULL) 2555 goto abort_with_alloc; 2556 2557 /* allocate the rx shadow rings */ 2558 bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow); 2559 sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2560 if (sc->rx_small.shadow == NULL) 2561 goto abort_with_alloc; 2562 2563 bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow); 2564 sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2565 if (sc->rx_big.shadow == NULL) 2566 goto abort_with_alloc; 2567 2568 /* allocate the host info rings */ 2569 bytes = tx_ring_entries * sizeof (*sc->tx.info); 2570 sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2571 if (sc->tx.info == NULL) 2572 goto abort_with_alloc; 2573 2574 bytes = rx_ring_entries * sizeof (*sc->rx_small.info); 2575 sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2576 if (sc->rx_small.info == NULL) 2577 goto abort_with_alloc; 2578 2579 bytes = rx_ring_entries * sizeof (*sc->rx_big.info); 2580 sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2581 if (sc->rx_big.info == NULL) 2582 goto abort_with_alloc; 2583 2584 /* allocate the busdma resources */ 2585 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2586 1, /* alignment */ 2587 sc->tx.boundary, /* boundary */ 2588 BUS_SPACE_MAXADDR, /* low */ 2589 BUS_SPACE_MAXADDR, /* high */ 2590 NULL, NULL, /* filter */ 2591 65536 + 256, /* maxsize */ 2592 sc->tx.max_desc - 2, /* num segs */ 2593 sc->tx.boundary, /* maxsegsize */ 2594 BUS_DMA_ALLOCNOW, /* flags */ 2595 NULL, NULL, /* lock */ 2596 &sc->tx.dmat); /* tag */ 2597 2598 if (err != 0) { 2599 device_printf(sc->dev, "Err %d allocating tx dmat\n", 2600 err); 2601 goto abort_with_alloc; 2602 } 2603 2604 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2605 1, /* alignment */ 2606 4096, /* boundary */ 2607 BUS_SPACE_MAXADDR, /* low */ 2608 BUS_SPACE_MAXADDR, /* high */ 2609 NULL, NULL, /* filter */ 2610 MHLEN, /* maxsize */ 2611 1, /* num segs */ 2612 MHLEN, /* maxsegsize */ 2613 BUS_DMA_ALLOCNOW, /* flags */ 2614 NULL, NULL, /* lock */ 2615 &sc->rx_small.dmat); /* tag */ 2616 if (err != 0) { 2617 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 2618 err); 2619 goto abort_with_alloc; 2620 } 2621 2622 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2623 1, /* alignment */ 2624 4096, /* boundary */ 2625 BUS_SPACE_MAXADDR, /* low */ 2626 BUS_SPACE_MAXADDR, /* high */ 2627 NULL, NULL, /* filter */ 2628 3*4096, /* maxsize */ 2629 3, /* num segs */ 2630 4096, /* maxsegsize */ 2631 BUS_DMA_ALLOCNOW, /* flags */ 2632 NULL, NULL, /* lock */ 2633 &sc->rx_big.dmat); /* tag */ 2634 if (err != 0) { 2635 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 2636 err); 2637 goto abort_with_alloc; 2638 } 2639 2640 /* now use these tags to setup dmamaps for each slot 2641 in each ring */ 2642 for (i = 0; i <= sc->tx.mask; i++) { 2643 err = bus_dmamap_create(sc->tx.dmat, 0, 2644 &sc->tx.info[i].map); 2645 if (err != 0) { 2646 device_printf(sc->dev, "Err %d tx dmamap\n", 2647 err); 2648 goto abort_with_alloc; 2649 } 2650 } 2651 for (i = 0; i <= sc->rx_small.mask; i++) { 2652 err = bus_dmamap_create(sc->rx_small.dmat, 0, 2653 &sc->rx_small.info[i].map); 2654 if (err != 0) { 2655 device_printf(sc->dev, "Err %d rx_small dmamap\n", 2656 err); 2657 goto abort_with_alloc; 2658 } 2659 } 2660 err = bus_dmamap_create(sc->rx_small.dmat, 0, 2661 &sc->rx_small.extra_map); 2662 if (err != 0) { 2663 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 2664 err); 2665 goto abort_with_alloc; 2666 } 2667 2668 for (i = 0; i <= sc->rx_big.mask; i++) { 2669 err = bus_dmamap_create(sc->rx_big.dmat, 0, 2670 &sc->rx_big.info[i].map); 2671 if (err != 0) { 2672 device_printf(sc->dev, "Err %d rx_big dmamap\n", 2673 err); 2674 goto abort_with_alloc; 2675 } 2676 } 2677 err = bus_dmamap_create(sc->rx_big.dmat, 0, 2678 &sc->rx_big.extra_map); 2679 if (err != 0) { 2680 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 2681 err); 2682 goto abort_with_alloc; 2683 } 2684 return 0; 2685 2686 abort_with_alloc: 2687 mxge_free_rings(sc); 2688 2689 abort_with_nothing: 2690 return err; 2691 } 2692 2693 static void 2694 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 2695 { 2696 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 2697 2698 if (bufsize < MCLBYTES) { 2699 /* easy, everything fits in a single buffer */ 2700 *big_buf_size = MCLBYTES; 2701 *cl_size = MCLBYTES; 2702 *nbufs = 1; 2703 return; 2704 } 2705 2706 if (bufsize < MJUMPAGESIZE) { 2707 /* still easy, everything still fits in a single buffer */ 2708 *big_buf_size = MJUMPAGESIZE; 2709 *cl_size = MJUMPAGESIZE; 2710 *nbufs = 1; 2711 return; 2712 } 2713 /* now we need to use virtually contiguous buffers */ 2714 *cl_size = MJUM9BYTES; 2715 *big_buf_size = 4096; 2716 *nbufs = mtu / 4096 + 1; 2717 /* needs to be a power of two, so round up */ 2718 if (*nbufs == 3) 2719 *nbufs = 4; 2720 } 2721 2722 static int 2723 mxge_open(mxge_softc_t *sc) 2724 { 2725 mxge_cmd_t cmd; 2726 int i, err, big_bytes; 2727 bus_dmamap_t map; 2728 bus_addr_t bus; 2729 struct lro_entry *lro_entry; 2730 2731 SLIST_INIT(&sc->lro_free); 2732 SLIST_INIT(&sc->lro_active); 2733 2734 for (i = 0; i < sc->lro_cnt; i++) { 2735 lro_entry = (struct lro_entry *) 2736 malloc(sizeof (*lro_entry), M_DEVBUF, M_NOWAIT | M_ZERO); 2737 if (lro_entry == NULL) { 2738 sc->lro_cnt = i; 2739 break; 2740 } 2741 SLIST_INSERT_HEAD(&sc->lro_free, lro_entry, next); 2742 } 2743 2744 /* Copy the MAC address in case it was overridden */ 2745 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 2746 2747 err = mxge_reset(sc, 1); 2748 if (err != 0) { 2749 device_printf(sc->dev, "failed to reset\n"); 2750 return EIO; 2751 } 2752 2753 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, 2754 &sc->rx_big.cl_size, &sc->rx_big.nbufs); 2755 2756 cmd.data0 = sc->rx_big.nbufs; 2757 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 2758 &cmd); 2759 /* error is only meaningful if we're trying to set 2760 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 2761 if (err && sc->rx_big.nbufs > 1) { 2762 device_printf(sc->dev, 2763 "Failed to set alway-use-n to %d\n", 2764 sc->rx_big.nbufs); 2765 return EIO; 2766 } 2767 /* get the lanai pointers to the send and receive rings */ 2768 2769 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 2770 sc->tx.lanai = 2771 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 2772 err |= mxge_send_cmd(sc, 2773 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 2774 sc->rx_small.lanai = 2775 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 2776 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 2777 sc->rx_big.lanai = 2778 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 2779 2780 if (err != 0) { 2781 device_printf(sc->dev, 2782 "failed to get ring sizes or locations\n"); 2783 return EIO; 2784 } 2785 2786 /* stock receive rings */ 2787 for (i = 0; i <= sc->rx_small.mask; i++) { 2788 map = sc->rx_small.info[i].map; 2789 err = mxge_get_buf_small(sc, map, i); 2790 if (err) { 2791 device_printf(sc->dev, "alloced %d/%d smalls\n", 2792 i, sc->rx_small.mask + 1); 2793 goto abort; 2794 } 2795 } 2796 for (i = 0; i <= sc->rx_big.mask; i++) { 2797 sc->rx_big.shadow[i].addr_low = 0xffffffff; 2798 sc->rx_big.shadow[i].addr_high = 0xffffffff; 2799 } 2800 for (i = 0; i <= sc->rx_big.mask; i += sc->rx_big.nbufs) { 2801 map = sc->rx_big.info[i].map; 2802 err = mxge_get_buf_big(sc, map, i); 2803 if (err) { 2804 device_printf(sc->dev, "alloced %d/%d bigs\n", 2805 i, sc->rx_big.mask + 1); 2806 goto abort; 2807 } 2808 } 2809 2810 /* Give the firmware the mtu and the big and small buffer 2811 sizes. The firmware wants the big buf size to be a power 2812 of two. Luckily, FreeBSD's clusters are powers of two */ 2813 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 2814 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 2815 cmd.data0 = MHLEN - MXGEFW_PAD; 2816 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 2817 &cmd); 2818 cmd.data0 = big_bytes; 2819 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 2820 2821 if (err != 0) { 2822 device_printf(sc->dev, "failed to setup params\n"); 2823 goto abort; 2824 } 2825 2826 /* Now give him the pointer to the stats block */ 2827 cmd.data0 = MXGE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr); 2828 cmd.data1 = MXGE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr); 2829 cmd.data2 = sizeof(struct mcp_irq_data); 2830 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 2831 2832 if (err != 0) { 2833 bus = sc->fw_stats_dma.bus_addr; 2834 bus += offsetof(struct mcp_irq_data, send_done_count); 2835 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 2836 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 2837 err = mxge_send_cmd(sc, 2838 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 2839 &cmd); 2840 /* Firmware cannot support multicast without STATS_DMA_V2 */ 2841 sc->fw_multicast_support = 0; 2842 } else { 2843 sc->fw_multicast_support = 1; 2844 } 2845 2846 if (err != 0) { 2847 device_printf(sc->dev, "failed to setup params\n"); 2848 goto abort; 2849 } 2850 2851 /* Finally, start the firmware running */ 2852 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 2853 if (err) { 2854 device_printf(sc->dev, "Couldn't bring up link\n"); 2855 goto abort; 2856 } 2857 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 2858 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 2859 2860 return 0; 2861 2862 2863 abort: 2864 mxge_free_mbufs(sc); 2865 2866 return err; 2867 } 2868 2869 static int 2870 mxge_close(mxge_softc_t *sc) 2871 { 2872 struct lro_entry *lro_entry; 2873 mxge_cmd_t cmd; 2874 int err, old_down_cnt; 2875 2876 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 2877 old_down_cnt = sc->down_cnt; 2878 mb(); 2879 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 2880 if (err) { 2881 device_printf(sc->dev, "Couldn't bring down link\n"); 2882 } 2883 if (old_down_cnt == sc->down_cnt) { 2884 /* wait for down irq */ 2885 DELAY(10 * sc->intr_coal_delay); 2886 } 2887 if (old_down_cnt == sc->down_cnt) { 2888 device_printf(sc->dev, "never got down irq\n"); 2889 } 2890 2891 mxge_free_mbufs(sc); 2892 2893 while (!SLIST_EMPTY(&sc->lro_free)) { 2894 lro_entry = SLIST_FIRST(&sc->lro_free); 2895 SLIST_REMOVE_HEAD(&sc->lro_free, next); 2896 } 2897 return 0; 2898 } 2899 2900 static void 2901 mxge_setup_cfg_space(mxge_softc_t *sc) 2902 { 2903 device_t dev = sc->dev; 2904 int reg; 2905 uint16_t cmd, lnk, pectl; 2906 2907 /* find the PCIe link width and set max read request to 4KB*/ 2908 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 2909 lnk = pci_read_config(dev, reg + 0x12, 2); 2910 sc->link_width = (lnk >> 4) & 0x3f; 2911 2912 pectl = pci_read_config(dev, reg + 0x8, 2); 2913 pectl = (pectl & ~0x7000) | (5 << 12); 2914 pci_write_config(dev, reg + 0x8, pectl, 2); 2915 } 2916 2917 /* Enable DMA and Memory space access */ 2918 pci_enable_busmaster(dev); 2919 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 2920 cmd |= PCIM_CMD_MEMEN; 2921 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 2922 } 2923 2924 static uint32_t 2925 mxge_read_reboot(mxge_softc_t *sc) 2926 { 2927 device_t dev = sc->dev; 2928 uint32_t vs; 2929 2930 /* find the vendor specific offset */ 2931 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 2932 device_printf(sc->dev, 2933 "could not find vendor specific offset\n"); 2934 return (uint32_t)-1; 2935 } 2936 /* enable read32 mode */ 2937 pci_write_config(dev, vs + 0x10, 0x3, 1); 2938 /* tell NIC which register to read */ 2939 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 2940 return (pci_read_config(dev, vs + 0x14, 4)); 2941 } 2942 2943 static void 2944 mxge_watchdog_reset(mxge_softc_t *sc) 2945 { 2946 int err; 2947 uint32_t reboot; 2948 uint16_t cmd; 2949 2950 err = ENXIO; 2951 2952 device_printf(sc->dev, "Watchdog reset!\n"); 2953 2954 /* 2955 * check to see if the NIC rebooted. If it did, then all of 2956 * PCI config space has been reset, and things like the 2957 * busmaster bit will be zero. If this is the case, then we 2958 * must restore PCI config space before the NIC can be used 2959 * again 2960 */ 2961 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 2962 if (cmd == 0xffff) { 2963 /* 2964 * maybe the watchdog caught the NIC rebooting; wait 2965 * up to 100ms for it to finish. If it does not come 2966 * back, then give up 2967 */ 2968 DELAY(1000*100); 2969 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 2970 if (cmd == 0xffff) { 2971 device_printf(sc->dev, "NIC disappeared!\n"); 2972 goto abort; 2973 } 2974 } 2975 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 2976 /* print the reboot status */ 2977 reboot = mxge_read_reboot(sc); 2978 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 2979 reboot); 2980 /* restore PCI configuration space */ 2981 2982 /* XXXX waiting for pci_cfg_restore() to be exported */ 2983 goto abort; /* just abort for now */ 2984 2985 /* and redo any changes we made to our config space */ 2986 mxge_setup_cfg_space(sc); 2987 } else { 2988 device_printf(sc->dev, "NIC did not reboot, ring state:\n"); 2989 device_printf(sc->dev, "tx.req=%d tx.done=%d\n", 2990 sc->tx.req, sc->tx.done); 2991 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 2992 sc->tx.pkt_done, 2993 be32toh(sc->fw_stats->send_done_count)); 2994 } 2995 2996 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { 2997 mxge_close(sc); 2998 err = mxge_open(sc); 2999 } 3000 3001 abort: 3002 /* 3003 * stop the watchdog if the nic is dead, to avoid spamming the 3004 * console 3005 */ 3006 if (err != 0) { 3007 callout_stop(&sc->co_hdl); 3008 } 3009 } 3010 3011 static void 3012 mxge_watchdog(mxge_softc_t *sc) 3013 { 3014 mxge_tx_buf_t *tx = &sc->tx; 3015 3016 /* see if we have outstanding transmits, which 3017 have been pending for more than mxge_ticks */ 3018 if (tx->req != tx->done && 3019 tx->watchdog_req != tx->watchdog_done && 3020 tx->done == tx->watchdog_done) 3021 mxge_watchdog_reset(sc); 3022 3023 tx->watchdog_req = tx->req; 3024 tx->watchdog_done = tx->done; 3025 } 3026 3027 static void 3028 mxge_tick(void *arg) 3029 { 3030 mxge_softc_t *sc = arg; 3031 3032 3033 /* Synchronize with possible callout reset/stop. */ 3034 if (callout_pending(&sc->co_hdl) || 3035 !callout_active(&sc->co_hdl)) { 3036 mtx_unlock(&sc->driver_mtx); 3037 return; 3038 } 3039 3040 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3041 mxge_watchdog(sc); 3042 } 3043 3044 static int 3045 mxge_media_change(struct ifnet *ifp) 3046 { 3047 return EINVAL; 3048 } 3049 3050 static int 3051 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3052 { 3053 struct ifnet *ifp = sc->ifp; 3054 int real_mtu, old_mtu; 3055 int err = 0; 3056 3057 3058 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3059 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 3060 return EINVAL; 3061 mtx_lock(&sc->driver_mtx); 3062 old_mtu = ifp->if_mtu; 3063 ifp->if_mtu = mtu; 3064 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3065 callout_stop(&sc->co_hdl); 3066 mxge_close(sc); 3067 err = mxge_open(sc); 3068 if (err != 0) { 3069 ifp->if_mtu = old_mtu; 3070 mxge_close(sc); 3071 (void) mxge_open(sc); 3072 } 3073 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3074 } 3075 mtx_unlock(&sc->driver_mtx); 3076 return err; 3077 } 3078 3079 static void 3080 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3081 { 3082 mxge_softc_t *sc = ifp->if_softc; 3083 3084 3085 if (sc == NULL) 3086 return; 3087 ifmr->ifm_status = IFM_AVALID; 3088 ifmr->ifm_status |= sc->fw_stats->link_up ? IFM_ACTIVE : 0; 3089 ifmr->ifm_active = IFM_AUTO | IFM_ETHER; 3090 ifmr->ifm_active |= sc->fw_stats->link_up ? IFM_FDX : 0; 3091 } 3092 3093 static int 3094 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 3095 { 3096 mxge_softc_t *sc = ifp->if_softc; 3097 struct ifreq *ifr = (struct ifreq *)data; 3098 int err, mask; 3099 3100 err = 0; 3101 switch (command) { 3102 case SIOCSIFADDR: 3103 case SIOCGIFADDR: 3104 err = ether_ioctl(ifp, command, data); 3105 break; 3106 3107 case SIOCSIFMTU: 3108 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3109 break; 3110 3111 case SIOCSIFFLAGS: 3112 mtx_lock(&sc->driver_mtx); 3113 if (ifp->if_flags & IFF_UP) { 3114 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 3115 err = mxge_open(sc); 3116 callout_reset(&sc->co_hdl, mxge_ticks, 3117 mxge_tick, sc); 3118 } else { 3119 /* take care of promis can allmulti 3120 flag chages */ 3121 mxge_change_promisc(sc, 3122 ifp->if_flags & IFF_PROMISC); 3123 mxge_set_multicast_list(sc); 3124 } 3125 } else { 3126 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3127 mxge_close(sc); 3128 callout_stop(&sc->co_hdl); 3129 } 3130 } 3131 mtx_unlock(&sc->driver_mtx); 3132 break; 3133 3134 case SIOCADDMULTI: 3135 case SIOCDELMULTI: 3136 mtx_lock(&sc->driver_mtx); 3137 mxge_set_multicast_list(sc); 3138 mtx_unlock(&sc->driver_mtx); 3139 break; 3140 3141 case SIOCSIFCAP: 3142 mtx_lock(&sc->driver_mtx); 3143 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3144 if (mask & IFCAP_TXCSUM) { 3145 if (IFCAP_TXCSUM & ifp->if_capenable) { 3146 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 3147 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 3148 | CSUM_TSO); 3149 } else { 3150 ifp->if_capenable |= IFCAP_TXCSUM; 3151 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 3152 } 3153 } else if (mask & IFCAP_RXCSUM) { 3154 if (IFCAP_RXCSUM & ifp->if_capenable) { 3155 ifp->if_capenable &= ~IFCAP_RXCSUM; 3156 sc->csum_flag = 0; 3157 } else { 3158 ifp->if_capenable |= IFCAP_RXCSUM; 3159 sc->csum_flag = 1; 3160 } 3161 } 3162 if (mask & IFCAP_TSO4) { 3163 if (IFCAP_TSO4 & ifp->if_capenable) { 3164 ifp->if_capenable &= ~IFCAP_TSO4; 3165 ifp->if_hwassist &= ~CSUM_TSO; 3166 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 3167 ifp->if_capenable |= IFCAP_TSO4; 3168 ifp->if_hwassist |= CSUM_TSO; 3169 } else { 3170 printf("mxge requires tx checksum offload" 3171 " be enabled to use TSO\n"); 3172 err = EINVAL; 3173 } 3174 } 3175 if (mask & IFCAP_LRO) { 3176 if (IFCAP_LRO & ifp->if_capenable) 3177 err = mxge_change_lro_locked(sc, 0); 3178 else 3179 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 3180 } 3181 if (mask & IFCAP_VLAN_HWTAGGING) 3182 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 3183 mtx_unlock(&sc->driver_mtx); 3184 VLAN_CAPABILITIES(ifp); 3185 3186 break; 3187 3188 case SIOCGIFMEDIA: 3189 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 3190 &sc->media, command); 3191 break; 3192 3193 default: 3194 err = ENOTTY; 3195 } 3196 return err; 3197 } 3198 3199 static void 3200 mxge_fetch_tunables(mxge_softc_t *sc) 3201 { 3202 3203 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 3204 &mxge_flow_control); 3205 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 3206 &mxge_intr_coal_delay); 3207 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 3208 &mxge_nvidia_ecrc_enable); 3209 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 3210 &mxge_force_firmware); 3211 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 3212 &mxge_deassert_wait); 3213 TUNABLE_INT_FETCH("hw.mxge.verbose", 3214 &mxge_verbose); 3215 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 3216 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 3217 printf("%d %d\n", sc->lro_cnt, mxge_lro_cnt); 3218 if (sc->lro_cnt != 0) 3219 mxge_lro_cnt = sc->lro_cnt; 3220 3221 if (bootverbose) 3222 mxge_verbose = 1; 3223 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 3224 mxge_intr_coal_delay = 30; 3225 if (mxge_ticks == 0) 3226 mxge_ticks = hz; 3227 sc->pause = mxge_flow_control; 3228 3229 } 3230 3231 static int 3232 mxge_attach(device_t dev) 3233 { 3234 mxge_softc_t *sc = device_get_softc(dev); 3235 struct ifnet *ifp; 3236 int count, rid, err; 3237 3238 sc->dev = dev; 3239 mxge_fetch_tunables(sc); 3240 3241 err = bus_dma_tag_create(NULL, /* parent */ 3242 1, /* alignment */ 3243 4096, /* boundary */ 3244 BUS_SPACE_MAXADDR, /* low */ 3245 BUS_SPACE_MAXADDR, /* high */ 3246 NULL, NULL, /* filter */ 3247 65536 + 256, /* maxsize */ 3248 MXGE_MAX_SEND_DESC, /* num segs */ 3249 4096, /* maxsegsize */ 3250 0, /* flags */ 3251 NULL, NULL, /* lock */ 3252 &sc->parent_dmat); /* tag */ 3253 3254 if (err != 0) { 3255 device_printf(sc->dev, "Err %d allocating parent dmat\n", 3256 err); 3257 goto abort_with_nothing; 3258 } 3259 3260 ifp = sc->ifp = if_alloc(IFT_ETHER); 3261 if (ifp == NULL) { 3262 device_printf(dev, "can not if_alloc()\n"); 3263 err = ENOSPC; 3264 goto abort_with_parent_dmat; 3265 } 3266 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 3267 device_get_nameunit(dev)); 3268 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 3269 snprintf(sc->tx_mtx_name, sizeof(sc->tx_mtx_name), "%s:tx", 3270 device_get_nameunit(dev)); 3271 mtx_init(&sc->tx_mtx, sc->tx_mtx_name, NULL, MTX_DEF); 3272 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 3273 "%s:drv", device_get_nameunit(dev)); 3274 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 3275 MTX_NETWORK_LOCK, MTX_DEF); 3276 3277 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 3278 3279 mxge_setup_cfg_space(sc); 3280 3281 /* Map the board into the kernel */ 3282 rid = PCIR_BARS; 3283 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 3284 ~0, 1, RF_ACTIVE); 3285 if (sc->mem_res == NULL) { 3286 device_printf(dev, "could not map memory\n"); 3287 err = ENXIO; 3288 goto abort_with_lock; 3289 } 3290 sc->sram = rman_get_virtual(sc->mem_res); 3291 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 3292 if (sc->sram_size > rman_get_size(sc->mem_res)) { 3293 device_printf(dev, "impossible memory region size %ld\n", 3294 rman_get_size(sc->mem_res)); 3295 err = ENXIO; 3296 goto abort_with_mem_res; 3297 } 3298 3299 /* make NULL terminated copy of the EEPROM strings section of 3300 lanai SRAM */ 3301 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 3302 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 3303 rman_get_bushandle(sc->mem_res), 3304 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 3305 sc->eeprom_strings, 3306 MXGE_EEPROM_STRINGS_SIZE - 2); 3307 err = mxge_parse_strings(sc); 3308 if (err != 0) 3309 goto abort_with_mem_res; 3310 3311 /* Enable write combining for efficient use of PCIe bus */ 3312 mxge_enable_wc(sc); 3313 3314 /* Allocate the out of band dma memory */ 3315 err = mxge_dma_alloc(sc, &sc->cmd_dma, 3316 sizeof (mxge_cmd_t), 64); 3317 if (err != 0) 3318 goto abort_with_mem_res; 3319 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 3320 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 3321 if (err != 0) 3322 goto abort_with_cmd_dma; 3323 3324 err = mxge_dma_alloc(sc, &sc->fw_stats_dma, 3325 sizeof (*sc->fw_stats), 64); 3326 if (err != 0) 3327 goto abort_with_zeropad_dma; 3328 sc->fw_stats = (mcp_irq_data_t *)sc->fw_stats_dma.addr; 3329 3330 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 3331 if (err != 0) 3332 goto abort_with_fw_stats; 3333 3334 /* Add our ithread */ 3335 count = pci_msi_count(dev); 3336 if (count == 1 && pci_alloc_msi(dev, &count) == 0) { 3337 rid = 1; 3338 sc->msi_enabled = 1; 3339 } else { 3340 rid = 0; 3341 } 3342 sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0, 3343 1, RF_SHAREABLE | RF_ACTIVE); 3344 if (sc->irq_res == NULL) { 3345 device_printf(dev, "could not alloc interrupt\n"); 3346 goto abort_with_dmabench; 3347 } 3348 if (mxge_verbose) 3349 device_printf(dev, "using %s irq %ld\n", 3350 sc->msi_enabled ? "MSI" : "INTx", 3351 rman_get_start(sc->irq_res)); 3352 /* select & load the firmware */ 3353 err = mxge_select_firmware(sc); 3354 if (err != 0) 3355 goto abort_with_irq_res; 3356 sc->intr_coal_delay = mxge_intr_coal_delay; 3357 err = mxge_reset(sc, 0); 3358 if (err != 0) 3359 goto abort_with_irq_res; 3360 3361 err = mxge_alloc_rings(sc); 3362 if (err != 0) { 3363 device_printf(sc->dev, "failed to allocate rings\n"); 3364 goto abort_with_irq_res; 3365 } 3366 3367 err = bus_setup_intr(sc->dev, sc->irq_res, 3368 INTR_TYPE_NET | INTR_MPSAFE, 3369 NULL, mxge_intr, sc, &sc->ih); 3370 if (err != 0) { 3371 goto abort_with_rings; 3372 } 3373 /* hook into the network stack */ 3374 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 3375 ifp->if_baudrate = 100000000; 3376 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 3377 IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | 3378 IFCAP_VLAN_HWCSUM | IFCAP_LRO; 3379 3380 sc->max_mtu = mxge_max_mtu(sc); 3381 if (sc->max_mtu >= 9000) 3382 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 3383 else 3384 device_printf(dev, "MTU limited to %d. Install " 3385 "latest firmware for 9000 byte jumbo support\n", 3386 sc->max_mtu - ETHER_HDR_LEN); 3387 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 3388 ifp->if_capenable = ifp->if_capabilities; 3389 if (sc->lro_cnt == 0) 3390 ifp->if_capenable &= ~IFCAP_LRO; 3391 sc->csum_flag = 1; 3392 ifp->if_init = mxge_init; 3393 ifp->if_softc = sc; 3394 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 3395 ifp->if_ioctl = mxge_ioctl; 3396 ifp->if_start = mxge_start; 3397 ether_ifattach(ifp, sc->mac_addr); 3398 /* ether_ifattach sets mtu to 1500 */ 3399 if (ifp->if_capabilities & IFCAP_JUMBO_MTU) 3400 ifp->if_mtu = 9000; 3401 3402 /* Initialise the ifmedia structure */ 3403 ifmedia_init(&sc->media, 0, mxge_media_change, 3404 mxge_media_status); 3405 ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL); 3406 mxge_add_sysctls(sc); 3407 return 0; 3408 3409 abort_with_rings: 3410 mxge_free_rings(sc); 3411 abort_with_irq_res: 3412 bus_release_resource(dev, SYS_RES_IRQ, 3413 sc->msi_enabled ? 1 : 0, sc->irq_res); 3414 if (sc->msi_enabled) 3415 pci_release_msi(dev); 3416 abort_with_dmabench: 3417 mxge_dma_free(&sc->dmabench_dma); 3418 abort_with_fw_stats: 3419 mxge_dma_free(&sc->fw_stats_dma); 3420 abort_with_zeropad_dma: 3421 mxge_dma_free(&sc->zeropad_dma); 3422 abort_with_cmd_dma: 3423 mxge_dma_free(&sc->cmd_dma); 3424 abort_with_mem_res: 3425 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 3426 abort_with_lock: 3427 pci_disable_busmaster(dev); 3428 mtx_destroy(&sc->cmd_mtx); 3429 mtx_destroy(&sc->tx_mtx); 3430 mtx_destroy(&sc->driver_mtx); 3431 if_free(ifp); 3432 abort_with_parent_dmat: 3433 bus_dma_tag_destroy(sc->parent_dmat); 3434 3435 abort_with_nothing: 3436 return err; 3437 } 3438 3439 static int 3440 mxge_detach(device_t dev) 3441 { 3442 mxge_softc_t *sc = device_get_softc(dev); 3443 3444 if (sc->ifp->if_vlantrunk != NULL) { 3445 device_printf(sc->dev, 3446 "Detach vlans before removing module\n"); 3447 return EBUSY; 3448 } 3449 mtx_lock(&sc->driver_mtx); 3450 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 3451 mxge_close(sc); 3452 callout_stop(&sc->co_hdl); 3453 mtx_unlock(&sc->driver_mtx); 3454 ether_ifdetach(sc->ifp); 3455 ifmedia_removeall(&sc->media); 3456 mxge_dummy_rdma(sc, 0); 3457 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 3458 mxge_free_rings(sc); 3459 bus_release_resource(dev, SYS_RES_IRQ, 3460 sc->msi_enabled ? 1 : 0, sc->irq_res); 3461 if (sc->msi_enabled) 3462 pci_release_msi(dev); 3463 3464 sc->rx_done.entry = NULL; 3465 mxge_dma_free(&sc->fw_stats_dma); 3466 mxge_dma_free(&sc->dmabench_dma); 3467 mxge_dma_free(&sc->zeropad_dma); 3468 mxge_dma_free(&sc->cmd_dma); 3469 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 3470 pci_disable_busmaster(dev); 3471 mtx_destroy(&sc->cmd_mtx); 3472 mtx_destroy(&sc->tx_mtx); 3473 mtx_destroy(&sc->driver_mtx); 3474 if_free(sc->ifp); 3475 bus_dma_tag_destroy(sc->parent_dmat); 3476 return 0; 3477 } 3478 3479 static int 3480 mxge_shutdown(device_t dev) 3481 { 3482 return 0; 3483 } 3484 3485 /* 3486 This file uses Myri10GE driver indentation. 3487 3488 Local Variables: 3489 c-file-style:"linux" 3490 tab-width:8 3491 End: 3492 */ 3493