1 /****************************************************************************** 2 3 Copyright (c) 2006-2007, Myricom Inc. 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 POSSIBILITY OF SUCH DAMAGE. 27 28 ***************************************************************************/ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/linker.h> 36 #include <sys/firmware.h> 37 #include <sys/endian.h> 38 #include <sys/sockio.h> 39 #include <sys/mbuf.h> 40 #include <sys/malloc.h> 41 #include <sys/kdb.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/module.h> 45 #include <sys/memrange.h> 46 #include <sys/socket.h> 47 #include <sys/sysctl.h> 48 #include <sys/sx.h> 49 50 #include <net/if.h> 51 #include <net/if_arp.h> 52 #include <net/ethernet.h> 53 #include <net/if_dl.h> 54 #include <net/if_media.h> 55 56 #include <net/bpf.h> 57 58 #include <net/if_types.h> 59 #include <net/if_vlan_var.h> 60 #include <net/zlib.h> 61 62 #include <netinet/in_systm.h> 63 #include <netinet/in.h> 64 #include <netinet/ip.h> 65 #include <netinet/tcp.h> 66 67 #include <machine/bus.h> 68 #include <machine/in_cksum.h> 69 #include <machine/resource.h> 70 #include <sys/bus.h> 71 #include <sys/rman.h> 72 73 #include <dev/pci/pcireg.h> 74 #include <dev/pci/pcivar.h> 75 76 #include <vm/vm.h> /* for pmap_mapdev() */ 77 #include <vm/pmap.h> 78 79 #if defined(__i386) || defined(__amd64) 80 #include <machine/specialreg.h> 81 #endif 82 83 #include <dev/mxge/mxge_mcp.h> 84 #include <dev/mxge/mcp_gen_header.h> 85 #include <dev/mxge/if_mxge_var.h> 86 87 /* tunable params */ 88 static int mxge_nvidia_ecrc_enable = 1; 89 static int mxge_force_firmware = 0; 90 static int mxge_intr_coal_delay = 30; 91 static int mxge_deassert_wait = 1; 92 static int mxge_flow_control = 1; 93 static int mxge_verbose = 0; 94 static int mxge_lro_cnt = 8; 95 static int mxge_ticks; 96 static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 97 static char *mxge_fw_aligned = "mxge_eth_z8e"; 98 99 static int mxge_probe(device_t dev); 100 static int mxge_attach(device_t dev); 101 static int mxge_detach(device_t dev); 102 static int mxge_shutdown(device_t dev); 103 static void mxge_intr(void *arg); 104 105 static device_method_t mxge_methods[] = 106 { 107 /* Device interface */ 108 DEVMETHOD(device_probe, mxge_probe), 109 DEVMETHOD(device_attach, mxge_attach), 110 DEVMETHOD(device_detach, mxge_detach), 111 DEVMETHOD(device_shutdown, mxge_shutdown), 112 {0, 0} 113 }; 114 115 static driver_t mxge_driver = 116 { 117 "mxge", 118 mxge_methods, 119 sizeof(mxge_softc_t), 120 }; 121 122 static devclass_t mxge_devclass; 123 124 /* Declare ourselves to be a child of the PCI bus.*/ 125 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 126 MODULE_DEPEND(mxge, firmware, 1, 1, 1); 127 MODULE_DEPEND(mxge, zlib, 1, 1, 1); 128 129 static int mxge_load_firmware(mxge_softc_t *sc); 130 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 131 static int mxge_close(mxge_softc_t *sc); 132 static int mxge_open(mxge_softc_t *sc); 133 static void mxge_tick(void *arg); 134 135 static int 136 mxge_probe(device_t dev) 137 { 138 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 139 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E)) { 140 device_set_desc(dev, "Myri10G-PCIE-8A"); 141 return 0; 142 } 143 return ENXIO; 144 } 145 146 static void 147 mxge_enable_wc(mxge_softc_t *sc) 148 { 149 #if defined(__i386) || defined(__amd64) 150 struct mem_range_desc mrdesc; 151 vm_paddr_t pa; 152 vm_offset_t len; 153 int err, action; 154 155 sc->wc = 1; 156 len = rman_get_size(sc->mem_res); 157 err = pmap_change_attr((vm_offset_t) sc->sram, 158 len, PAT_WRITE_COMBINING); 159 if (err == 0) 160 return; 161 else 162 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 163 err); 164 pa = rman_get_start(sc->mem_res); 165 mrdesc.mr_base = pa; 166 mrdesc.mr_len = len; 167 mrdesc.mr_flags = MDF_WRITECOMBINE; 168 action = MEMRANGE_SET_UPDATE; 169 strcpy((char *)&mrdesc.mr_owner, "mxge"); 170 err = mem_range_attr_set(&mrdesc, &action); 171 if (err != 0) { 172 sc->wc = 0; 173 device_printf(sc->dev, 174 "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n", 175 (unsigned long)pa, (unsigned long)len, err); 176 } 177 #endif 178 } 179 180 181 /* callback to get our DMA address */ 182 static void 183 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 184 int error) 185 { 186 if (error == 0) { 187 *(bus_addr_t *) arg = segs->ds_addr; 188 } 189 } 190 191 static int 192 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 193 bus_size_t alignment) 194 { 195 int err; 196 device_t dev = sc->dev; 197 198 /* allocate DMAable memory tags */ 199 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 200 alignment, /* alignment */ 201 4096, /* boundary */ 202 BUS_SPACE_MAXADDR, /* low */ 203 BUS_SPACE_MAXADDR, /* high */ 204 NULL, NULL, /* filter */ 205 bytes, /* maxsize */ 206 1, /* num segs */ 207 4096, /* maxsegsize */ 208 BUS_DMA_COHERENT, /* flags */ 209 NULL, NULL, /* lock */ 210 &dma->dmat); /* tag */ 211 if (err != 0) { 212 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 213 return err; 214 } 215 216 /* allocate DMAable memory & map */ 217 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 218 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 219 | BUS_DMA_ZERO), &dma->map); 220 if (err != 0) { 221 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 222 goto abort_with_dmat; 223 } 224 225 /* load the memory */ 226 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 227 mxge_dmamap_callback, 228 (void *)&dma->bus_addr, 0); 229 if (err != 0) { 230 device_printf(dev, "couldn't load map (err = %d)\n", err); 231 goto abort_with_mem; 232 } 233 return 0; 234 235 abort_with_mem: 236 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 237 abort_with_dmat: 238 (void)bus_dma_tag_destroy(dma->dmat); 239 return err; 240 } 241 242 243 static void 244 mxge_dma_free(mxge_dma_t *dma) 245 { 246 bus_dmamap_unload(dma->dmat, dma->map); 247 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 248 (void)bus_dma_tag_destroy(dma->dmat); 249 } 250 251 /* 252 * The eeprom strings on the lanaiX have the format 253 * SN=x\0 254 * MAC=x:x:x:x:x:x\0 255 * PC=text\0 256 */ 257 258 static int 259 mxge_parse_strings(mxge_softc_t *sc) 260 { 261 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 262 263 char *ptr, *limit; 264 int i, found_mac; 265 266 ptr = sc->eeprom_strings; 267 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 268 found_mac = 0; 269 while (ptr < limit && *ptr != '\0') { 270 if (memcmp(ptr, "MAC=", 4) == 0) { 271 ptr += 1; 272 sc->mac_addr_string = ptr; 273 for (i = 0; i < 6; i++) { 274 ptr += 3; 275 if ((ptr + 2) > limit) 276 goto abort; 277 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 278 found_mac = 1; 279 } 280 } else if (memcmp(ptr, "PC=", 3) == 0) { 281 ptr += 3; 282 strncpy(sc->product_code_string, ptr, 283 sizeof (sc->product_code_string) - 1); 284 } else if (memcmp(ptr, "SN=", 3) == 0) { 285 ptr += 3; 286 strncpy(sc->serial_number_string, ptr, 287 sizeof (sc->serial_number_string) - 1); 288 } 289 MXGE_NEXT_STRING(ptr); 290 } 291 292 if (found_mac) 293 return 0; 294 295 abort: 296 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 297 298 return ENXIO; 299 } 300 301 #if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__ 302 static void 303 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 304 { 305 uint32_t val; 306 unsigned long base, off; 307 char *va, *cfgptr; 308 device_t pdev, mcp55; 309 uint16_t vendor_id, device_id, word; 310 uintptr_t bus, slot, func, ivend, idev; 311 uint32_t *ptr32; 312 313 314 if (!mxge_nvidia_ecrc_enable) 315 return; 316 317 pdev = device_get_parent(device_get_parent(sc->dev)); 318 if (pdev == NULL) { 319 device_printf(sc->dev, "could not find parent?\n"); 320 return; 321 } 322 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 323 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 324 325 if (vendor_id != 0x10de) 326 return; 327 328 base = 0; 329 330 if (device_id == 0x005d) { 331 /* ck804, base address is magic */ 332 base = 0xe0000000UL; 333 } else if (device_id >= 0x0374 && device_id <= 0x378) { 334 /* mcp55, base address stored in chipset */ 335 mcp55 = pci_find_bsf(0, 0, 0); 336 if (mcp55 && 337 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 338 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 339 word = pci_read_config(mcp55, 0x90, 2); 340 base = ((unsigned long)word & 0x7ffeU) << 25; 341 } 342 } 343 if (!base) 344 return; 345 346 /* XXXX 347 Test below is commented because it is believed that doing 348 config read/write beyond 0xff will access the config space 349 for the next larger function. Uncomment this and remove 350 the hacky pmap_mapdev() way of accessing config space when 351 FreeBSD grows support for extended pcie config space access 352 */ 353 #if 0 354 /* See if we can, by some miracle, access the extended 355 config space */ 356 val = pci_read_config(pdev, 0x178, 4); 357 if (val != 0xffffffff) { 358 val |= 0x40; 359 pci_write_config(pdev, 0x178, val, 4); 360 return; 361 } 362 #endif 363 /* Rather than using normal pci config space writes, we must 364 * map the Nvidia config space ourselves. This is because on 365 * opteron/nvidia class machine the 0xe000000 mapping is 366 * handled by the nvidia chipset, that means the internal PCI 367 * device (the on-chip northbridge), or the amd-8131 bridge 368 * and things behind them are not visible by this method. 369 */ 370 371 BUS_READ_IVAR(device_get_parent(pdev), pdev, 372 PCI_IVAR_BUS, &bus); 373 BUS_READ_IVAR(device_get_parent(pdev), pdev, 374 PCI_IVAR_SLOT, &slot); 375 BUS_READ_IVAR(device_get_parent(pdev), pdev, 376 PCI_IVAR_FUNCTION, &func); 377 BUS_READ_IVAR(device_get_parent(pdev), pdev, 378 PCI_IVAR_VENDOR, &ivend); 379 BUS_READ_IVAR(device_get_parent(pdev), pdev, 380 PCI_IVAR_DEVICE, &idev); 381 382 off = base 383 + 0x00100000UL * (unsigned long)bus 384 + 0x00001000UL * (unsigned long)(func 385 + 8 * slot); 386 387 /* map it into the kernel */ 388 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 389 390 391 if (va == NULL) { 392 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 393 return; 394 } 395 /* get a pointer to the config space mapped into the kernel */ 396 cfgptr = va + (off & PAGE_MASK); 397 398 /* make sure that we can really access it */ 399 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 400 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 401 if (! (vendor_id == ivend && device_id == idev)) { 402 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 403 vendor_id, device_id); 404 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 405 return; 406 } 407 408 ptr32 = (uint32_t*)(cfgptr + 0x178); 409 val = *ptr32; 410 411 if (val == 0xffffffff) { 412 device_printf(sc->dev, "extended mapping failed\n"); 413 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 414 return; 415 } 416 *ptr32 = val | 0x40; 417 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 418 if (mxge_verbose) 419 device_printf(sc->dev, 420 "Enabled ECRC on upstream Nvidia bridge " 421 "at %d:%d:%d\n", 422 (int)bus, (int)slot, (int)func); 423 return; 424 } 425 #else 426 static void 427 mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 428 { 429 device_printf(sc->dev, 430 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 431 return; 432 } 433 #endif 434 435 436 static int 437 mxge_dma_test(mxge_softc_t *sc, int test_type) 438 { 439 mxge_cmd_t cmd; 440 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 441 int status; 442 uint32_t len; 443 char *test = " "; 444 445 446 /* Run a small DMA test. 447 * The magic multipliers to the length tell the firmware 448 * to do DMA read, write, or read+write tests. The 449 * results are returned in cmd.data0. The upper 16 450 * bits of the return is the number of transfers completed. 451 * The lower 16 bits is the time in 0.5us ticks that the 452 * transfers took to complete. 453 */ 454 455 len = sc->tx.boundary; 456 457 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 458 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 459 cmd.data2 = len * 0x10000; 460 status = mxge_send_cmd(sc, test_type, &cmd); 461 if (status != 0) { 462 test = "read"; 463 goto abort; 464 } 465 sc->read_dma = ((cmd.data0>>16) * len * 2) / 466 (cmd.data0 & 0xffff); 467 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 468 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 469 cmd.data2 = len * 0x1; 470 status = mxge_send_cmd(sc, test_type, &cmd); 471 if (status != 0) { 472 test = "write"; 473 goto abort; 474 } 475 sc->write_dma = ((cmd.data0>>16) * len * 2) / 476 (cmd.data0 & 0xffff); 477 478 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 479 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 480 cmd.data2 = len * 0x10001; 481 status = mxge_send_cmd(sc, test_type, &cmd); 482 if (status != 0) { 483 test = "read/write"; 484 goto abort; 485 } 486 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 487 (cmd.data0 & 0xffff); 488 489 abort: 490 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 491 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 492 test, status); 493 494 return status; 495 } 496 497 /* 498 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 499 * when the PCI-E Completion packets are aligned on an 8-byte 500 * boundary. Some PCI-E chip sets always align Completion packets; on 501 * the ones that do not, the alignment can be enforced by enabling 502 * ECRC generation (if supported). 503 * 504 * When PCI-E Completion packets are not aligned, it is actually more 505 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 506 * 507 * If the driver can neither enable ECRC nor verify that it has 508 * already been enabled, then it must use a firmware image which works 509 * around unaligned completion packets (ethp_z8e.dat), and it should 510 * also ensure that it never gives the device a Read-DMA which is 511 * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is 512 * enabled, then the driver should use the aligned (eth_z8e.dat) 513 * firmware image, and set tx.boundary to 4KB. 514 */ 515 516 static int 517 mxge_firmware_probe(mxge_softc_t *sc) 518 { 519 device_t dev = sc->dev; 520 int reg, status; 521 uint16_t pectl; 522 523 sc->tx.boundary = 4096; 524 /* 525 * Verify the max read request size was set to 4KB 526 * before trying the test with 4KB. 527 */ 528 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 529 pectl = pci_read_config(dev, reg + 0x8, 2); 530 if ((pectl & (5 << 12)) != (5 << 12)) { 531 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 532 pectl); 533 sc->tx.boundary = 2048; 534 } 535 } 536 537 /* 538 * load the optimized firmware (which assumes aligned PCIe 539 * completions) in order to see if it works on this host. 540 */ 541 sc->fw_name = mxge_fw_aligned; 542 status = mxge_load_firmware(sc); 543 if (status != 0) { 544 return status; 545 } 546 547 /* 548 * Enable ECRC if possible 549 */ 550 mxge_enable_nvidia_ecrc(sc); 551 552 /* 553 * Run a DMA test which watches for unaligned completions and 554 * aborts on the first one seen. 555 */ 556 557 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 558 if (status == 0) 559 return 0; /* keep the aligned firmware */ 560 561 if (status != E2BIG) 562 device_printf(dev, "DMA test failed: %d\n", status); 563 if (status == ENOSYS) 564 device_printf(dev, "Falling back to ethp! " 565 "Please install up to date fw\n"); 566 return status; 567 } 568 569 static int 570 mxge_select_firmware(mxge_softc_t *sc) 571 { 572 int aligned = 0; 573 574 575 if (mxge_force_firmware != 0) { 576 if (mxge_force_firmware == 1) 577 aligned = 1; 578 else 579 aligned = 0; 580 if (mxge_verbose) 581 device_printf(sc->dev, 582 "Assuming %s completions (forced)\n", 583 aligned ? "aligned" : "unaligned"); 584 goto abort; 585 } 586 587 /* if the PCIe link width is 4 or less, we can use the aligned 588 firmware and skip any checks */ 589 if (sc->link_width != 0 && sc->link_width <= 4) { 590 device_printf(sc->dev, 591 "PCIe x%d Link, expect reduced performance\n", 592 sc->link_width); 593 aligned = 1; 594 goto abort; 595 } 596 597 if (0 == mxge_firmware_probe(sc)) 598 return 0; 599 600 abort: 601 if (aligned) { 602 sc->fw_name = mxge_fw_aligned; 603 sc->tx.boundary = 4096; 604 } else { 605 sc->fw_name = mxge_fw_unaligned; 606 sc->tx.boundary = 2048; 607 } 608 return (mxge_load_firmware(sc)); 609 } 610 611 union qualhack 612 { 613 const char *ro_char; 614 char *rw_char; 615 }; 616 617 static int 618 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 619 { 620 621 622 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 623 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 624 be32toh(hdr->mcp_type)); 625 return EIO; 626 } 627 628 /* save firmware version for sysctl */ 629 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 630 if (mxge_verbose) 631 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 632 633 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 634 &sc->fw_ver_minor, &sc->fw_ver_tiny); 635 636 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 637 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 638 device_printf(sc->dev, "Found firmware version %s\n", 639 sc->fw_version); 640 device_printf(sc->dev, "Driver needs %d.%d\n", 641 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 642 return EINVAL; 643 } 644 return 0; 645 646 } 647 648 static void * 649 z_alloc(void *nil, u_int items, u_int size) 650 { 651 void *ptr; 652 653 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 654 return ptr; 655 } 656 657 static void 658 z_free(void *nil, void *ptr) 659 { 660 free(ptr, M_TEMP); 661 } 662 663 664 static int 665 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 666 { 667 z_stream zs; 668 char *inflate_buffer; 669 const struct firmware *fw; 670 const mcp_gen_header_t *hdr; 671 unsigned hdr_offset; 672 int status; 673 unsigned int i; 674 char dummy; 675 size_t fw_len; 676 677 fw = firmware_get(sc->fw_name); 678 if (fw == NULL) { 679 device_printf(sc->dev, "Could not find firmware image %s\n", 680 sc->fw_name); 681 return ENOENT; 682 } 683 684 685 686 /* setup zlib and decompress f/w */ 687 bzero(&zs, sizeof (zs)); 688 zs.zalloc = z_alloc; 689 zs.zfree = z_free; 690 status = inflateInit(&zs); 691 if (status != Z_OK) { 692 status = EIO; 693 goto abort_with_fw; 694 } 695 696 /* the uncompressed size is stored as the firmware version, 697 which would otherwise go unused */ 698 fw_len = (size_t) fw->version; 699 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 700 if (inflate_buffer == NULL) 701 goto abort_with_zs; 702 zs.avail_in = fw->datasize; 703 zs.next_in = __DECONST(char *, fw->data); 704 zs.avail_out = fw_len; 705 zs.next_out = inflate_buffer; 706 status = inflate(&zs, Z_FINISH); 707 if (status != Z_STREAM_END) { 708 device_printf(sc->dev, "zlib %d\n", status); 709 status = EIO; 710 goto abort_with_buffer; 711 } 712 713 /* check id */ 714 hdr_offset = htobe32(*(const uint32_t *) 715 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 716 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 717 device_printf(sc->dev, "Bad firmware file"); 718 status = EIO; 719 goto abort_with_buffer; 720 } 721 hdr = (const void*)(inflate_buffer + hdr_offset); 722 723 status = mxge_validate_firmware(sc, hdr); 724 if (status != 0) 725 goto abort_with_buffer; 726 727 /* Copy the inflated firmware to NIC SRAM. */ 728 for (i = 0; i < fw_len; i += 256) { 729 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 730 inflate_buffer + i, 731 min(256U, (unsigned)(fw_len - i))); 732 mb(); 733 dummy = *sc->sram; 734 mb(); 735 } 736 737 *limit = fw_len; 738 status = 0; 739 abort_with_buffer: 740 free(inflate_buffer, M_TEMP); 741 abort_with_zs: 742 inflateEnd(&zs); 743 abort_with_fw: 744 firmware_put(fw, FIRMWARE_UNLOAD); 745 return status; 746 } 747 748 /* 749 * Enable or disable periodic RDMAs from the host to make certain 750 * chipsets resend dropped PCIe messages 751 */ 752 753 static void 754 mxge_dummy_rdma(mxge_softc_t *sc, int enable) 755 { 756 char buf_bytes[72]; 757 volatile uint32_t *confirm; 758 volatile char *submit; 759 uint32_t *buf, dma_low, dma_high; 760 int i; 761 762 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 763 764 /* clear confirmation addr */ 765 confirm = (volatile uint32_t *)sc->cmd; 766 *confirm = 0; 767 mb(); 768 769 /* send an rdma command to the PCIe engine, and wait for the 770 response in the confirmation address. The firmware should 771 write a -1 there to indicate it is alive and well 772 */ 773 774 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 775 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 776 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 777 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 778 buf[2] = htobe32(0xffffffff); /* confirm data */ 779 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 780 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 781 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 782 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 783 buf[5] = htobe32(enable); /* enable? */ 784 785 786 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 787 788 mxge_pio_copy(submit, buf, 64); 789 mb(); 790 DELAY(1000); 791 mb(); 792 i = 0; 793 while (*confirm != 0xffffffff && i < 20) { 794 DELAY(1000); 795 i++; 796 } 797 if (*confirm != 0xffffffff) { 798 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 799 (enable ? "enable" : "disable"), confirm, 800 *confirm); 801 } 802 return; 803 } 804 805 static int 806 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 807 { 808 mcp_cmd_t *buf; 809 char buf_bytes[sizeof(*buf) + 8]; 810 volatile mcp_cmd_response_t *response = sc->cmd; 811 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 812 uint32_t dma_low, dma_high; 813 int err, sleep_total = 0; 814 815 /* ensure buf is aligned to 8 bytes */ 816 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 817 818 buf->data0 = htobe32(data->data0); 819 buf->data1 = htobe32(data->data1); 820 buf->data2 = htobe32(data->data2); 821 buf->cmd = htobe32(cmd); 822 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 823 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 824 825 buf->response_addr.low = htobe32(dma_low); 826 buf->response_addr.high = htobe32(dma_high); 827 mtx_lock(&sc->cmd_mtx); 828 response->result = 0xffffffff; 829 mb(); 830 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 831 832 /* wait up to 20ms */ 833 err = EAGAIN; 834 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 835 bus_dmamap_sync(sc->cmd_dma.dmat, 836 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 837 mb(); 838 switch (be32toh(response->result)) { 839 case 0: 840 data->data0 = be32toh(response->data); 841 err = 0; 842 break; 843 case 0xffffffff: 844 DELAY(1000); 845 break; 846 case MXGEFW_CMD_UNKNOWN: 847 err = ENOSYS; 848 break; 849 case MXGEFW_CMD_ERROR_UNALIGNED: 850 err = E2BIG; 851 break; 852 default: 853 device_printf(sc->dev, 854 "mxge: command %d " 855 "failed, result = %d\n", 856 cmd, be32toh(response->result)); 857 err = ENXIO; 858 break; 859 } 860 if (err != EAGAIN) 861 break; 862 } 863 if (err == EAGAIN) 864 device_printf(sc->dev, "mxge: command %d timed out" 865 "result = %d\n", 866 cmd, be32toh(response->result)); 867 mtx_unlock(&sc->cmd_mtx); 868 return err; 869 } 870 871 static int 872 mxge_adopt_running_firmware(mxge_softc_t *sc) 873 { 874 struct mcp_gen_header *hdr; 875 const size_t bytes = sizeof (struct mcp_gen_header); 876 size_t hdr_offset; 877 int status; 878 879 /* find running firmware header */ 880 hdr_offset = htobe32(*(volatile uint32_t *) 881 (sc->sram + MCP_HEADER_PTR_OFFSET)); 882 883 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 884 device_printf(sc->dev, 885 "Running firmware has bad header offset (%d)\n", 886 (int)hdr_offset); 887 return EIO; 888 } 889 890 /* copy header of running firmware from SRAM to host memory to 891 * validate firmware */ 892 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 893 if (hdr == NULL) { 894 device_printf(sc->dev, "could not malloc firmware hdr\n"); 895 return ENOMEM; 896 } 897 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 898 rman_get_bushandle(sc->mem_res), 899 hdr_offset, (char *)hdr, bytes); 900 status = mxge_validate_firmware(sc, hdr); 901 free(hdr, M_DEVBUF); 902 903 /* 904 * check to see if adopted firmware has bug where adopting 905 * it will cause broadcasts to be filtered unless the NIC 906 * is kept in ALLMULTI mode 907 */ 908 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 909 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 910 sc->adopted_rx_filter_bug = 1; 911 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 912 "working around rx filter bug\n", 913 sc->fw_ver_major, sc->fw_ver_minor, 914 sc->fw_ver_tiny); 915 } 916 917 return status; 918 } 919 920 921 static int 922 mxge_load_firmware(mxge_softc_t *sc) 923 { 924 volatile uint32_t *confirm; 925 volatile char *submit; 926 char buf_bytes[72]; 927 uint32_t *buf, size, dma_low, dma_high; 928 int status, i; 929 930 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 931 932 size = sc->sram_size; 933 status = mxge_load_firmware_helper(sc, &size); 934 if (status) { 935 /* Try to use the currently running firmware, if 936 it is new enough */ 937 status = mxge_adopt_running_firmware(sc); 938 if (status) { 939 device_printf(sc->dev, 940 "failed to adopt running firmware\n"); 941 return status; 942 } 943 device_printf(sc->dev, 944 "Successfully adopted running firmware\n"); 945 if (sc->tx.boundary == 4096) { 946 device_printf(sc->dev, 947 "Using firmware currently running on NIC" 948 ". For optimal\n"); 949 device_printf(sc->dev, 950 "performance consider loading optimized " 951 "firmware\n"); 952 } 953 sc->fw_name = mxge_fw_unaligned; 954 sc->tx.boundary = 2048; 955 return 0; 956 } 957 /* clear confirmation addr */ 958 confirm = (volatile uint32_t *)sc->cmd; 959 *confirm = 0; 960 mb(); 961 /* send a reload command to the bootstrap MCP, and wait for the 962 response in the confirmation address. The firmware should 963 write a -1 there to indicate it is alive and well 964 */ 965 966 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 967 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 968 969 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 970 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 971 buf[2] = htobe32(0xffffffff); /* confirm data */ 972 973 /* FIX: All newest firmware should un-protect the bottom of 974 the sram before handoff. However, the very first interfaces 975 do not. Therefore the handoff copy must skip the first 8 bytes 976 */ 977 /* where the code starts*/ 978 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 979 buf[4] = htobe32(size - 8); /* length of code */ 980 buf[5] = htobe32(8); /* where to copy to */ 981 buf[6] = htobe32(0); /* where to jump to */ 982 983 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 984 mxge_pio_copy(submit, buf, 64); 985 mb(); 986 DELAY(1000); 987 mb(); 988 i = 0; 989 while (*confirm != 0xffffffff && i < 20) { 990 DELAY(1000*10); 991 i++; 992 bus_dmamap_sync(sc->cmd_dma.dmat, 993 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 994 } 995 if (*confirm != 0xffffffff) { 996 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 997 confirm, *confirm); 998 999 return ENXIO; 1000 } 1001 return 0; 1002 } 1003 1004 static int 1005 mxge_update_mac_address(mxge_softc_t *sc) 1006 { 1007 mxge_cmd_t cmd; 1008 uint8_t *addr = sc->mac_addr; 1009 int status; 1010 1011 1012 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1013 | (addr[2] << 8) | addr[3]); 1014 1015 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1016 1017 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1018 return status; 1019 } 1020 1021 static int 1022 mxge_change_pause(mxge_softc_t *sc, int pause) 1023 { 1024 mxge_cmd_t cmd; 1025 int status; 1026 1027 if (pause) 1028 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1029 &cmd); 1030 else 1031 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1032 &cmd); 1033 1034 if (status) { 1035 device_printf(sc->dev, "Failed to set flow control mode\n"); 1036 return ENXIO; 1037 } 1038 sc->pause = pause; 1039 return 0; 1040 } 1041 1042 static void 1043 mxge_change_promisc(mxge_softc_t *sc, int promisc) 1044 { 1045 mxge_cmd_t cmd; 1046 int status; 1047 1048 if (promisc) 1049 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1050 &cmd); 1051 else 1052 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1053 &cmd); 1054 1055 if (status) { 1056 device_printf(sc->dev, "Failed to set promisc mode\n"); 1057 } 1058 } 1059 1060 static void 1061 mxge_set_multicast_list(mxge_softc_t *sc) 1062 { 1063 mxge_cmd_t cmd; 1064 struct ifmultiaddr *ifma; 1065 struct ifnet *ifp = sc->ifp; 1066 int err; 1067 1068 /* This firmware is known to not support multicast */ 1069 if (!sc->fw_multicast_support) 1070 return; 1071 1072 /* Disable multicast filtering while we play with the lists*/ 1073 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1074 if (err != 0) { 1075 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1076 " error status: %d\n", err); 1077 return; 1078 } 1079 1080 if (sc->adopted_rx_filter_bug) 1081 return; 1082 1083 if (ifp->if_flags & IFF_ALLMULTI) 1084 /* request to disable multicast filtering, so quit here */ 1085 return; 1086 1087 /* Flush all the filters */ 1088 1089 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1090 if (err != 0) { 1091 device_printf(sc->dev, 1092 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1093 ", error status: %d\n", err); 1094 return; 1095 } 1096 1097 /* Walk the multicast list, and add each address */ 1098 1099 IF_ADDR_LOCK(ifp); 1100 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1101 if (ifma->ifma_addr->sa_family != AF_LINK) 1102 continue; 1103 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1104 &cmd.data0, 4); 1105 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1106 &cmd.data1, 2); 1107 cmd.data0 = htonl(cmd.data0); 1108 cmd.data1 = htonl(cmd.data1); 1109 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1110 if (err != 0) { 1111 device_printf(sc->dev, "Failed " 1112 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1113 "%d\t", err); 1114 /* abort, leaving multicast filtering off */ 1115 IF_ADDR_UNLOCK(ifp); 1116 return; 1117 } 1118 } 1119 IF_ADDR_UNLOCK(ifp); 1120 /* Enable multicast filtering */ 1121 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1122 if (err != 0) { 1123 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1124 ", error status: %d\n", err); 1125 } 1126 } 1127 1128 static int 1129 mxge_max_mtu(mxge_softc_t *sc) 1130 { 1131 mxge_cmd_t cmd; 1132 int status; 1133 1134 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1135 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1136 1137 /* try to set nbufs to see if it we can 1138 use virtually contiguous jumbos */ 1139 cmd.data0 = 0; 1140 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1141 &cmd); 1142 if (status == 0) 1143 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1144 1145 /* otherwise, we're limited to MJUMPAGESIZE */ 1146 return MJUMPAGESIZE - MXGEFW_PAD; 1147 } 1148 1149 static int 1150 mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1151 { 1152 1153 mxge_cmd_t cmd; 1154 size_t bytes; 1155 int status; 1156 1157 /* try to send a reset command to the card to see if it 1158 is alive */ 1159 memset(&cmd, 0, sizeof (cmd)); 1160 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1161 if (status != 0) { 1162 device_printf(sc->dev, "failed reset\n"); 1163 return ENXIO; 1164 } 1165 1166 mxge_dummy_rdma(sc, 1); 1167 1168 if (interrupts_setup) { 1169 /* Now exchange information about interrupts */ 1170 bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry); 1171 memset(sc->rx_done.entry, 0, bytes); 1172 cmd.data0 = (uint32_t)bytes; 1173 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1174 cmd.data0 = MXGE_LOWPART_TO_U32(sc->rx_done.dma.bus_addr); 1175 cmd.data1 = MXGE_HIGHPART_TO_U32(sc->rx_done.dma.bus_addr); 1176 status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd); 1177 } 1178 1179 status |= mxge_send_cmd(sc, 1180 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1181 1182 1183 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1184 1185 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1186 sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1187 1188 1189 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1190 &cmd); 1191 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1192 if (status != 0) { 1193 device_printf(sc->dev, "failed set interrupt parameters\n"); 1194 return status; 1195 } 1196 1197 1198 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1199 1200 1201 /* run a DMA benchmark */ 1202 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1203 1204 /* reset mcp/driver shared state back to 0 */ 1205 sc->rx_done.idx = 0; 1206 sc->rx_done.cnt = 0; 1207 sc->tx.req = 0; 1208 sc->tx.done = 0; 1209 sc->tx.pkt_done = 0; 1210 sc->tx.wake = 0; 1211 sc->tx_defrag = 0; 1212 sc->tx.stall = 0; 1213 sc->rx_big.cnt = 0; 1214 sc->rx_small.cnt = 0; 1215 sc->rdma_tags_available = 15; 1216 sc->fw_stats->valid = 0; 1217 sc->fw_stats->send_done_count = 0; 1218 sc->lro_bad_csum = 0; 1219 sc->lro_queued = 0; 1220 sc->lro_flushed = 0; 1221 status = mxge_update_mac_address(sc); 1222 mxge_change_promisc(sc, 0); 1223 mxge_change_pause(sc, sc->pause); 1224 mxge_set_multicast_list(sc); 1225 return status; 1226 } 1227 1228 static int 1229 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1230 { 1231 mxge_softc_t *sc; 1232 unsigned int intr_coal_delay; 1233 int err; 1234 1235 sc = arg1; 1236 intr_coal_delay = sc->intr_coal_delay; 1237 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1238 if (err != 0) { 1239 return err; 1240 } 1241 if (intr_coal_delay == sc->intr_coal_delay) 1242 return 0; 1243 1244 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1245 return EINVAL; 1246 1247 mtx_lock(&sc->driver_mtx); 1248 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1249 sc->intr_coal_delay = intr_coal_delay; 1250 1251 mtx_unlock(&sc->driver_mtx); 1252 return err; 1253 } 1254 1255 static int 1256 mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1257 { 1258 mxge_softc_t *sc; 1259 unsigned int enabled; 1260 int err; 1261 1262 sc = arg1; 1263 enabled = sc->pause; 1264 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1265 if (err != 0) { 1266 return err; 1267 } 1268 if (enabled == sc->pause) 1269 return 0; 1270 1271 mtx_lock(&sc->driver_mtx); 1272 err = mxge_change_pause(sc, enabled); 1273 mtx_unlock(&sc->driver_mtx); 1274 return err; 1275 } 1276 1277 static int 1278 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1279 { 1280 struct ifnet *ifp; 1281 int err; 1282 1283 ifp = sc->ifp; 1284 if (lro_cnt == 0) 1285 ifp->if_capenable &= ~IFCAP_LRO; 1286 else 1287 ifp->if_capenable |= IFCAP_LRO; 1288 sc->lro_cnt = lro_cnt; 1289 callout_stop(&sc->co_hdl); 1290 mxge_close(sc); 1291 err = mxge_open(sc); 1292 if (err == 0) 1293 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 1294 return err; 1295 } 1296 1297 static int 1298 mxge_change_lro(SYSCTL_HANDLER_ARGS) 1299 { 1300 mxge_softc_t *sc; 1301 unsigned int lro_cnt; 1302 int err; 1303 1304 sc = arg1; 1305 lro_cnt = sc->lro_cnt; 1306 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1307 if (err != 0) 1308 return err; 1309 1310 if (lro_cnt == sc->lro_cnt) 1311 return 0; 1312 1313 if (lro_cnt > 128) 1314 return EINVAL; 1315 1316 mtx_lock(&sc->driver_mtx); 1317 err = mxge_change_lro_locked(sc, lro_cnt); 1318 mtx_unlock(&sc->driver_mtx); 1319 return err; 1320 } 1321 1322 static int 1323 mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1324 { 1325 int err; 1326 1327 if (arg1 == NULL) 1328 return EFAULT; 1329 arg2 = be32toh(*(int *)arg1); 1330 arg1 = NULL; 1331 err = sysctl_handle_int(oidp, arg1, arg2, req); 1332 1333 return err; 1334 } 1335 1336 static void 1337 mxge_add_sysctls(mxge_softc_t *sc) 1338 { 1339 struct sysctl_ctx_list *ctx; 1340 struct sysctl_oid_list *children; 1341 mcp_irq_data_t *fw; 1342 1343 ctx = device_get_sysctl_ctx(sc->dev); 1344 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1345 fw = sc->fw_stats; 1346 1347 /* random information */ 1348 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1349 "firmware_version", 1350 CTLFLAG_RD, &sc->fw_version, 1351 0, "firmware version"); 1352 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1353 "serial_number", 1354 CTLFLAG_RD, &sc->serial_number_string, 1355 0, "serial number"); 1356 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1357 "product_code", 1358 CTLFLAG_RD, &sc->product_code_string, 1359 0, "product_code"); 1360 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1361 "pcie_link_width", 1362 CTLFLAG_RD, &sc->link_width, 1363 0, "tx_boundary"); 1364 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1365 "tx_boundary", 1366 CTLFLAG_RD, &sc->tx.boundary, 1367 0, "tx_boundary"); 1368 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1369 "write_combine", 1370 CTLFLAG_RD, &sc->wc, 1371 0, "write combining PIO?"); 1372 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1373 "read_dma_MBs", 1374 CTLFLAG_RD, &sc->read_dma, 1375 0, "DMA Read speed in MB/s"); 1376 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1377 "write_dma_MBs", 1378 CTLFLAG_RD, &sc->write_dma, 1379 0, "DMA Write speed in MB/s"); 1380 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1381 "read_write_dma_MBs", 1382 CTLFLAG_RD, &sc->read_write_dma, 1383 0, "DMA concurrent Read/Write speed in MB/s"); 1384 1385 1386 /* performance related tunables */ 1387 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1388 "intr_coal_delay", 1389 CTLTYPE_INT|CTLFLAG_RW, sc, 1390 0, mxge_change_intr_coal, 1391 "I", "interrupt coalescing delay in usecs"); 1392 1393 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1394 "flow_control_enabled", 1395 CTLTYPE_INT|CTLFLAG_RW, sc, 1396 0, mxge_change_flow_control, 1397 "I", "interrupt coalescing delay in usecs"); 1398 1399 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1400 "deassert_wait", 1401 CTLFLAG_RW, &mxge_deassert_wait, 1402 0, "Wait for IRQ line to go low in ihandler"); 1403 1404 /* stats block from firmware is in network byte order. 1405 Need to swap it */ 1406 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1407 "link_up", 1408 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1409 0, mxge_handle_be32, 1410 "I", "link up"); 1411 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1412 "rdma_tags_available", 1413 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1414 0, mxge_handle_be32, 1415 "I", "rdma_tags_available"); 1416 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1417 "dropped_bad_crc32", 1418 CTLTYPE_INT|CTLFLAG_RD, 1419 &fw->dropped_bad_crc32, 1420 0, mxge_handle_be32, 1421 "I", "dropped_bad_crc32"); 1422 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1423 "dropped_bad_phy", 1424 CTLTYPE_INT|CTLFLAG_RD, 1425 &fw->dropped_bad_phy, 1426 0, mxge_handle_be32, 1427 "I", "dropped_bad_phy"); 1428 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1429 "dropped_link_error_or_filtered", 1430 CTLTYPE_INT|CTLFLAG_RD, 1431 &fw->dropped_link_error_or_filtered, 1432 0, mxge_handle_be32, 1433 "I", "dropped_link_error_or_filtered"); 1434 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1435 "dropped_link_overflow", 1436 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1437 0, mxge_handle_be32, 1438 "I", "dropped_link_overflow"); 1439 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1440 "dropped_multicast_filtered", 1441 CTLTYPE_INT|CTLFLAG_RD, 1442 &fw->dropped_multicast_filtered, 1443 0, mxge_handle_be32, 1444 "I", "dropped_multicast_filtered"); 1445 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1446 "dropped_no_big_buffer", 1447 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1448 0, mxge_handle_be32, 1449 "I", "dropped_no_big_buffer"); 1450 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1451 "dropped_no_small_buffer", 1452 CTLTYPE_INT|CTLFLAG_RD, 1453 &fw->dropped_no_small_buffer, 1454 0, mxge_handle_be32, 1455 "I", "dropped_no_small_buffer"); 1456 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1457 "dropped_overrun", 1458 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1459 0, mxge_handle_be32, 1460 "I", "dropped_overrun"); 1461 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1462 "dropped_pause", 1463 CTLTYPE_INT|CTLFLAG_RD, 1464 &fw->dropped_pause, 1465 0, mxge_handle_be32, 1466 "I", "dropped_pause"); 1467 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1468 "dropped_runt", 1469 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1470 0, mxge_handle_be32, 1471 "I", "dropped_runt"); 1472 1473 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1474 "dropped_unicast_filtered", 1475 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1476 0, mxge_handle_be32, 1477 "I", "dropped_unicast_filtered"); 1478 1479 /* host counters exported for debugging */ 1480 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1481 "rx_small_cnt", 1482 CTLFLAG_RD, &sc->rx_small.cnt, 1483 0, "rx_small_cnt"); 1484 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1485 "rx_big_cnt", 1486 CTLFLAG_RD, &sc->rx_big.cnt, 1487 0, "rx_small_cnt"); 1488 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1489 "tx_req", 1490 CTLFLAG_RD, &sc->tx.req, 1491 0, "tx_req"); 1492 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1493 "tx_done", 1494 CTLFLAG_RD, &sc->tx.done, 1495 0, "tx_done"); 1496 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1497 "tx_pkt_done", 1498 CTLFLAG_RD, &sc->tx.pkt_done, 1499 0, "tx_done"); 1500 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1501 "tx_stall", 1502 CTLFLAG_RD, &sc->tx.stall, 1503 0, "tx_stall"); 1504 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1505 "tx_wake", 1506 CTLFLAG_RD, &sc->tx.wake, 1507 0, "tx_wake"); 1508 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1509 "tx_defrag", 1510 CTLFLAG_RD, &sc->tx_defrag, 1511 0, "tx_defrag"); 1512 1513 /* verbose printing? */ 1514 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1515 "verbose", 1516 CTLFLAG_RW, &mxge_verbose, 1517 0, "verbose printing"); 1518 1519 /* lro */ 1520 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1521 "lro_cnt", 1522 CTLTYPE_INT|CTLFLAG_RW, sc, 1523 0, mxge_change_lro, 1524 "I", "number of lro merge queues"); 1525 1526 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1527 "lro_flushed", CTLFLAG_RD, &sc->lro_flushed, 1528 0, "number of lro merge queues flushed"); 1529 1530 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1531 "lro_queued", CTLFLAG_RD, &sc->lro_queued, 1532 0, "number of frames appended to lro merge queues"); 1533 1534 } 1535 1536 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1537 backwards one at a time and handle ring wraps */ 1538 1539 static inline void 1540 mxge_submit_req_backwards(mxge_tx_buf_t *tx, 1541 mcp_kreq_ether_send_t *src, int cnt) 1542 { 1543 int idx, starting_slot; 1544 starting_slot = tx->req; 1545 while (cnt > 1) { 1546 cnt--; 1547 idx = (starting_slot + cnt) & tx->mask; 1548 mxge_pio_copy(&tx->lanai[idx], 1549 &src[cnt], sizeof(*src)); 1550 mb(); 1551 } 1552 } 1553 1554 /* 1555 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1556 * at most 32 bytes at a time, so as to avoid involving the software 1557 * pio handler in the nic. We re-write the first segment's flags 1558 * to mark them valid only after writing the entire chain 1559 */ 1560 1561 static inline void 1562 mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src, 1563 int cnt) 1564 { 1565 int idx, i; 1566 uint32_t *src_ints; 1567 volatile uint32_t *dst_ints; 1568 mcp_kreq_ether_send_t *srcp; 1569 volatile mcp_kreq_ether_send_t *dstp, *dst; 1570 uint8_t last_flags; 1571 1572 idx = tx->req & tx->mask; 1573 1574 last_flags = src->flags; 1575 src->flags = 0; 1576 mb(); 1577 dst = dstp = &tx->lanai[idx]; 1578 srcp = src; 1579 1580 if ((idx + cnt) < tx->mask) { 1581 for (i = 0; i < (cnt - 1); i += 2) { 1582 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1583 mb(); /* force write every 32 bytes */ 1584 srcp += 2; 1585 dstp += 2; 1586 } 1587 } else { 1588 /* submit all but the first request, and ensure 1589 that it is submitted below */ 1590 mxge_submit_req_backwards(tx, src, cnt); 1591 i = 0; 1592 } 1593 if (i < cnt) { 1594 /* submit the first request */ 1595 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1596 mb(); /* barrier before setting valid flag */ 1597 } 1598 1599 /* re-write the last 32-bits with the valid flags */ 1600 src->flags = last_flags; 1601 src_ints = (uint32_t *)src; 1602 src_ints+=3; 1603 dst_ints = (volatile uint32_t *)dst; 1604 dst_ints+=3; 1605 *dst_ints = *src_ints; 1606 tx->req += cnt; 1607 mb(); 1608 } 1609 1610 static void 1611 mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt, 1612 int ip_off) 1613 { 1614 mxge_tx_buf_t *tx; 1615 mcp_kreq_ether_send_t *req; 1616 bus_dma_segment_t *seg; 1617 struct ip *ip; 1618 struct tcphdr *tcp; 1619 uint32_t low, high_swapped; 1620 int len, seglen, cum_len, cum_len_next; 1621 int next_is_first, chop, cnt, rdma_count, small; 1622 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1623 uint8_t flags, flags_next; 1624 static int once; 1625 1626 mss = m->m_pkthdr.tso_segsz; 1627 1628 /* negative cum_len signifies to the 1629 * send loop that we are still in the 1630 * header portion of the TSO packet. 1631 */ 1632 1633 /* ensure we have the ethernet, IP and TCP 1634 header together in the first mbuf, copy 1635 it to a scratch buffer if not */ 1636 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1637 m_copydata(m, 0, ip_off + sizeof (*ip), 1638 sc->scratch); 1639 ip = (struct ip *)(sc->scratch + ip_off); 1640 } else { 1641 ip = (struct ip *)(mtod(m, char *) + ip_off); 1642 } 1643 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1644 + sizeof (*tcp))) { 1645 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1646 + sizeof (*tcp), sc->scratch); 1647 ip = (struct ip *)(mtod(m, char *) + ip_off); 1648 } 1649 1650 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1651 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1652 1653 /* TSO implies checksum offload on this hardware */ 1654 cksum_offset = ip_off + (ip->ip_hl << 2); 1655 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1656 1657 1658 /* for TSO, pseudo_hdr_offset holds mss. 1659 * The firmware figures out where to put 1660 * the checksum by parsing the header. */ 1661 pseudo_hdr_offset = htobe16(mss); 1662 1663 tx = &sc->tx; 1664 req = tx->req_list; 1665 seg = tx->seg_list; 1666 cnt = 0; 1667 rdma_count = 0; 1668 /* "rdma_count" is the number of RDMAs belonging to the 1669 * current packet BEFORE the current send request. For 1670 * non-TSO packets, this is equal to "count". 1671 * For TSO packets, rdma_count needs to be reset 1672 * to 0 after a segment cut. 1673 * 1674 * The rdma_count field of the send request is 1675 * the number of RDMAs of the packet starting at 1676 * that request. For TSO send requests with one ore more cuts 1677 * in the middle, this is the number of RDMAs starting 1678 * after the last cut in the request. All previous 1679 * segments before the last cut implicitly have 1 RDMA. 1680 * 1681 * Since the number of RDMAs is not known beforehand, 1682 * it must be filled-in retroactively - after each 1683 * segmentation cut or at the end of the entire packet. 1684 */ 1685 1686 while (busdma_seg_cnt) { 1687 /* Break the busdma segment up into pieces*/ 1688 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1689 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1690 len = seg->ds_len; 1691 1692 while (len) { 1693 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1694 seglen = len; 1695 cum_len_next = cum_len + seglen; 1696 (req-rdma_count)->rdma_count = rdma_count + 1; 1697 if (__predict_true(cum_len >= 0)) { 1698 /* payload */ 1699 chop = (cum_len_next > mss); 1700 cum_len_next = cum_len_next % mss; 1701 next_is_first = (cum_len_next == 0); 1702 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1703 flags_next |= next_is_first * 1704 MXGEFW_FLAGS_FIRST; 1705 rdma_count |= -(chop | next_is_first); 1706 rdma_count += chop & !next_is_first; 1707 } else if (cum_len_next >= 0) { 1708 /* header ends */ 1709 rdma_count = -1; 1710 cum_len_next = 0; 1711 seglen = -cum_len; 1712 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1713 flags_next = MXGEFW_FLAGS_TSO_PLD | 1714 MXGEFW_FLAGS_FIRST | 1715 (small * MXGEFW_FLAGS_SMALL); 1716 } 1717 1718 req->addr_high = high_swapped; 1719 req->addr_low = htobe32(low); 1720 req->pseudo_hdr_offset = pseudo_hdr_offset; 1721 req->pad = 0; 1722 req->rdma_count = 1; 1723 req->length = htobe16(seglen); 1724 req->cksum_offset = cksum_offset; 1725 req->flags = flags | ((cum_len & 1) * 1726 MXGEFW_FLAGS_ALIGN_ODD); 1727 low += seglen; 1728 len -= seglen; 1729 cum_len = cum_len_next; 1730 flags = flags_next; 1731 req++; 1732 cnt++; 1733 rdma_count++; 1734 if (__predict_false(cksum_offset > seglen)) 1735 cksum_offset -= seglen; 1736 else 1737 cksum_offset = 0; 1738 if (__predict_false(cnt > tx->max_desc)) 1739 goto drop; 1740 } 1741 busdma_seg_cnt--; 1742 seg++; 1743 } 1744 (req-rdma_count)->rdma_count = rdma_count; 1745 1746 do { 1747 req--; 1748 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1749 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1750 1751 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1752 mxge_submit_req(tx, tx->req_list, cnt); 1753 return; 1754 1755 drop: 1756 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1757 m_freem(m); 1758 sc->ifp->if_oerrors++; 1759 if (!once) { 1760 printf("tx->max_desc exceeded via TSO!\n"); 1761 printf("mss = %d, %ld, %d!\n", mss, 1762 (long)seg - (long)tx->seg_list, tx->max_desc); 1763 once = 1; 1764 } 1765 return; 1766 1767 } 1768 1769 /* 1770 * We reproduce the software vlan tag insertion from 1771 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1772 * vlan tag insertion. We need to advertise this in order to have the 1773 * vlan interface respect our csum offload flags. 1774 */ 1775 static struct mbuf * 1776 mxge_vlan_tag_insert(struct mbuf *m) 1777 { 1778 struct ether_vlan_header *evl; 1779 1780 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1781 if (__predict_false(m == NULL)) 1782 return NULL; 1783 if (m->m_len < sizeof(*evl)) { 1784 m = m_pullup(m, sizeof(*evl)); 1785 if (__predict_false(m == NULL)) 1786 return NULL; 1787 } 1788 /* 1789 * Transform the Ethernet header into an Ethernet header 1790 * with 802.1Q encapsulation. 1791 */ 1792 evl = mtod(m, struct ether_vlan_header *); 1793 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 1794 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 1795 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 1796 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 1797 m->m_flags &= ~M_VLANTAG; 1798 return m; 1799 } 1800 1801 static void 1802 mxge_encap(mxge_softc_t *sc, struct mbuf *m) 1803 { 1804 mcp_kreq_ether_send_t *req; 1805 bus_dma_segment_t *seg; 1806 struct mbuf *m_tmp; 1807 struct ifnet *ifp; 1808 mxge_tx_buf_t *tx; 1809 struct ip *ip; 1810 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 1811 uint16_t pseudo_hdr_offset; 1812 uint8_t flags, cksum_offset; 1813 1814 1815 1816 ifp = sc->ifp; 1817 tx = &sc->tx; 1818 1819 ip_off = sizeof (struct ether_header); 1820 if (m->m_flags & M_VLANTAG) { 1821 m = mxge_vlan_tag_insert(m); 1822 if (__predict_false(m == NULL)) 1823 goto drop; 1824 ip_off += ETHER_VLAN_ENCAP_LEN; 1825 } 1826 1827 /* (try to) map the frame for DMA */ 1828 idx = tx->req & tx->mask; 1829 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 1830 m, tx->seg_list, &cnt, 1831 BUS_DMA_NOWAIT); 1832 if (__predict_false(err == EFBIG)) { 1833 /* Too many segments in the chain. Try 1834 to defrag */ 1835 m_tmp = m_defrag(m, M_NOWAIT); 1836 if (m_tmp == NULL) { 1837 goto drop; 1838 } 1839 sc->tx_defrag++; 1840 m = m_tmp; 1841 err = bus_dmamap_load_mbuf_sg(tx->dmat, 1842 tx->info[idx].map, 1843 m, tx->seg_list, &cnt, 1844 BUS_DMA_NOWAIT); 1845 } 1846 if (__predict_false(err != 0)) { 1847 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 1848 " packet len = %d\n", err, m->m_pkthdr.len); 1849 goto drop; 1850 } 1851 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 1852 BUS_DMASYNC_PREWRITE); 1853 tx->info[idx].m = m; 1854 1855 1856 /* TSO is different enough, we handle it in another routine */ 1857 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 1858 mxge_encap_tso(sc, m, cnt, ip_off); 1859 return; 1860 } 1861 1862 req = tx->req_list; 1863 cksum_offset = 0; 1864 pseudo_hdr_offset = 0; 1865 flags = MXGEFW_FLAGS_NO_TSO; 1866 1867 /* checksum offloading? */ 1868 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 1869 /* ensure ip header is in first mbuf, copy 1870 it to a scratch buffer if not */ 1871 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1872 m_copydata(m, 0, ip_off + sizeof (*ip), 1873 sc->scratch); 1874 ip = (struct ip *)(sc->scratch + ip_off); 1875 } else { 1876 ip = (struct ip *)(mtod(m, char *) + ip_off); 1877 } 1878 cksum_offset = ip_off + (ip->ip_hl << 2); 1879 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 1880 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 1881 req->cksum_offset = cksum_offset; 1882 flags |= MXGEFW_FLAGS_CKSUM; 1883 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 1884 } else { 1885 odd_flag = 0; 1886 } 1887 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 1888 flags |= MXGEFW_FLAGS_SMALL; 1889 1890 /* convert segments into a request list */ 1891 cum_len = 0; 1892 seg = tx->seg_list; 1893 req->flags = MXGEFW_FLAGS_FIRST; 1894 for (i = 0; i < cnt; i++) { 1895 req->addr_low = 1896 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 1897 req->addr_high = 1898 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1899 req->length = htobe16(seg->ds_len); 1900 req->cksum_offset = cksum_offset; 1901 if (cksum_offset > seg->ds_len) 1902 cksum_offset -= seg->ds_len; 1903 else 1904 cksum_offset = 0; 1905 req->pseudo_hdr_offset = pseudo_hdr_offset; 1906 req->pad = 0; /* complete solid 16-byte block */ 1907 req->rdma_count = 1; 1908 req->flags |= flags | ((cum_len & 1) * odd_flag); 1909 cum_len += seg->ds_len; 1910 seg++; 1911 req++; 1912 req->flags = 0; 1913 } 1914 req--; 1915 /* pad runts to 60 bytes */ 1916 if (cum_len < 60) { 1917 req++; 1918 req->addr_low = 1919 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 1920 req->addr_high = 1921 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 1922 req->length = htobe16(60 - cum_len); 1923 req->cksum_offset = 0; 1924 req->pseudo_hdr_offset = pseudo_hdr_offset; 1925 req->pad = 0; /* complete solid 16-byte block */ 1926 req->rdma_count = 1; 1927 req->flags |= flags | ((cum_len & 1) * odd_flag); 1928 cnt++; 1929 } 1930 1931 tx->req_list[0].rdma_count = cnt; 1932 #if 0 1933 /* print what the firmware will see */ 1934 for (i = 0; i < cnt; i++) { 1935 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 1936 "cso:%d, flags:0x%x, rdma:%d\n", 1937 i, (int)ntohl(tx->req_list[i].addr_high), 1938 (int)ntohl(tx->req_list[i].addr_low), 1939 (int)ntohs(tx->req_list[i].length), 1940 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 1941 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 1942 tx->req_list[i].rdma_count); 1943 } 1944 printf("--------------\n"); 1945 #endif 1946 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1947 mxge_submit_req(tx, tx->req_list, cnt); 1948 return; 1949 1950 drop: 1951 m_freem(m); 1952 ifp->if_oerrors++; 1953 return; 1954 } 1955 1956 1957 1958 1959 static inline void 1960 mxge_start_locked(mxge_softc_t *sc) 1961 { 1962 struct mbuf *m; 1963 struct ifnet *ifp; 1964 mxge_tx_buf_t *tx; 1965 1966 ifp = sc->ifp; 1967 tx = &sc->tx; 1968 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 1969 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 1970 if (m == NULL) { 1971 return; 1972 } 1973 /* let BPF see it */ 1974 BPF_MTAP(ifp, m); 1975 1976 /* give it to the nic */ 1977 mxge_encap(sc, m); 1978 } 1979 /* ran out of transmit slots */ 1980 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 1981 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 1982 tx->stall++; 1983 } 1984 } 1985 1986 static void 1987 mxge_start(struct ifnet *ifp) 1988 { 1989 mxge_softc_t *sc = ifp->if_softc; 1990 1991 1992 mtx_lock(&sc->tx_mtx); 1993 mxge_start_locked(sc); 1994 mtx_unlock(&sc->tx_mtx); 1995 } 1996 1997 /* 1998 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 1999 * at most 32 bytes at a time, so as to avoid involving the software 2000 * pio handler in the nic. We re-write the first segment's low 2001 * DMA address to mark it valid only after we write the entire chunk 2002 * in a burst 2003 */ 2004 static inline void 2005 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2006 mcp_kreq_ether_recv_t *src) 2007 { 2008 uint32_t low; 2009 2010 low = src->addr_low; 2011 src->addr_low = 0xffffffff; 2012 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2013 mb(); 2014 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2015 mb(); 2016 src->addr_low = low; 2017 dst->addr_low = low; 2018 mb(); 2019 } 2020 2021 static int 2022 mxge_get_buf_small(mxge_softc_t *sc, bus_dmamap_t map, int idx) 2023 { 2024 bus_dma_segment_t seg; 2025 struct mbuf *m; 2026 mxge_rx_buf_t *rx = &sc->rx_small; 2027 int cnt, err; 2028 2029 m = m_gethdr(M_DONTWAIT, MT_DATA); 2030 if (m == NULL) { 2031 rx->alloc_fail++; 2032 err = ENOBUFS; 2033 goto done; 2034 } 2035 m->m_len = MHLEN; 2036 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2037 &seg, &cnt, BUS_DMA_NOWAIT); 2038 if (err != 0) { 2039 m_free(m); 2040 goto done; 2041 } 2042 rx->info[idx].m = m; 2043 rx->shadow[idx].addr_low = 2044 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2045 rx->shadow[idx].addr_high = 2046 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2047 2048 done: 2049 if ((idx & 7) == 7) 2050 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2051 return err; 2052 } 2053 2054 static int 2055 mxge_get_buf_big(mxge_softc_t *sc, bus_dmamap_t map, int idx) 2056 { 2057 bus_dma_segment_t seg[3]; 2058 struct mbuf *m; 2059 mxge_rx_buf_t *rx = &sc->rx_big; 2060 int cnt, err, i; 2061 2062 if (rx->cl_size == MCLBYTES) 2063 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2064 else 2065 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2066 if (m == NULL) { 2067 rx->alloc_fail++; 2068 err = ENOBUFS; 2069 goto done; 2070 } 2071 m->m_len = rx->cl_size; 2072 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2073 seg, &cnt, BUS_DMA_NOWAIT); 2074 if (err != 0) { 2075 m_free(m); 2076 goto done; 2077 } 2078 rx->info[idx].m = m; 2079 2080 for (i = 0; i < cnt; i++) { 2081 rx->shadow[idx + i].addr_low = 2082 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2083 rx->shadow[idx + i].addr_high = 2084 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2085 } 2086 2087 2088 done: 2089 for (i = 0; i < rx->nbufs; i++) { 2090 if ((idx & 7) == 7) { 2091 mxge_submit_8rx(&rx->lanai[idx - 7], 2092 &rx->shadow[idx - 7]); 2093 } 2094 idx++; 2095 } 2096 return err; 2097 } 2098 2099 /* 2100 * Myri10GE hardware checksums are not valid if the sender 2101 * padded the frame with non-zero padding. This is because 2102 * the firmware just does a simple 16-bit 1s complement 2103 * checksum across the entire frame, excluding the first 14 2104 * bytes. It is best to simply to check the checksum and 2105 * tell the stack about it only if the checksum is good 2106 */ 2107 2108 static inline uint16_t 2109 mxge_rx_csum(struct mbuf *m, int csum) 2110 { 2111 struct ether_header *eh; 2112 struct ip *ip; 2113 uint16_t c; 2114 2115 eh = mtod(m, struct ether_header *); 2116 2117 /* only deal with IPv4 TCP & UDP for now */ 2118 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2119 return 1; 2120 ip = (struct ip *)(eh + 1); 2121 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2122 ip->ip_p != IPPROTO_UDP)) 2123 return 1; 2124 2125 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2126 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2127 - (ip->ip_hl << 2) + ip->ip_p)); 2128 c ^= 0xffff; 2129 return (c); 2130 } 2131 2132 static void 2133 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2134 { 2135 struct ether_vlan_header *evl; 2136 struct ether_header *eh; 2137 uint32_t partial; 2138 2139 evl = mtod(m, struct ether_vlan_header *); 2140 eh = mtod(m, struct ether_header *); 2141 2142 /* 2143 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2144 * after what the firmware thought was the end of the ethernet 2145 * header. 2146 */ 2147 2148 /* put checksum into host byte order */ 2149 *csum = ntohs(*csum); 2150 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2151 (*csum) += ~partial; 2152 (*csum) += ((*csum) < ~partial); 2153 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2154 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2155 2156 /* restore checksum to network byte order; 2157 later consumers expect this */ 2158 *csum = htons(*csum); 2159 2160 /* save the tag */ 2161 m->m_flags |= M_VLANTAG; 2162 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2163 2164 /* 2165 * Remove the 802.1q header by copying the Ethernet 2166 * addresses over it and adjusting the beginning of 2167 * the data in the mbuf. The encapsulated Ethernet 2168 * type field is already in place. 2169 */ 2170 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2171 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2172 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2173 } 2174 2175 2176 static inline void 2177 mxge_rx_done_big(mxge_softc_t *sc, uint32_t len, uint32_t csum) 2178 { 2179 struct ifnet *ifp; 2180 struct mbuf *m; 2181 struct ether_header *eh; 2182 mxge_rx_buf_t *rx; 2183 bus_dmamap_t old_map; 2184 int idx; 2185 uint16_t tcpudp_csum; 2186 2187 ifp = sc->ifp; 2188 rx = &sc->rx_big; 2189 idx = rx->cnt & rx->mask; 2190 rx->cnt += rx->nbufs; 2191 /* save a pointer to the received mbuf */ 2192 m = rx->info[idx].m; 2193 /* try to replace the received mbuf */ 2194 if (mxge_get_buf_big(sc, rx->extra_map, idx)) { 2195 /* drop the frame -- the old mbuf is re-cycled */ 2196 ifp->if_ierrors++; 2197 return; 2198 } 2199 2200 /* unmap the received buffer */ 2201 old_map = rx->info[idx].map; 2202 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2203 bus_dmamap_unload(rx->dmat, old_map); 2204 2205 /* swap the bus_dmamap_t's */ 2206 rx->info[idx].map = rx->extra_map; 2207 rx->extra_map = old_map; 2208 2209 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2210 * aligned */ 2211 m->m_data += MXGEFW_PAD; 2212 2213 m->m_pkthdr.rcvif = ifp; 2214 m->m_len = m->m_pkthdr.len = len; 2215 ifp->if_ipackets++; 2216 eh = mtod(m, struct ether_header *); 2217 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2218 mxge_vlan_tag_remove(m, &csum); 2219 } 2220 /* if the checksum is valid, mark it in the mbuf header */ 2221 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2222 if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum))) 2223 return; 2224 /* otherwise, it was a UDP frame, or a TCP frame which 2225 we could not do LRO on. Tell the stack that the 2226 checksum is good */ 2227 m->m_pkthdr.csum_data = 0xffff; 2228 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2229 } 2230 /* pass the frame up the stack */ 2231 (*ifp->if_input)(ifp, m); 2232 } 2233 2234 static inline void 2235 mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum) 2236 { 2237 struct ifnet *ifp; 2238 struct ether_header *eh; 2239 struct mbuf *m; 2240 mxge_rx_buf_t *rx; 2241 bus_dmamap_t old_map; 2242 int idx; 2243 uint16_t tcpudp_csum; 2244 2245 ifp = sc->ifp; 2246 rx = &sc->rx_small; 2247 idx = rx->cnt & rx->mask; 2248 rx->cnt++; 2249 /* save a pointer to the received mbuf */ 2250 m = rx->info[idx].m; 2251 /* try to replace the received mbuf */ 2252 if (mxge_get_buf_small(sc, rx->extra_map, idx)) { 2253 /* drop the frame -- the old mbuf is re-cycled */ 2254 ifp->if_ierrors++; 2255 return; 2256 } 2257 2258 /* unmap the received buffer */ 2259 old_map = rx->info[idx].map; 2260 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2261 bus_dmamap_unload(rx->dmat, old_map); 2262 2263 /* swap the bus_dmamap_t's */ 2264 rx->info[idx].map = rx->extra_map; 2265 rx->extra_map = old_map; 2266 2267 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2268 * aligned */ 2269 m->m_data += MXGEFW_PAD; 2270 2271 m->m_pkthdr.rcvif = ifp; 2272 m->m_len = m->m_pkthdr.len = len; 2273 ifp->if_ipackets++; 2274 eh = mtod(m, struct ether_header *); 2275 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2276 mxge_vlan_tag_remove(m, &csum); 2277 } 2278 /* if the checksum is valid, mark it in the mbuf header */ 2279 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2280 if (sc->lro_cnt && (0 == mxge_lro_rx(sc, m, csum))) 2281 return; 2282 /* otherwise, it was a UDP frame, or a TCP frame which 2283 we could not do LRO on. Tell the stack that the 2284 checksum is good */ 2285 m->m_pkthdr.csum_data = 0xffff; 2286 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2287 } 2288 2289 /* pass the frame up the stack */ 2290 (*ifp->if_input)(ifp, m); 2291 } 2292 2293 static inline void 2294 mxge_clean_rx_done(mxge_softc_t *sc) 2295 { 2296 mxge_rx_done_t *rx_done = &sc->rx_done; 2297 struct lro_entry *lro; 2298 int limit = 0; 2299 uint16_t length; 2300 uint16_t checksum; 2301 2302 2303 while (rx_done->entry[rx_done->idx].length != 0) { 2304 length = ntohs(rx_done->entry[rx_done->idx].length); 2305 rx_done->entry[rx_done->idx].length = 0; 2306 checksum = rx_done->entry[rx_done->idx].checksum; 2307 if (length <= (MHLEN - MXGEFW_PAD)) 2308 mxge_rx_done_small(sc, length, checksum); 2309 else 2310 mxge_rx_done_big(sc, length, checksum); 2311 rx_done->cnt++; 2312 rx_done->idx = rx_done->cnt & rx_done->mask; 2313 2314 /* limit potential for livelock */ 2315 if (__predict_false(++limit > rx_done->mask / 2)) 2316 break; 2317 } 2318 while(!SLIST_EMPTY(&sc->lro_active)) { 2319 lro = SLIST_FIRST(&sc->lro_active); 2320 SLIST_REMOVE_HEAD(&sc->lro_active, next); 2321 mxge_lro_flush(sc, lro); 2322 } 2323 } 2324 2325 2326 static inline void 2327 mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx) 2328 { 2329 struct ifnet *ifp; 2330 mxge_tx_buf_t *tx; 2331 struct mbuf *m; 2332 bus_dmamap_t map; 2333 int idx; 2334 2335 tx = &sc->tx; 2336 ifp = sc->ifp; 2337 while (tx->pkt_done != mcp_idx) { 2338 idx = tx->done & tx->mask; 2339 tx->done++; 2340 m = tx->info[idx].m; 2341 /* mbuf and DMA map only attached to the first 2342 segment per-mbuf */ 2343 if (m != NULL) { 2344 ifp->if_opackets++; 2345 tx->info[idx].m = NULL; 2346 map = tx->info[idx].map; 2347 bus_dmamap_unload(tx->dmat, map); 2348 m_freem(m); 2349 } 2350 if (tx->info[idx].flag) { 2351 tx->info[idx].flag = 0; 2352 tx->pkt_done++; 2353 } 2354 } 2355 2356 /* If we have space, clear IFF_OACTIVE to tell the stack that 2357 its OK to send packets */ 2358 2359 if (ifp->if_drv_flags & IFF_DRV_OACTIVE && 2360 tx->req - tx->done < (tx->mask + 1)/4) { 2361 mtx_lock(&sc->tx_mtx); 2362 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 2363 sc->tx.wake++; 2364 mxge_start_locked(sc); 2365 mtx_unlock(&sc->tx_mtx); 2366 } 2367 } 2368 2369 static void 2370 mxge_intr(void *arg) 2371 { 2372 mxge_softc_t *sc = arg; 2373 mcp_irq_data_t *stats = sc->fw_stats; 2374 mxge_tx_buf_t *tx = &sc->tx; 2375 mxge_rx_done_t *rx_done = &sc->rx_done; 2376 uint32_t send_done_count; 2377 uint8_t valid; 2378 2379 2380 /* make sure the DMA has finished */ 2381 if (!stats->valid) { 2382 return; 2383 } 2384 valid = stats->valid; 2385 2386 if (!sc->msi_enabled) { 2387 /* lower legacy IRQ */ 2388 *sc->irq_deassert = 0; 2389 if (!mxge_deassert_wait) 2390 /* don't wait for conf. that irq is low */ 2391 stats->valid = 0; 2392 } else { 2393 stats->valid = 0; 2394 } 2395 2396 /* loop while waiting for legacy irq deassertion */ 2397 do { 2398 /* check for transmit completes and receives */ 2399 send_done_count = be32toh(stats->send_done_count); 2400 while ((send_done_count != tx->pkt_done) || 2401 (rx_done->entry[rx_done->idx].length != 0)) { 2402 mxge_tx_done(sc, (int)send_done_count); 2403 mxge_clean_rx_done(sc); 2404 send_done_count = be32toh(stats->send_done_count); 2405 } 2406 } while (*((volatile uint8_t *) &stats->valid)); 2407 2408 if (__predict_false(stats->stats_updated)) { 2409 if (sc->link_state != stats->link_up) { 2410 sc->link_state = stats->link_up; 2411 if (sc->link_state) { 2412 if_link_state_change(sc->ifp, LINK_STATE_UP); 2413 if (mxge_verbose) 2414 device_printf(sc->dev, "link up\n"); 2415 } else { 2416 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2417 if (mxge_verbose) 2418 device_printf(sc->dev, "link down\n"); 2419 } 2420 } 2421 if (sc->rdma_tags_available != 2422 be32toh(sc->fw_stats->rdma_tags_available)) { 2423 sc->rdma_tags_available = 2424 be32toh(sc->fw_stats->rdma_tags_available); 2425 device_printf(sc->dev, "RDMA timed out! %d tags " 2426 "left\n", sc->rdma_tags_available); 2427 } 2428 sc->down_cnt += stats->link_down; 2429 } 2430 2431 /* check to see if we have rx token to pass back */ 2432 if (valid & 0x1) 2433 *sc->irq_claim = be32toh(3); 2434 *(sc->irq_claim + 1) = be32toh(3); 2435 } 2436 2437 static void 2438 mxge_init(void *arg) 2439 { 2440 } 2441 2442 2443 2444 static void 2445 mxge_free_mbufs(mxge_softc_t *sc) 2446 { 2447 int i; 2448 2449 for (i = 0; i <= sc->rx_big.mask; i++) { 2450 if (sc->rx_big.info[i].m == NULL) 2451 continue; 2452 bus_dmamap_unload(sc->rx_big.dmat, 2453 sc->rx_big.info[i].map); 2454 m_freem(sc->rx_big.info[i].m); 2455 sc->rx_big.info[i].m = NULL; 2456 } 2457 2458 for (i = 0; i <= sc->rx_small.mask; i++) { 2459 if (sc->rx_small.info[i].m == NULL) 2460 continue; 2461 bus_dmamap_unload(sc->rx_small.dmat, 2462 sc->rx_small.info[i].map); 2463 m_freem(sc->rx_small.info[i].m); 2464 sc->rx_small.info[i].m = NULL; 2465 } 2466 2467 for (i = 0; i <= sc->tx.mask; i++) { 2468 sc->tx.info[i].flag = 0; 2469 if (sc->tx.info[i].m == NULL) 2470 continue; 2471 bus_dmamap_unload(sc->tx.dmat, 2472 sc->tx.info[i].map); 2473 m_freem(sc->tx.info[i].m); 2474 sc->tx.info[i].m = NULL; 2475 } 2476 } 2477 2478 static void 2479 mxge_free_rings(mxge_softc_t *sc) 2480 { 2481 int i; 2482 2483 if (sc->rx_done.entry != NULL) 2484 mxge_dma_free(&sc->rx_done.dma); 2485 sc->rx_done.entry = NULL; 2486 if (sc->tx.req_bytes != NULL) 2487 free(sc->tx.req_bytes, M_DEVBUF); 2488 if (sc->tx.seg_list != NULL) 2489 free(sc->tx.seg_list, M_DEVBUF); 2490 if (sc->rx_small.shadow != NULL) 2491 free(sc->rx_small.shadow, M_DEVBUF); 2492 if (sc->rx_big.shadow != NULL) 2493 free(sc->rx_big.shadow, M_DEVBUF); 2494 if (sc->tx.info != NULL) { 2495 if (sc->tx.dmat != NULL) { 2496 for (i = 0; i <= sc->tx.mask; i++) { 2497 bus_dmamap_destroy(sc->tx.dmat, 2498 sc->tx.info[i].map); 2499 } 2500 bus_dma_tag_destroy(sc->tx.dmat); 2501 } 2502 free(sc->tx.info, M_DEVBUF); 2503 } 2504 if (sc->rx_small.info != NULL) { 2505 if (sc->rx_small.dmat != NULL) { 2506 for (i = 0; i <= sc->rx_small.mask; i++) { 2507 bus_dmamap_destroy(sc->rx_small.dmat, 2508 sc->rx_small.info[i].map); 2509 } 2510 bus_dmamap_destroy(sc->rx_small.dmat, 2511 sc->rx_small.extra_map); 2512 bus_dma_tag_destroy(sc->rx_small.dmat); 2513 } 2514 free(sc->rx_small.info, M_DEVBUF); 2515 } 2516 if (sc->rx_big.info != NULL) { 2517 if (sc->rx_big.dmat != NULL) { 2518 for (i = 0; i <= sc->rx_big.mask; i++) { 2519 bus_dmamap_destroy(sc->rx_big.dmat, 2520 sc->rx_big.info[i].map); 2521 } 2522 bus_dmamap_destroy(sc->rx_big.dmat, 2523 sc->rx_big.extra_map); 2524 bus_dma_tag_destroy(sc->rx_big.dmat); 2525 } 2526 free(sc->rx_big.info, M_DEVBUF); 2527 } 2528 } 2529 2530 static int 2531 mxge_alloc_rings(mxge_softc_t *sc) 2532 { 2533 mxge_cmd_t cmd; 2534 int tx_ring_size, rx_ring_size; 2535 int tx_ring_entries, rx_ring_entries; 2536 int i, err; 2537 unsigned long bytes; 2538 2539 /* get ring sizes */ 2540 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 2541 tx_ring_size = cmd.data0; 2542 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 2543 if (err != 0) { 2544 device_printf(sc->dev, "Cannot determine ring sizes\n"); 2545 goto abort_with_nothing; 2546 } 2547 2548 rx_ring_size = cmd.data0; 2549 2550 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 2551 rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t); 2552 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 2553 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 2554 IFQ_SET_READY(&sc->ifp->if_snd); 2555 2556 sc->tx.mask = tx_ring_entries - 1; 2557 sc->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 2558 sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1; 2559 sc->rx_done.mask = (2 * rx_ring_entries) - 1; 2560 2561 err = ENOMEM; 2562 2563 /* allocate interrupt queues */ 2564 bytes = (sc->rx_done.mask + 1) * sizeof (*sc->rx_done.entry); 2565 err = mxge_dma_alloc(sc, &sc->rx_done.dma, bytes, 4096); 2566 if (err != 0) 2567 goto abort_with_nothing; 2568 sc->rx_done.entry = sc->rx_done.dma.addr; 2569 bzero(sc->rx_done.entry, bytes); 2570 2571 /* allocate the tx request copy block */ 2572 bytes = 8 + 2573 sizeof (*sc->tx.req_list) * (sc->tx.max_desc + 4); 2574 sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 2575 if (sc->tx.req_bytes == NULL) 2576 goto abort_with_alloc; 2577 /* ensure req_list entries are aligned to 8 bytes */ 2578 sc->tx.req_list = (mcp_kreq_ether_send_t *) 2579 ((unsigned long)(sc->tx.req_bytes + 7) & ~7UL); 2580 2581 /* allocate the tx busdma segment list */ 2582 bytes = sizeof (*sc->tx.seg_list) * sc->tx.max_desc; 2583 sc->tx.seg_list = (bus_dma_segment_t *) 2584 malloc(bytes, M_DEVBUF, M_WAITOK); 2585 if (sc->tx.seg_list == NULL) 2586 goto abort_with_alloc; 2587 2588 /* allocate the rx shadow rings */ 2589 bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow); 2590 sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2591 if (sc->rx_small.shadow == NULL) 2592 goto abort_with_alloc; 2593 2594 bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow); 2595 sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2596 if (sc->rx_big.shadow == NULL) 2597 goto abort_with_alloc; 2598 2599 /* allocate the host info rings */ 2600 bytes = tx_ring_entries * sizeof (*sc->tx.info); 2601 sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2602 if (sc->tx.info == NULL) 2603 goto abort_with_alloc; 2604 2605 bytes = rx_ring_entries * sizeof (*sc->rx_small.info); 2606 sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2607 if (sc->rx_small.info == NULL) 2608 goto abort_with_alloc; 2609 2610 bytes = rx_ring_entries * sizeof (*sc->rx_big.info); 2611 sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 2612 if (sc->rx_big.info == NULL) 2613 goto abort_with_alloc; 2614 2615 /* allocate the busdma resources */ 2616 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2617 1, /* alignment */ 2618 sc->tx.boundary, /* boundary */ 2619 BUS_SPACE_MAXADDR, /* low */ 2620 BUS_SPACE_MAXADDR, /* high */ 2621 NULL, NULL, /* filter */ 2622 65536 + 256, /* maxsize */ 2623 sc->tx.max_desc - 2, /* num segs */ 2624 sc->tx.boundary, /* maxsegsize */ 2625 BUS_DMA_ALLOCNOW, /* flags */ 2626 NULL, NULL, /* lock */ 2627 &sc->tx.dmat); /* tag */ 2628 2629 if (err != 0) { 2630 device_printf(sc->dev, "Err %d allocating tx dmat\n", 2631 err); 2632 goto abort_with_alloc; 2633 } 2634 2635 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2636 1, /* alignment */ 2637 4096, /* boundary */ 2638 BUS_SPACE_MAXADDR, /* low */ 2639 BUS_SPACE_MAXADDR, /* high */ 2640 NULL, NULL, /* filter */ 2641 MHLEN, /* maxsize */ 2642 1, /* num segs */ 2643 MHLEN, /* maxsegsize */ 2644 BUS_DMA_ALLOCNOW, /* flags */ 2645 NULL, NULL, /* lock */ 2646 &sc->rx_small.dmat); /* tag */ 2647 if (err != 0) { 2648 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 2649 err); 2650 goto abort_with_alloc; 2651 } 2652 2653 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 2654 1, /* alignment */ 2655 4096, /* boundary */ 2656 BUS_SPACE_MAXADDR, /* low */ 2657 BUS_SPACE_MAXADDR, /* high */ 2658 NULL, NULL, /* filter */ 2659 3*4096, /* maxsize */ 2660 3, /* num segs */ 2661 4096, /* maxsegsize */ 2662 BUS_DMA_ALLOCNOW, /* flags */ 2663 NULL, NULL, /* lock */ 2664 &sc->rx_big.dmat); /* tag */ 2665 if (err != 0) { 2666 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 2667 err); 2668 goto abort_with_alloc; 2669 } 2670 2671 /* now use these tags to setup dmamaps for each slot 2672 in each ring */ 2673 for (i = 0; i <= sc->tx.mask; i++) { 2674 err = bus_dmamap_create(sc->tx.dmat, 0, 2675 &sc->tx.info[i].map); 2676 if (err != 0) { 2677 device_printf(sc->dev, "Err %d tx dmamap\n", 2678 err); 2679 goto abort_with_alloc; 2680 } 2681 } 2682 for (i = 0; i <= sc->rx_small.mask; i++) { 2683 err = bus_dmamap_create(sc->rx_small.dmat, 0, 2684 &sc->rx_small.info[i].map); 2685 if (err != 0) { 2686 device_printf(sc->dev, "Err %d rx_small dmamap\n", 2687 err); 2688 goto abort_with_alloc; 2689 } 2690 } 2691 err = bus_dmamap_create(sc->rx_small.dmat, 0, 2692 &sc->rx_small.extra_map); 2693 if (err != 0) { 2694 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 2695 err); 2696 goto abort_with_alloc; 2697 } 2698 2699 for (i = 0; i <= sc->rx_big.mask; i++) { 2700 err = bus_dmamap_create(sc->rx_big.dmat, 0, 2701 &sc->rx_big.info[i].map); 2702 if (err != 0) { 2703 device_printf(sc->dev, "Err %d rx_big dmamap\n", 2704 err); 2705 goto abort_with_alloc; 2706 } 2707 } 2708 err = bus_dmamap_create(sc->rx_big.dmat, 0, 2709 &sc->rx_big.extra_map); 2710 if (err != 0) { 2711 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 2712 err); 2713 goto abort_with_alloc; 2714 } 2715 return 0; 2716 2717 abort_with_alloc: 2718 mxge_free_rings(sc); 2719 2720 abort_with_nothing: 2721 return err; 2722 } 2723 2724 static void 2725 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 2726 { 2727 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 2728 2729 if (bufsize < MCLBYTES) { 2730 /* easy, everything fits in a single buffer */ 2731 *big_buf_size = MCLBYTES; 2732 *cl_size = MCLBYTES; 2733 *nbufs = 1; 2734 return; 2735 } 2736 2737 if (bufsize < MJUMPAGESIZE) { 2738 /* still easy, everything still fits in a single buffer */ 2739 *big_buf_size = MJUMPAGESIZE; 2740 *cl_size = MJUMPAGESIZE; 2741 *nbufs = 1; 2742 return; 2743 } 2744 /* now we need to use virtually contiguous buffers */ 2745 *cl_size = MJUM9BYTES; 2746 *big_buf_size = 4096; 2747 *nbufs = mtu / 4096 + 1; 2748 /* needs to be a power of two, so round up */ 2749 if (*nbufs == 3) 2750 *nbufs = 4; 2751 } 2752 2753 static int 2754 mxge_open(mxge_softc_t *sc) 2755 { 2756 mxge_cmd_t cmd; 2757 int i, err, big_bytes; 2758 bus_dmamap_t map; 2759 bus_addr_t bus; 2760 struct lro_entry *lro_entry; 2761 2762 SLIST_INIT(&sc->lro_free); 2763 SLIST_INIT(&sc->lro_active); 2764 2765 for (i = 0; i < sc->lro_cnt; i++) { 2766 lro_entry = (struct lro_entry *) 2767 malloc(sizeof (*lro_entry), M_DEVBUF, M_NOWAIT | M_ZERO); 2768 if (lro_entry == NULL) { 2769 sc->lro_cnt = i; 2770 break; 2771 } 2772 SLIST_INSERT_HEAD(&sc->lro_free, lro_entry, next); 2773 } 2774 2775 /* Copy the MAC address in case it was overridden */ 2776 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 2777 2778 err = mxge_reset(sc, 1); 2779 if (err != 0) { 2780 device_printf(sc->dev, "failed to reset\n"); 2781 return EIO; 2782 } 2783 2784 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, 2785 &sc->rx_big.cl_size, &sc->rx_big.nbufs); 2786 2787 cmd.data0 = sc->rx_big.nbufs; 2788 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 2789 &cmd); 2790 /* error is only meaningful if we're trying to set 2791 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 2792 if (err && sc->rx_big.nbufs > 1) { 2793 device_printf(sc->dev, 2794 "Failed to set alway-use-n to %d\n", 2795 sc->rx_big.nbufs); 2796 return EIO; 2797 } 2798 /* get the lanai pointers to the send and receive rings */ 2799 2800 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 2801 sc->tx.lanai = 2802 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 2803 err |= mxge_send_cmd(sc, 2804 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 2805 sc->rx_small.lanai = 2806 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 2807 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 2808 sc->rx_big.lanai = 2809 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 2810 2811 if (err != 0) { 2812 device_printf(sc->dev, 2813 "failed to get ring sizes or locations\n"); 2814 return EIO; 2815 } 2816 2817 /* stock receive rings */ 2818 for (i = 0; i <= sc->rx_small.mask; i++) { 2819 map = sc->rx_small.info[i].map; 2820 err = mxge_get_buf_small(sc, map, i); 2821 if (err) { 2822 device_printf(sc->dev, "alloced %d/%d smalls\n", 2823 i, sc->rx_small.mask + 1); 2824 goto abort; 2825 } 2826 } 2827 for (i = 0; i <= sc->rx_big.mask; i++) { 2828 sc->rx_big.shadow[i].addr_low = 0xffffffff; 2829 sc->rx_big.shadow[i].addr_high = 0xffffffff; 2830 } 2831 for (i = 0; i <= sc->rx_big.mask; i += sc->rx_big.nbufs) { 2832 map = sc->rx_big.info[i].map; 2833 err = mxge_get_buf_big(sc, map, i); 2834 if (err) { 2835 device_printf(sc->dev, "alloced %d/%d bigs\n", 2836 i, sc->rx_big.mask + 1); 2837 goto abort; 2838 } 2839 } 2840 2841 /* Give the firmware the mtu and the big and small buffer 2842 sizes. The firmware wants the big buf size to be a power 2843 of two. Luckily, FreeBSD's clusters are powers of two */ 2844 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 2845 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 2846 cmd.data0 = MHLEN - MXGEFW_PAD; 2847 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 2848 &cmd); 2849 cmd.data0 = big_bytes; 2850 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 2851 2852 if (err != 0) { 2853 device_printf(sc->dev, "failed to setup params\n"); 2854 goto abort; 2855 } 2856 2857 /* Now give him the pointer to the stats block */ 2858 cmd.data0 = MXGE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr); 2859 cmd.data1 = MXGE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr); 2860 cmd.data2 = sizeof(struct mcp_irq_data); 2861 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 2862 2863 if (err != 0) { 2864 bus = sc->fw_stats_dma.bus_addr; 2865 bus += offsetof(struct mcp_irq_data, send_done_count); 2866 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 2867 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 2868 err = mxge_send_cmd(sc, 2869 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 2870 &cmd); 2871 /* Firmware cannot support multicast without STATS_DMA_V2 */ 2872 sc->fw_multicast_support = 0; 2873 } else { 2874 sc->fw_multicast_support = 1; 2875 } 2876 2877 if (err != 0) { 2878 device_printf(sc->dev, "failed to setup params\n"); 2879 goto abort; 2880 } 2881 2882 /* Finally, start the firmware running */ 2883 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 2884 if (err) { 2885 device_printf(sc->dev, "Couldn't bring up link\n"); 2886 goto abort; 2887 } 2888 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 2889 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 2890 2891 return 0; 2892 2893 2894 abort: 2895 mxge_free_mbufs(sc); 2896 2897 return err; 2898 } 2899 2900 static int 2901 mxge_close(mxge_softc_t *sc) 2902 { 2903 struct lro_entry *lro_entry; 2904 mxge_cmd_t cmd; 2905 int err, old_down_cnt; 2906 2907 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 2908 old_down_cnt = sc->down_cnt; 2909 mb(); 2910 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 2911 if (err) { 2912 device_printf(sc->dev, "Couldn't bring down link\n"); 2913 } 2914 if (old_down_cnt == sc->down_cnt) { 2915 /* wait for down irq */ 2916 DELAY(10 * sc->intr_coal_delay); 2917 } 2918 if (old_down_cnt == sc->down_cnt) { 2919 device_printf(sc->dev, "never got down irq\n"); 2920 } 2921 2922 mxge_free_mbufs(sc); 2923 2924 while (!SLIST_EMPTY(&sc->lro_free)) { 2925 lro_entry = SLIST_FIRST(&sc->lro_free); 2926 SLIST_REMOVE_HEAD(&sc->lro_free, next); 2927 } 2928 return 0; 2929 } 2930 2931 static void 2932 mxge_setup_cfg_space(mxge_softc_t *sc) 2933 { 2934 device_t dev = sc->dev; 2935 int reg; 2936 uint16_t cmd, lnk, pectl; 2937 2938 /* find the PCIe link width and set max read request to 4KB*/ 2939 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 2940 lnk = pci_read_config(dev, reg + 0x12, 2); 2941 sc->link_width = (lnk >> 4) & 0x3f; 2942 2943 pectl = pci_read_config(dev, reg + 0x8, 2); 2944 pectl = (pectl & ~0x7000) | (5 << 12); 2945 pci_write_config(dev, reg + 0x8, pectl, 2); 2946 } 2947 2948 /* Enable DMA and Memory space access */ 2949 pci_enable_busmaster(dev); 2950 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 2951 cmd |= PCIM_CMD_MEMEN; 2952 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 2953 } 2954 2955 static uint32_t 2956 mxge_read_reboot(mxge_softc_t *sc) 2957 { 2958 device_t dev = sc->dev; 2959 uint32_t vs; 2960 2961 /* find the vendor specific offset */ 2962 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 2963 device_printf(sc->dev, 2964 "could not find vendor specific offset\n"); 2965 return (uint32_t)-1; 2966 } 2967 /* enable read32 mode */ 2968 pci_write_config(dev, vs + 0x10, 0x3, 1); 2969 /* tell NIC which register to read */ 2970 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 2971 return (pci_read_config(dev, vs + 0x14, 4)); 2972 } 2973 2974 static void 2975 mxge_watchdog_reset(mxge_softc_t *sc) 2976 { 2977 int err; 2978 uint32_t reboot; 2979 uint16_t cmd; 2980 2981 err = ENXIO; 2982 2983 device_printf(sc->dev, "Watchdog reset!\n"); 2984 2985 /* 2986 * check to see if the NIC rebooted. If it did, then all of 2987 * PCI config space has been reset, and things like the 2988 * busmaster bit will be zero. If this is the case, then we 2989 * must restore PCI config space before the NIC can be used 2990 * again 2991 */ 2992 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 2993 if (cmd == 0xffff) { 2994 /* 2995 * maybe the watchdog caught the NIC rebooting; wait 2996 * up to 100ms for it to finish. If it does not come 2997 * back, then give up 2998 */ 2999 DELAY(1000*100); 3000 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3001 if (cmd == 0xffff) { 3002 device_printf(sc->dev, "NIC disappeared!\n"); 3003 goto abort; 3004 } 3005 } 3006 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3007 /* print the reboot status */ 3008 reboot = mxge_read_reboot(sc); 3009 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3010 reboot); 3011 /* restore PCI configuration space */ 3012 3013 /* XXXX waiting for pci_cfg_restore() to be exported */ 3014 goto abort; /* just abort for now */ 3015 3016 /* and redo any changes we made to our config space */ 3017 mxge_setup_cfg_space(sc); 3018 } else { 3019 device_printf(sc->dev, "NIC did not reboot, ring state:\n"); 3020 device_printf(sc->dev, "tx.req=%d tx.done=%d\n", 3021 sc->tx.req, sc->tx.done); 3022 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3023 sc->tx.pkt_done, 3024 be32toh(sc->fw_stats->send_done_count)); 3025 } 3026 3027 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { 3028 mxge_close(sc); 3029 err = mxge_open(sc); 3030 } 3031 3032 abort: 3033 /* 3034 * stop the watchdog if the nic is dead, to avoid spamming the 3035 * console 3036 */ 3037 if (err != 0) { 3038 callout_stop(&sc->co_hdl); 3039 } 3040 } 3041 3042 static void 3043 mxge_watchdog(mxge_softc_t *sc) 3044 { 3045 mxge_tx_buf_t *tx = &sc->tx; 3046 3047 /* see if we have outstanding transmits, which 3048 have been pending for more than mxge_ticks */ 3049 if (tx->req != tx->done && 3050 tx->watchdog_req != tx->watchdog_done && 3051 tx->done == tx->watchdog_done) 3052 mxge_watchdog_reset(sc); 3053 3054 tx->watchdog_req = tx->req; 3055 tx->watchdog_done = tx->done; 3056 } 3057 3058 static void 3059 mxge_tick(void *arg) 3060 { 3061 mxge_softc_t *sc = arg; 3062 3063 3064 /* Synchronize with possible callout reset/stop. */ 3065 if (callout_pending(&sc->co_hdl) || 3066 !callout_active(&sc->co_hdl)) { 3067 mtx_unlock(&sc->driver_mtx); 3068 return; 3069 } 3070 3071 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3072 mxge_watchdog(sc); 3073 } 3074 3075 static int 3076 mxge_media_change(struct ifnet *ifp) 3077 { 3078 return EINVAL; 3079 } 3080 3081 static int 3082 mxge_change_mtu(mxge_softc_t *sc, int mtu) 3083 { 3084 struct ifnet *ifp = sc->ifp; 3085 int real_mtu, old_mtu; 3086 int err = 0; 3087 3088 3089 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3090 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 3091 return EINVAL; 3092 mtx_lock(&sc->driver_mtx); 3093 old_mtu = ifp->if_mtu; 3094 ifp->if_mtu = mtu; 3095 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3096 callout_stop(&sc->co_hdl); 3097 mxge_close(sc); 3098 err = mxge_open(sc); 3099 if (err != 0) { 3100 ifp->if_mtu = old_mtu; 3101 mxge_close(sc); 3102 (void) mxge_open(sc); 3103 } 3104 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3105 } 3106 mtx_unlock(&sc->driver_mtx); 3107 return err; 3108 } 3109 3110 static void 3111 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3112 { 3113 mxge_softc_t *sc = ifp->if_softc; 3114 3115 3116 if (sc == NULL) 3117 return; 3118 ifmr->ifm_status = IFM_AVALID; 3119 ifmr->ifm_status |= sc->fw_stats->link_up ? IFM_ACTIVE : 0; 3120 ifmr->ifm_active = IFM_AUTO | IFM_ETHER; 3121 ifmr->ifm_active |= sc->fw_stats->link_up ? IFM_FDX : 0; 3122 } 3123 3124 static int 3125 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 3126 { 3127 mxge_softc_t *sc = ifp->if_softc; 3128 struct ifreq *ifr = (struct ifreq *)data; 3129 int err, mask; 3130 3131 err = 0; 3132 switch (command) { 3133 case SIOCSIFADDR: 3134 case SIOCGIFADDR: 3135 err = ether_ioctl(ifp, command, data); 3136 break; 3137 3138 case SIOCSIFMTU: 3139 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3140 break; 3141 3142 case SIOCSIFFLAGS: 3143 mtx_lock(&sc->driver_mtx); 3144 if (ifp->if_flags & IFF_UP) { 3145 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 3146 err = mxge_open(sc); 3147 callout_reset(&sc->co_hdl, mxge_ticks, 3148 mxge_tick, sc); 3149 } else { 3150 /* take care of promis can allmulti 3151 flag chages */ 3152 mxge_change_promisc(sc, 3153 ifp->if_flags & IFF_PROMISC); 3154 mxge_set_multicast_list(sc); 3155 } 3156 } else { 3157 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3158 mxge_close(sc); 3159 callout_stop(&sc->co_hdl); 3160 } 3161 } 3162 mtx_unlock(&sc->driver_mtx); 3163 break; 3164 3165 case SIOCADDMULTI: 3166 case SIOCDELMULTI: 3167 mtx_lock(&sc->driver_mtx); 3168 mxge_set_multicast_list(sc); 3169 mtx_unlock(&sc->driver_mtx); 3170 break; 3171 3172 case SIOCSIFCAP: 3173 mtx_lock(&sc->driver_mtx); 3174 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 3175 if (mask & IFCAP_TXCSUM) { 3176 if (IFCAP_TXCSUM & ifp->if_capenable) { 3177 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 3178 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 3179 | CSUM_TSO); 3180 } else { 3181 ifp->if_capenable |= IFCAP_TXCSUM; 3182 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 3183 } 3184 } else if (mask & IFCAP_RXCSUM) { 3185 if (IFCAP_RXCSUM & ifp->if_capenable) { 3186 ifp->if_capenable &= ~IFCAP_RXCSUM; 3187 sc->csum_flag = 0; 3188 } else { 3189 ifp->if_capenable |= IFCAP_RXCSUM; 3190 sc->csum_flag = 1; 3191 } 3192 } 3193 if (mask & IFCAP_TSO4) { 3194 if (IFCAP_TSO4 & ifp->if_capenable) { 3195 ifp->if_capenable &= ~IFCAP_TSO4; 3196 ifp->if_hwassist &= ~CSUM_TSO; 3197 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 3198 ifp->if_capenable |= IFCAP_TSO4; 3199 ifp->if_hwassist |= CSUM_TSO; 3200 } else { 3201 printf("mxge requires tx checksum offload" 3202 " be enabled to use TSO\n"); 3203 err = EINVAL; 3204 } 3205 } 3206 if (mask & IFCAP_LRO) { 3207 if (IFCAP_LRO & ifp->if_capenable) 3208 err = mxge_change_lro_locked(sc, 0); 3209 else 3210 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 3211 } 3212 if (mask & IFCAP_VLAN_HWTAGGING) 3213 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 3214 mtx_unlock(&sc->driver_mtx); 3215 VLAN_CAPABILITIES(ifp); 3216 3217 break; 3218 3219 case SIOCGIFMEDIA: 3220 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 3221 &sc->media, command); 3222 break; 3223 3224 default: 3225 err = ENOTTY; 3226 } 3227 return err; 3228 } 3229 3230 static void 3231 mxge_fetch_tunables(mxge_softc_t *sc) 3232 { 3233 3234 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 3235 &mxge_flow_control); 3236 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 3237 &mxge_intr_coal_delay); 3238 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 3239 &mxge_nvidia_ecrc_enable); 3240 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 3241 &mxge_force_firmware); 3242 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 3243 &mxge_deassert_wait); 3244 TUNABLE_INT_FETCH("hw.mxge.verbose", 3245 &mxge_verbose); 3246 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 3247 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 3248 if (sc->lro_cnt != 0) 3249 mxge_lro_cnt = sc->lro_cnt; 3250 3251 if (bootverbose) 3252 mxge_verbose = 1; 3253 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 3254 mxge_intr_coal_delay = 30; 3255 if (mxge_ticks == 0) 3256 mxge_ticks = hz; 3257 sc->pause = mxge_flow_control; 3258 3259 } 3260 3261 static int 3262 mxge_attach(device_t dev) 3263 { 3264 mxge_softc_t *sc = device_get_softc(dev); 3265 struct ifnet *ifp; 3266 int count, rid, err; 3267 3268 sc->dev = dev; 3269 mxge_fetch_tunables(sc); 3270 3271 err = bus_dma_tag_create(NULL, /* parent */ 3272 1, /* alignment */ 3273 4096, /* boundary */ 3274 BUS_SPACE_MAXADDR, /* low */ 3275 BUS_SPACE_MAXADDR, /* high */ 3276 NULL, NULL, /* filter */ 3277 65536 + 256, /* maxsize */ 3278 MXGE_MAX_SEND_DESC, /* num segs */ 3279 4096, /* maxsegsize */ 3280 0, /* flags */ 3281 NULL, NULL, /* lock */ 3282 &sc->parent_dmat); /* tag */ 3283 3284 if (err != 0) { 3285 device_printf(sc->dev, "Err %d allocating parent dmat\n", 3286 err); 3287 goto abort_with_nothing; 3288 } 3289 3290 ifp = sc->ifp = if_alloc(IFT_ETHER); 3291 if (ifp == NULL) { 3292 device_printf(dev, "can not if_alloc()\n"); 3293 err = ENOSPC; 3294 goto abort_with_parent_dmat; 3295 } 3296 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 3297 device_get_nameunit(dev)); 3298 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 3299 snprintf(sc->tx_mtx_name, sizeof(sc->tx_mtx_name), "%s:tx", 3300 device_get_nameunit(dev)); 3301 mtx_init(&sc->tx_mtx, sc->tx_mtx_name, NULL, MTX_DEF); 3302 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 3303 "%s:drv", device_get_nameunit(dev)); 3304 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 3305 MTX_NETWORK_LOCK, MTX_DEF); 3306 3307 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 3308 3309 mxge_setup_cfg_space(sc); 3310 3311 /* Map the board into the kernel */ 3312 rid = PCIR_BARS; 3313 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 3314 ~0, 1, RF_ACTIVE); 3315 if (sc->mem_res == NULL) { 3316 device_printf(dev, "could not map memory\n"); 3317 err = ENXIO; 3318 goto abort_with_lock; 3319 } 3320 sc->sram = rman_get_virtual(sc->mem_res); 3321 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 3322 if (sc->sram_size > rman_get_size(sc->mem_res)) { 3323 device_printf(dev, "impossible memory region size %ld\n", 3324 rman_get_size(sc->mem_res)); 3325 err = ENXIO; 3326 goto abort_with_mem_res; 3327 } 3328 3329 /* make NULL terminated copy of the EEPROM strings section of 3330 lanai SRAM */ 3331 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 3332 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 3333 rman_get_bushandle(sc->mem_res), 3334 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 3335 sc->eeprom_strings, 3336 MXGE_EEPROM_STRINGS_SIZE - 2); 3337 err = mxge_parse_strings(sc); 3338 if (err != 0) 3339 goto abort_with_mem_res; 3340 3341 /* Enable write combining for efficient use of PCIe bus */ 3342 mxge_enable_wc(sc); 3343 3344 /* Allocate the out of band dma memory */ 3345 err = mxge_dma_alloc(sc, &sc->cmd_dma, 3346 sizeof (mxge_cmd_t), 64); 3347 if (err != 0) 3348 goto abort_with_mem_res; 3349 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 3350 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 3351 if (err != 0) 3352 goto abort_with_cmd_dma; 3353 3354 err = mxge_dma_alloc(sc, &sc->fw_stats_dma, 3355 sizeof (*sc->fw_stats), 64); 3356 if (err != 0) 3357 goto abort_with_zeropad_dma; 3358 sc->fw_stats = (mcp_irq_data_t *)sc->fw_stats_dma.addr; 3359 3360 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 3361 if (err != 0) 3362 goto abort_with_fw_stats; 3363 3364 /* Add our ithread */ 3365 count = pci_msi_count(dev); 3366 if (count == 1 && pci_alloc_msi(dev, &count) == 0) { 3367 rid = 1; 3368 sc->msi_enabled = 1; 3369 } else { 3370 rid = 0; 3371 } 3372 sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0, 3373 1, RF_SHAREABLE | RF_ACTIVE); 3374 if (sc->irq_res == NULL) { 3375 device_printf(dev, "could not alloc interrupt\n"); 3376 goto abort_with_dmabench; 3377 } 3378 if (mxge_verbose) 3379 device_printf(dev, "using %s irq %ld\n", 3380 sc->msi_enabled ? "MSI" : "INTx", 3381 rman_get_start(sc->irq_res)); 3382 /* select & load the firmware */ 3383 err = mxge_select_firmware(sc); 3384 if (err != 0) 3385 goto abort_with_irq_res; 3386 sc->intr_coal_delay = mxge_intr_coal_delay; 3387 err = mxge_reset(sc, 0); 3388 if (err != 0) 3389 goto abort_with_irq_res; 3390 3391 err = mxge_alloc_rings(sc); 3392 if (err != 0) { 3393 device_printf(sc->dev, "failed to allocate rings\n"); 3394 goto abort_with_irq_res; 3395 } 3396 3397 err = bus_setup_intr(sc->dev, sc->irq_res, 3398 INTR_TYPE_NET | INTR_MPSAFE, 3399 NULL, mxge_intr, sc, &sc->ih); 3400 if (err != 0) { 3401 goto abort_with_rings; 3402 } 3403 /* hook into the network stack */ 3404 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 3405 ifp->if_baudrate = 100000000; 3406 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 3407 IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | 3408 IFCAP_VLAN_HWCSUM | IFCAP_LRO; 3409 3410 sc->max_mtu = mxge_max_mtu(sc); 3411 if (sc->max_mtu >= 9000) 3412 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 3413 else 3414 device_printf(dev, "MTU limited to %d. Install " 3415 "latest firmware for 9000 byte jumbo support\n", 3416 sc->max_mtu - ETHER_HDR_LEN); 3417 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 3418 ifp->if_capenable = ifp->if_capabilities; 3419 if (sc->lro_cnt == 0) 3420 ifp->if_capenable &= ~IFCAP_LRO; 3421 sc->csum_flag = 1; 3422 ifp->if_init = mxge_init; 3423 ifp->if_softc = sc; 3424 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 3425 ifp->if_ioctl = mxge_ioctl; 3426 ifp->if_start = mxge_start; 3427 ether_ifattach(ifp, sc->mac_addr); 3428 /* ether_ifattach sets mtu to 1500 */ 3429 if (ifp->if_capabilities & IFCAP_JUMBO_MTU) 3430 ifp->if_mtu = 9000; 3431 3432 /* Initialise the ifmedia structure */ 3433 ifmedia_init(&sc->media, 0, mxge_media_change, 3434 mxge_media_status); 3435 ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL); 3436 mxge_add_sysctls(sc); 3437 return 0; 3438 3439 abort_with_rings: 3440 mxge_free_rings(sc); 3441 abort_with_irq_res: 3442 bus_release_resource(dev, SYS_RES_IRQ, 3443 sc->msi_enabled ? 1 : 0, sc->irq_res); 3444 if (sc->msi_enabled) 3445 pci_release_msi(dev); 3446 abort_with_dmabench: 3447 mxge_dma_free(&sc->dmabench_dma); 3448 abort_with_fw_stats: 3449 mxge_dma_free(&sc->fw_stats_dma); 3450 abort_with_zeropad_dma: 3451 mxge_dma_free(&sc->zeropad_dma); 3452 abort_with_cmd_dma: 3453 mxge_dma_free(&sc->cmd_dma); 3454 abort_with_mem_res: 3455 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 3456 abort_with_lock: 3457 pci_disable_busmaster(dev); 3458 mtx_destroy(&sc->cmd_mtx); 3459 mtx_destroy(&sc->tx_mtx); 3460 mtx_destroy(&sc->driver_mtx); 3461 if_free(ifp); 3462 abort_with_parent_dmat: 3463 bus_dma_tag_destroy(sc->parent_dmat); 3464 3465 abort_with_nothing: 3466 return err; 3467 } 3468 3469 static int 3470 mxge_detach(device_t dev) 3471 { 3472 mxge_softc_t *sc = device_get_softc(dev); 3473 3474 if (sc->ifp->if_vlantrunk != NULL) { 3475 device_printf(sc->dev, 3476 "Detach vlans before removing module\n"); 3477 return EBUSY; 3478 } 3479 mtx_lock(&sc->driver_mtx); 3480 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 3481 mxge_close(sc); 3482 callout_stop(&sc->co_hdl); 3483 mtx_unlock(&sc->driver_mtx); 3484 ether_ifdetach(sc->ifp); 3485 ifmedia_removeall(&sc->media); 3486 mxge_dummy_rdma(sc, 0); 3487 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 3488 mxge_free_rings(sc); 3489 bus_release_resource(dev, SYS_RES_IRQ, 3490 sc->msi_enabled ? 1 : 0, sc->irq_res); 3491 if (sc->msi_enabled) 3492 pci_release_msi(dev); 3493 3494 sc->rx_done.entry = NULL; 3495 mxge_dma_free(&sc->fw_stats_dma); 3496 mxge_dma_free(&sc->dmabench_dma); 3497 mxge_dma_free(&sc->zeropad_dma); 3498 mxge_dma_free(&sc->cmd_dma); 3499 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 3500 pci_disable_busmaster(dev); 3501 mtx_destroy(&sc->cmd_mtx); 3502 mtx_destroy(&sc->tx_mtx); 3503 mtx_destroy(&sc->driver_mtx); 3504 if_free(sc->ifp); 3505 bus_dma_tag_destroy(sc->parent_dmat); 3506 return 0; 3507 } 3508 3509 static int 3510 mxge_shutdown(device_t dev) 3511 { 3512 return 0; 3513 } 3514 3515 /* 3516 This file uses Myri10GE driver indentation. 3517 3518 Local Variables: 3519 c-file-style:"linux" 3520 tab-width:8 3521 End: 3522 */ 3523