/******************************************************************************

Copyright (c) 2006, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/memrange.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/sx.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/mcp_gen_header.h>
#include <dev/mxge/if_mxge_var.h>

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_max_intr_slots = 1024;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_ticks;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);

static int
mxge_probe(device_t dev)
{
	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E)) {
		device_set_desc(dev, "Myri10G-PCIE-8A");
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
	struct mem_range_desc mrdesc;
	vm_paddr_t pa;
	vm_offset_t len;
	int err, action;

	pa = rman_get_start(sc->mem_res);
	len = rman_get_size(sc->mem_res);
	mrdesc.mr_base = pa;
	mrdesc.mr_len = len;
	mrdesc.mr_flags = MDF_WRITECOMBINE;
	action = MEMRANGE_SET_UPDATE;
	strcpy((char *)&mrdesc.mr_owner, "mxge");
	err = mem_range_attr_set(&mrdesc, &action);
	if (err != 0) {
		device_printf(sc->dev,
			      "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n",
			      (unsigned long)pa, (unsigned long)len, err);
	} else {
		sc->wc = 1;
	}
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 4096,			/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
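/*
 * Example (illustration only, not driver code): typical use of the two
 * helpers above.  A caller allocates a coherent buffer, reads the bus
 * address that mxge_dmamap_callback() stored in dma.bus_addr, and later
 * releases it with mxge_dma_free().  The sizes here are made up.
 */
#if 0
	mxge_dma_t dma;

	if (mxge_dma_alloc(sc, &dma, 4096, 64) == 0) {
		device_printf(sc->dev, "dma buffer at bus addr 0x%jx\n",
			      (uintmax_t)dma.bus_addr);
		mxge_dma_free(&dma);
	}
#endif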
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
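/*
 * Worked example for the MAC branch above (hypothetical EEPROM
 * contents): given "MAC=00:60:dd:47:ab:cd\0", the initial ptr += 1
 * plus the first ptr += 3 in the loop skip the "MAC=" prefix, and each
 * subsequent ptr += 3 skips one "xx:" group, so strtoul() sees "00",
 * "60", "dd", "47", "ab" and "cd" in turn and fills sc->mac_addr[0..5].
 */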
#if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
static int
mxge_enable_nvidia_ecrc(mxge_softc_t *sc, device_t pdev)
{
	uint32_t val;
	unsigned long off;
	char *va, *cfgptr;
	uint16_t vendor_id, device_id;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return 0;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machines the 0xe000000 mapping is
	 * handled by the nvidia chipset; that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off = 0xe0000000UL
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_mapdev() failed\n");
		return EIO;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return EIO;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return EIO;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return 0;
}
#else
static int
mxge_enable_nvidia_ecrc(mxge_softc_t *sc, device_t pdev)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return ENXIO;
}
#endif

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx.boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx.boundary to 4KB.
 */

static void
mxge_select_firmware(mxge_softc_t *sc)
{
	int err, aligned = 0;
	device_t pdev;
	uint16_t pvend, pdid;

	if (mxge_force_firmware != 0) {
		if (mxge_force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		goto abort;
	}
	pvend = pci_read_config(pdev, PCIR_VENDOR, 2);
	pdid = pci_read_config(pdev, PCIR_DEVICE, 2);

	/* see if we can enable ECRC's on an upstream
	   Nvidia bridge */
	if (mxge_nvidia_ecrc_enable &&
	    (pvend == 0x10de && pdid == 0x005d)) {
		err = mxge_enable_nvidia_ecrc(sc, pdev);
		if (err == 0) {
			aligned = 1;
			if (mxge_verbose)
				device_printf(sc->dev,
					      "Assuming aligned completions"
					      " (ECRC)\n");
		}
	}
	/* see if the upstream bridge is known to
	   provide aligned completions */
	if (/* HT2000 */ (pvend == 0x1166 && pdid == 0x0132) ||
	    /* PLX    */ (pvend == 0x10b5 && pdid == 0x8532) ||
	    /* Intel  */ (pvend == 0x8086 &&
	     /* E5000 NorthBridge */((pdid >= 0x25f7 && pdid <= 0x25fa) ||
	     /* E5000 SouthBridge */ (pdid >= 0x3510 && pdid <= 0x351b)))) {
		aligned = 1;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming aligned completions "
				      "(0x%x:0x%x)\n", pvend, pdid);
	}

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx.boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx.boundary = 2048;
	}
}
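/*
 * To summarize the selection above (informational comment only):
 *
 *	completions known aligned (ECRC enabled, trusted bridge, forced)
 *		-> fw_name = "mxge_eth_z8e",  tx.boundary = 4096
 *	otherwise (unknown or unaligned completions)
 *		-> fw_name = "mxge_ethp_z8e", tx.boundary = 2048
 *
 * mxge_force_firmware = 1 forces the aligned case; any other non-zero
 * value forces the unaligned case.
 */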
"aligned" : "unaligned"); 419 goto abort; 420 } 421 422 /* if the PCIe link width is 4 or less, we can use the aligned 423 firmware and skip any checks */ 424 if (sc->link_width != 0 && sc->link_width <= 4) { 425 device_printf(sc->dev, 426 "PCIe x%d Link, expect reduced performance\n", 427 sc->link_width); 428 aligned = 1; 429 goto abort; 430 } 431 432 pdev = device_get_parent(device_get_parent(sc->dev)); 433 if (pdev == NULL) { 434 device_printf(sc->dev, "could not find parent?\n"); 435 goto abort; 436 } 437 pvend = pci_read_config(pdev, PCIR_VENDOR, 2); 438 pdid = pci_read_config(pdev, PCIR_DEVICE, 2); 439 440 /* see if we can enable ECRC's on an upstream 441 Nvidia bridge */ 442 if (mxge_nvidia_ecrc_enable && 443 (pvend == 0x10de && pdid == 0x005d)) { 444 err = mxge_enable_nvidia_ecrc(sc, pdev); 445 if (err == 0) { 446 aligned = 1; 447 if (mxge_verbose) 448 device_printf(sc->dev, 449 "Assuming aligned completions" 450 " (ECRC)\n"); 451 } 452 } 453 /* see if the upstream bridge is known to 454 provided aligned completions */ 455 if (/* HT2000 */ (pvend == 0x1166 && pdid == 0x0132) || 456 /* PLX */ (pvend == 0x10b5 && pdid == 0x8532) || 457 /* Intel */ (pvend == 0x8086 && 458 /* E5000 NorthBridge*/((pdid >= 0x25f7 && pdid <= 0x25fa) || 459 /* E5000 SouthBridge*/ (pdid >= 0x3510 && pdid <= 0x351b)))) { 460 aligned = 1; 461 if (mxge_verbose) 462 device_printf(sc->dev, 463 "Assuming aligned completions " 464 "(0x%x:0x%x)\n", pvend, pdid); 465 } 466 467 abort: 468 if (aligned) { 469 sc->fw_name = mxge_fw_aligned; 470 sc->tx.boundary = 4096; 471 } else { 472 sc->fw_name = mxge_fw_unaligned; 473 sc->tx.boundary = 2048; 474 } 475 } 476 477 union qualhack 478 { 479 const char *ro_char; 480 char *rw_char; 481 }; 482 483 static int 484 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 485 { 486 487 488 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 489 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 490 be32toh(hdr->mcp_type)); 491 return EIO; 492 } 493 494 /* save firmware version for sysctl */ 495 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 496 if (mxge_verbose) 497 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 498 499 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 500 &sc->fw_ver_minor, &sc->fw_ver_tiny); 501 502 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 503 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 504 device_printf(sc->dev, "Found firmware version %s\n", 505 sc->fw_version); 506 device_printf(sc->dev, "Driver needs %d.%d\n", 507 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 508 return EINVAL; 509 } 510 return 0; 511 512 } 513 514 static int 515 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 516 { 517 const struct firmware *fw; 518 const mcp_gen_header_t *hdr; 519 unsigned hdr_offset; 520 const char *fw_data; 521 union qualhack hack; 522 int status; 523 unsigned int i; 524 char dummy; 525 526 527 fw = firmware_get(sc->fw_name); 528 529 if (fw == NULL) { 530 device_printf(sc->dev, "Could not find firmware image %s\n", 531 sc->fw_name); 532 return ENOENT; 533 } 534 if (fw->datasize > *limit || 535 fw->datasize < MCP_HEADER_PTR_OFFSET + 4) { 536 device_printf(sc->dev, "Firmware image %s too large (%d/%d)\n", 537 sc->fw_name, (int)fw->datasize, (int) *limit); 538 status = ENOSPC; 539 goto abort_with_fw; 540 } 541 *limit = fw->datasize; 542 543 /* check id */ 544 fw_data = (const char *)fw->data; 545 hdr_offset = htobe32(*(const uint32_t *) 546 (fw_data + MCP_HEADER_PTR_OFFSET)); 547 if ((hdr_offset & 3) || 
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	mb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	mb();
	DELAY(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
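/*
 * A note on the buf_bytes idiom used above and in mxge_send_cmd()
 * below: the NIC wants the scratch buffer 8-byte aligned, so the code
 * over-allocates on the stack and rounds the pointer up.  A minimal
 * stand-alone sketch of the same trick (illustration only):
 */
#if 0
	char raw[64 + 8];	/* 8 spare bytes leave room for rounding */
	uint32_t *aligned;

	/* round up to the next multiple of 8 */
	aligned = (uint32_t *)(((unsigned long)raw + 7) & ~7UL);
#endif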
"enable" : "disable"), confirm, 627 *confirm); 628 } 629 return; 630 } 631 632 static int 633 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 634 { 635 mcp_cmd_t *buf; 636 char buf_bytes[sizeof(*buf) + 8]; 637 volatile mcp_cmd_response_t *response = sc->cmd; 638 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 639 uint32_t dma_low, dma_high; 640 int sleep_total = 0; 641 642 /* ensure buf is aligned to 8 bytes */ 643 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 644 645 buf->data0 = htobe32(data->data0); 646 buf->data1 = htobe32(data->data1); 647 buf->data2 = htobe32(data->data2); 648 buf->cmd = htobe32(cmd); 649 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 650 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 651 652 buf->response_addr.low = htobe32(dma_low); 653 buf->response_addr.high = htobe32(dma_high); 654 mtx_lock(&sc->cmd_mtx); 655 response->result = 0xffffffff; 656 mb(); 657 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 658 659 /* wait up to 20ms */ 660 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 661 bus_dmamap_sync(sc->cmd_dma.dmat, 662 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 663 mb(); 664 if (response->result != 0xffffffff) { 665 if (response->result == 0) { 666 data->data0 = be32toh(response->data); 667 mtx_unlock(&sc->cmd_mtx); 668 return 0; 669 } else { 670 device_printf(sc->dev, 671 "mxge: command %d " 672 "failed, result = %d\n", 673 cmd, be32toh(response->result)); 674 mtx_unlock(&sc->cmd_mtx); 675 return ENXIO; 676 } 677 } 678 DELAY(1000); 679 } 680 mtx_unlock(&sc->cmd_mtx); 681 device_printf(sc->dev, "mxge: command %d timed out" 682 "result = %d\n", 683 cmd, be32toh(response->result)); 684 return EAGAIN; 685 } 686 687 static int 688 mxge_adopt_running_firmware(mxge_softc_t *sc) 689 { 690 struct mcp_gen_header *hdr; 691 const size_t bytes = sizeof (struct mcp_gen_header); 692 size_t hdr_offset; 693 int status; 694 695 /* find running firmware header */ 696 hdr_offset = htobe32(*(volatile uint32_t *) 697 (sc->sram + MCP_HEADER_PTR_OFFSET)); 698 699 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 700 device_printf(sc->dev, 701 "Running firmware has bad header offset (%d)\n", 702 (int)hdr_offset); 703 return EIO; 704 } 705 706 /* copy header of running firmware from SRAM to host memory to 707 * validate firmware */ 708 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 709 if (hdr == NULL) { 710 device_printf(sc->dev, "could not malloc firmware hdr\n"); 711 return ENOMEM; 712 } 713 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 714 rman_get_bushandle(sc->mem_res), 715 hdr_offset, (char *)hdr, bytes); 716 status = mxge_validate_firmware(sc, hdr); 717 free(hdr, M_DEVBUF); 718 719 /* 720 * check to see if adopted firmware has bug where adopting 721 * it will cause broadcasts to be filtered unless the NIC 722 * is kept in ALLMULTI mode 723 */ 724 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 725 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 726 sc->adopted_rx_filter_bug = 1; 727 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 728 "working around rx filter bug\n", 729 sc->fw_ver_major, sc->fw_ver_minor, 730 sc->fw_ver_tiny); 731 } 732 733 return status; 734 } 735 736 737 static int 738 mxge_load_firmware(mxge_softc_t *sc) 739 { 740 volatile uint32_t *confirm; 741 volatile char *submit; 742 char buf_bytes[72]; 743 uint32_t *buf, size, dma_low, dma_high; 744 int status, i; 745 746 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 747 748 size = sc->sram_size; 749 status 
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}

static int
mxge_load_firmware(mxge_softc_t *sc)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx.boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx.boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	mb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);	/* where the code starts */
	buf[4] = htobe32(size - 8);		/* length of code */
	buf[5] = htobe32(8);			/* where to copy to */
	buf[6] = htobe32(0);			/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	mb();
	DELAY(1000);
	mb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
			      confirm, *confirm);
		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}
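/*
 * Worked example for the packing above (hypothetical address): for a
 * MAC of 00:60:dd:47:ab:cd, data0 becomes 0x0060dd47 (first four
 * bytes, most significant first) and data1 becomes 0x0000abcd (last
 * two bytes).  mxge_send_cmd() then converts both words to big endian
 * for the firmware.
 */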
static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
				      "%d\n", err);
			/* abort, leaving multicast filtering off */
			IF_ADDR_UNLOCK(ifp);
			return;
		}
	}
	IF_ADDR_UNLOCK(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}
static int
mxge_reset(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	size_t bytes;
	int status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* Now exchange information about interrupts */
	bytes = mxge_max_intr_slots * sizeof (*sc->rx_done.entry);
	memset(sc->rx_done.entry, 0, bytes);
	cmd.data0 = (uint32_t)bytes;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	cmd.data0 = MXGE_LOWPART_TO_U32(sc->rx_done.dma.bus_addr);
	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->rx_done.dma.bus_addr);
	status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd);

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	sc->read_dma = sc->write_dma = sc->read_write_dma = 0;

	/* Read DMA */
	cmd.data0 = MXGE_LOWPART_TO_U32(sc->dmabench_dma.bus_addr);
	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->dmabench_dma.bus_addr);
	cmd.data2 = sc->tx.boundary * 0x10000;

	status = mxge_send_cmd(sc, MXGEFW_DMA_TEST, &cmd);
	if (status != 0)
		device_printf(sc->dev, "read dma benchmark failed\n");
	else
		sc->read_dma = ((cmd.data0>>16) * sc->tx.boundary * 2) /
			(cmd.data0 & 0xffff);

	/* Write DMA */
	cmd.data0 = MXGE_LOWPART_TO_U32(sc->dmabench_dma.bus_addr);
	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->dmabench_dma.bus_addr);
	cmd.data2 = sc->tx.boundary * 0x1;
	status = mxge_send_cmd(sc, MXGEFW_DMA_TEST, &cmd);
	if (status != 0)
		device_printf(sc->dev, "write dma benchmark failed\n");
	else
		sc->write_dma = ((cmd.data0>>16) * sc->tx.boundary * 2) /
			(cmd.data0 & 0xffff);
	/* Read/Write DMA */
	cmd.data0 = MXGE_LOWPART_TO_U32(sc->dmabench_dma.bus_addr);
	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->dmabench_dma.bus_addr);
	cmd.data2 = sc->tx.boundary * 0x10001;
	status = mxge_send_cmd(sc, MXGEFW_DMA_TEST, &cmd);
	if (status != 0)
		device_printf(sc->dev, "read/write dma benchmark failed\n");
	else
		sc->read_write_dma =
			((cmd.data0>>16) * sc->tx.boundary * 2 * 2) /
			(cmd.data0 & 0xffff);

	/* reset mcp/driver shared state back to 0 */
	bzero(sc->rx_done.entry, bytes);
	sc->rx_done.idx = 0;
	sc->rx_done.cnt = 0;
	sc->tx.req = 0;
	sc->tx.done = 0;
	sc->tx.pkt_done = 0;
	sc->tx.wake = 0;
	sc->tx.stall = 0;
	sc->rx_big.cnt = 0;
	sc->rx_small.cnt = 0;
	sc->rdma_tags_available = 15;
	sc->fw_stats->valid = 0;
	sc->fw_stats->send_done_count = 0;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, 0);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}
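/*
 * A note on the benchmark math above (an interpretation, not from
 * firmware documentation): the firmware appears to return a transfer
 * count in the upper 16 bits of data0 and a time in the lower 16
 * bits, so MB/s works out to (count * tx.boundary * 2) / time.  For
 * example, with tx.boundary = 4096, count = 100 and time = 500, the
 * read benchmark would report (100 * 4096 * 2) / 500 = 1638 MB/s.
 */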
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}
static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx.boundary,
		       0, "tx boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable/disable flow control");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");

	/* host counters exported for debugging */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "rx_small_cnt",
		       CTLFLAG_RD, &sc->rx_small.cnt,
		       0, "rx_small_cnt");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "rx_big_cnt",
		       CTLFLAG_RD, &sc->rx_big.cnt,
		       0, "rx_big_cnt");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_req",
		       CTLFLAG_RD, &sc->tx.req,
		       0, "tx_req");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_done",
		       CTLFLAG_RD, &sc->tx.done,
		       0, "tx_done");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_pkt_done",
		       CTLFLAG_RD, &sc->tx.pkt_done,
		       0, "tx_pkt_done");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_stall",
		       CTLFLAG_RD, &sc->tx.stall,
		       0, "tx_stall");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_wake",
		       CTLFLAG_RD, &sc->tx.wake,
		       0, "tx_wake");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");
}

/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_buf_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		mb();
	}
}
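/*
 * A note on the "& tx->mask" above: the ring size is a power of two
 * and tx->mask is (size - 1), so masking implements cheap modular
 * indexing.  Illustration only, with a made-up 8-entry ring:
 */
#if 0
	int size = 8, mask = size - 1;	/* mask = 0x7 */
	int slot = (6 + 3) & mask;	/* 9 % 8 == 1: wraps to slot 1 */
#endif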
/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	mb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			mb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		mb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	mb();
}

static inline void
mxge_submit_req_wc(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src, int cnt)
{
	tx->req += cnt;
	mb();
	while (cnt >= 4) {
		mxge_pio_copy((volatile char *)tx->wc_fifo, src, 64);
		mb();
		src += 4;
		cnt -= 4;
	}
	if (cnt > 0) {
		/* pad it to 64 bytes.  The src is 64 bytes bigger than it
		   needs to be so that we don't overrun it */
		mxge_pio_copy(tx->wc_fifo + MXGEFW_ETH_SEND_OFFSET(cnt), src, 64);
		mb();
	}
}

static void
mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt)
{
	mxge_tx_buf_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ether_header *eh;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < sizeof (*eh)
			    + sizeof (*ip))) {
		m_copydata(m, 0, sizeof (*eh) + sizeof (*ip),
			   sc->scratch);
		eh = (struct ether_header *)sc->scratch;
	} else {
		eh = mtod(m, struct ether_header *);
	}
	ip = (struct ip *) (eh + 1);
	if (__predict_false(m->m_len < sizeof (*eh) + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, sizeof (*eh) + (ip->ip_hl << 2)
			   + sizeof (*tcp), sc->scratch);
		eh = (struct ether_header *) sc->scratch;
		ip = (struct ip *) (eh + 1);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(sizeof (*eh) + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = sizeof(*eh) + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &sc->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */
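	/*
	 * Worked example of the bookkeeping below (made-up numbers):
	 * with a 14-byte ethernet header and 20-byte IP and TCP
	 * headers, cum_len starts at -54.  It turns non-negative
	 * exactly where the payload begins, and thereafter a segment
	 * cut is taken whenever cum_len would cross a multiple of
	 * mss, e.g. mss = 1448 cuts the payload every 1448 bytes.
	 */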
	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > MXGE_MAX_SEND_DESC))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	if (tx->wc_fifo == NULL)
		mxge_submit_req(tx, tx->req_list, cnt);
	else
		mxge_submit_req_wc(tx, tx->req_list, cnt);
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	sc->ifp->if_oerrors++;
	if (!once) {
		printf("MXGE_MAX_SEND_DESC exceeded via TSO!\n");
		printf("mss = %d, %ld!\n", mss, (long)seg - (long)tx->seg_list);
		once = 1;
	}
	return;
}
static void
mxge_encap(mxge_softc_t *sc, struct mbuf *m)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_buf_t *tx;
	struct ether_header *eh;
	struct ip *ip;
	int cnt, cum_len, err, i, idx, odd_flag;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;

	ifp = sc->ifp;
	tx = &sc->tx;

	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (err == EFBIG) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (err != 0) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d,"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(sc, m, cnt);
		return;
	}

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
		/* ensure ip header is in first mbuf, copy
		   it to a scratch buffer if not */
		if (__predict_false(m->m_len < sizeof (*eh)
				    + sizeof (*ip))) {
			m_copydata(m, 0, sizeof (*eh) + sizeof (*ip),
				   sc->scratch);
			eh = (struct ether_header *)sc->scratch;
		} else {
			eh = mtod(m, struct ether_header *);
		}
		ip = (struct ip *) (eh + 1);
		cksum_offset = sizeof(*eh) + (ip->ip_hl << 2);
		pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		req++;
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		       "cso:%d, flags:0x%x, rdma:%d\n",
		       i, (int)ntohl(tx->req_list[i].addr_high),
		       (int)ntohl(tx->req_list[i].addr_low),
		       (int)ntohs(tx->req_list[i].length),
		       (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		       tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		       tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	if (tx->wc_fifo == NULL)
		mxge_submit_req(tx, tx->req_list, cnt);
	else
		mxge_submit_req_wc(tx, tx->req_list, cnt);
	return;

drop:
	m_freem(m);
	ifp->if_oerrors++;
	return;
}

static inline void
mxge_start_locked(mxge_softc_t *sc)
{
	struct mbuf *m;
	struct ifnet *ifp;

	ifp = sc->ifp;
	while ((sc->tx.mask - (sc->tx.req - sc->tx.done))
	       > MXGE_MAX_SEND_DESC) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(sc, m);
	}
	/* ran out of transmit slots */
	if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
		sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
		sc->tx.stall++;
	}
}

static void
mxge_start(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;

	mtx_lock(&sc->tx_mtx);
	mxge_start_locked(sc);
	mtx_unlock(&sc->tx_mtx);
}

/*
 * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	low = src->addr_low;
	src->addr_low = 0xffffffff;
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	mb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	mb();
	src->addr_low = low;
	dst->addr_low = low;
	mb();
}

static int
mxge_get_buf_small(mxge_softc_t *sc, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_buf_t *rx = &sc->rx_small;
	int cnt, err;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = MHLEN;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	if ((idx & 7) == 7) {
		if (rx->wc_fifo == NULL)
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		else {
			mb();
			mxge_pio_copy(rx->wc_fifo, &rx->shadow[idx - 7], 64);
		}
	}
	return err;
}
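/*
 * A note on the "(idx & 7) == 7" test above and in mxge_get_buf_big()
 * below: receive buffers are handed to the NIC eight at a time, so a
 * refill only touches the wire on every 8th slot, once a full aligned
 * group of descriptors is ready.  Illustration only (refill loop as a
 * caller might run it over the whole ring):
 */
#if 0
	/* slots 0..6 only update the shadow ring; slot 7 pushes all 8 */
	for (i = 0; i <= rx->mask; i++)
		err = mxge_get_buf_small(sc, rx->info[i].map, i);
#endif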
static int
mxge_get_buf_big(mxge_softc_t *sc, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_buf_t *rx = &sc->rx_big;
	int cnt, err;

	m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, sc->big_bytes);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = sc->big_bytes;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	if ((idx & 7) == 7) {
		if (rx->wc_fifo == NULL)
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		else {
			mb();
			mxge_pio_copy(rx->wc_fifo, &rx->shadow[idx - 7], 64);
		}
	}
	return err;
}

static inline void
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eh;
	struct ip *ip;

	eh = mtod(m, struct ether_header *);

	/* only deal with IPv4 TCP & UDP for now */
	if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
		return;
	ip = (struct ip *)(eh + 1);
	if (__predict_false(ip->ip_p != IPPROTO_TCP &&
			    ip->ip_p != IPPROTO_UDP))
		return;

	/*
	 * Myri10GE hardware checksums are not valid if the sender
	 * padded the frame with non-zero padding.  This is because
	 * the firmware just does a simple 16-bit 1s complement
	 * checksum across the entire frame, excluding the first 14
	 * bytes.  It is easiest to simply assume the worst, and
	 * only apply hardware checksums to non-padded frames.  This
	 * is what nearly every other OS does by default.
	 */

	if (__predict_true(m->m_pkthdr.len ==
			   (ntohs(ip->ip_len) + ETHER_HDR_LEN))) {
		m->m_pkthdr.csum_data = csum;
		m->m_pkthdr.csum_flags = CSUM_DATA_VALID;
	}
}

static inline void
mxge_rx_done_big(mxge_softc_t *sc, int len, int csum)
{
	struct ifnet *ifp;
	struct mbuf *m = 0;		/* -Wuninitialized */
	struct mbuf *m_prev = 0;	/* -Wuninitialized */
	struct mbuf *m_head = 0;
	bus_dmamap_t old_map;
	mxge_rx_buf_t *rx;
	int idx;

	rx = &sc->rx_big;
	ifp = sc->ifp;
	while (len > 0) {
		idx = rx->cnt & rx->mask;
		rx->cnt++;
		/* save a pointer to the received mbuf */
		m = rx->info[idx].m;
		/* try to replace the received mbuf */
		if (mxge_get_buf_big(sc, rx->extra_map, idx)) {
			goto drop;
		}
		/* unmap the received buffer */
		old_map = rx->info[idx].map;
		bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rx->dmat, old_map);

		/* swap the bus_dmamap_t's */
		rx->info[idx].map = rx->extra_map;
		rx->extra_map = old_map;

		/* chain multiple segments together */
		if (!m_head) {
			m_head = m;
			/* mcp implicitly skips 1st 2 bytes so that
			 * packet is properly aligned */
			m->m_data += MXGEFW_PAD;
			m->m_pkthdr.len = len;
			m->m_len = sc->big_bytes - MXGEFW_PAD;
		} else {
			m->m_len = sc->big_bytes;
			m->m_flags &= ~M_PKTHDR;
			m_prev->m_next = m;
		}
		len -= m->m_len;
		m_prev = m;
	}

	/* trim trailing garbage from the last mbuf in the chain.  If
	 * there is any garbage, len will be negative */
	m->m_len += len;

	m_head->m_pkthdr.rcvif = ifp;
	ifp->if_ipackets++;
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag)
		mxge_rx_csum(m_head, csum);

	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m_head);
	return;

drop:
	/* drop the frame -- the old mbuf(s) are re-cycled by running
	   every slot through the allocator */
	if (m_head) {
		len -= sc->big_bytes;
		m_freem(m_head);
	} else {
		len -= (sc->big_bytes + MXGEFW_PAD);
	}
	while ((int)len > 0) {
		idx = rx->cnt & rx->mask;
		rx->cnt++;
		m = rx->info[idx].m;
		if (0 == (mxge_get_buf_big(sc, rx->extra_map, idx))) {
			m_freem(m);
			/* unmap the received buffer */
			old_map = rx->info[idx].map;
			bus_dmamap_sync(rx->dmat, old_map,
					BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rx->dmat, old_map);

			/* swap the bus_dmamap_t's */
			rx->info[idx].map = rx->extra_map;
			rx->extra_map = old_map;
		}
		len -= sc->big_bytes;
	}

	ifp->if_ierrors++;
}

static inline void
mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum)
{
	struct ifnet *ifp;
	struct mbuf *m;
	mxge_rx_buf_t *rx;
	bus_dmamap_t old_map;
	int idx;

	ifp = sc->ifp;
	rx = &sc->rx_small;
	idx = rx->cnt & rx->mask;
	rx->cnt++;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_small(sc, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ifp->if_ipackets++;
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag)
		mxge_rx_csum(m, csum);

	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}

static inline void
mxge_clean_rx_done(mxge_softc_t *sc)
{
	mxge_rx_done_t *rx_done = &sc->rx_done;
	int limit = 0;
	uint16_t length;
	uint16_t checksum;

	while (rx_done->entry[rx_done->idx].length != 0) {
		length = ntohs(rx_done->entry[rx_done->idx].length);
		rx_done->entry[rx_done->idx].length = 0;
		checksum = ntohs(rx_done->entry[rx_done->idx].checksum);
		if (length <= (MHLEN - MXGEFW_PAD))
			mxge_rx_done_small(sc, length, checksum);
		else
			mxge_rx_done_big(sc, length, checksum);
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & (mxge_max_intr_slots - 1);

		/* limit potential for livelock */
		if (__predict_false(++limit > 2 * mxge_max_intr_slots))
			break;
	}
}

static inline void
mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx)
{
	struct ifnet *ifp;
	mxge_tx_buf_t *tx;
	struct mbuf *m;
	bus_dmamap_t map;
	int idx, limit;

	limit = 0;
	tx = &sc->tx;
	ifp = sc->ifp;
	while (tx->pkt_done != mcp_idx) {
		idx = tx->done & tx->mask;
		tx->done++;
		m = tx->info[idx].m;
		/* mbuf and DMA map only attached to the first
		   segment per-mbuf */
		if (m != NULL) {
			ifp->if_opackets++;
			tx->info[idx].m = NULL;
			map = tx->info[idx].map;
			bus_dmamap_unload(tx->dmat, map);
			m_freem(m);
		}
		if (tx->info[idx].flag) {
			tx->info[idx].flag = 0;
			tx->pkt_done++;
		}
		/* limit potential for livelock by only handling
		   2 full tx rings per call */
		if (__predict_false(++limit > 2 * tx->mask))
			break;
	}

	/* If we have space, clear IFF_DRV_OACTIVE to tell the stack
	   that it's OK to send packets */

	if (ifp->if_drv_flags & IFF_DRV_OACTIVE &&
	    tx->req - tx->done < (tx->mask + 1)/4) {
		mtx_lock(&sc->tx_mtx);
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		sc->tx.wake++;
		mxge_start_locked(sc);
		mtx_unlock(&sc->tx_mtx);
	}
}
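
/*
 * How interrupts work here: the firmware DMAs a small mcp_irq_data_t
 * status block into host memory and sets stats->valid once the write
 * has completed.  For legacy (INTx) interrupts the handler must also
 * write the deassert register and, when mxge_deassert_wait is set,
 * poll stats->valid until the firmware confirms the line is low; MSI
 * needs neither.  Finally, the writes to the two sc->irq_claim words
 * hand the interrupt "token" back to the NIC so it may interrupt
 * again.
 */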

static void
mxge_intr(void *arg)
{
	mxge_softc_t *sc = arg;
	mcp_irq_data_t *stats = sc->fw_stats;
	mxge_tx_buf_t *tx = &sc->tx;
	mxge_rx_done_t *rx_done = &sc->rx_done;
	uint32_t send_done_count;
	uint8_t valid;

	/* make sure the DMA has finished */
	if (!stats->valid) {
		return;
	}
	valid = stats->valid;

	if (!sc->msi_enabled) {
		/* lower legacy IRQ */
		*sc->irq_deassert = 0;
		if (!mxge_deassert_wait)
			/* don't wait for conf. that irq is low */
			stats->valid = 0;
	} else {
		stats->valid = 0;
	}

	/* loop while waiting for legacy irq deassertion */
	do {
		/* check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			mxge_tx_done(sc, (int)send_done_count);
			mxge_clean_rx_done(sc);
			send_done_count = be32toh(stats->send_done_count);
		}
	} while (*((volatile uint8_t *) &stats->valid));

	if (__predict_false(stats->stats_updated)) {
		if (sc->link_state != stats->link_up) {
			sc->link_state = stats->link_up;
			if (sc->link_state) {
				if_link_state_change(sc->ifp, LINK_STATE_UP);
				if (mxge_verbose)
					device_printf(sc->dev, "link up\n");
			} else {
				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
		}
		if (sc->rdma_tags_available !=
		    be32toh(sc->fw_stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(sc->fw_stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! "
				      "%d tags left\n",
				      sc->rdma_tags_available);
		}
		sc->down_cnt += stats->link_down;
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
		*sc->irq_claim = be32toh(3);
	*(sc->irq_claim + 1) = be32toh(3);
}

static void
mxge_init(void *arg)
{
}


static void
mxge_free_mbufs(mxge_softc_t *sc)
{
	int i;

	for (i = 0; i <= sc->rx_big.mask; i++) {
		if (sc->rx_big.info[i].m == NULL)
			continue;
		bus_dmamap_unload(sc->rx_big.dmat,
				  sc->rx_big.info[i].map);
		m_freem(sc->rx_big.info[i].m);
		sc->rx_big.info[i].m = NULL;
	}

	for (i = 0; i <= sc->rx_small.mask; i++) {
		if (sc->rx_small.info[i].m == NULL)
			continue;
		bus_dmamap_unload(sc->rx_small.dmat,
				  sc->rx_small.info[i].map);
		m_freem(sc->rx_small.info[i].m);
		sc->rx_small.info[i].m = NULL;
	}

	for (i = 0; i <= sc->tx.mask; i++) {
		sc->tx.info[i].flag = 0;
		if (sc->tx.info[i].m == NULL)
			continue;
		bus_dmamap_unload(sc->tx.dmat,
				  sc->tx.info[i].map);
		m_freem(sc->tx.info[i].m);
		sc->tx.info[i].m = NULL;
	}
}

static void
mxge_free_rings(mxge_softc_t *sc)
{
	int i;

	if (sc->tx.req_bytes != NULL)
		free(sc->tx.req_bytes, M_DEVBUF);
	if (sc->tx.seg_list != NULL)
		free(sc->tx.seg_list, M_DEVBUF);
	if (sc->rx_small.shadow != NULL)
		free(sc->rx_small.shadow, M_DEVBUF);
	if (sc->rx_big.shadow != NULL)
		free(sc->rx_big.shadow, M_DEVBUF);
	if (sc->tx.info != NULL) {
		if (sc->tx.dmat != NULL) {
			for (i = 0; i <= sc->tx.mask; i++) {
				bus_dmamap_destroy(sc->tx.dmat,
						   sc->tx.info[i].map);
			}
			bus_dma_tag_destroy(sc->tx.dmat);
		}
		free(sc->tx.info, M_DEVBUF);
	}
	if (sc->rx_small.info != NULL) {
		if (sc->rx_small.dmat != NULL) {
			for (i = 0; i <= sc->rx_small.mask; i++) {
				bus_dmamap_destroy(sc->rx_small.dmat,
						   sc->rx_small.info[i].map);
			}
			bus_dmamap_destroy(sc->rx_small.dmat,
					   sc->rx_small.extra_map);
			bus_dma_tag_destroy(sc->rx_small.dmat);
		}
		free(sc->rx_small.info, M_DEVBUF);
	}
	if (sc->rx_big.info != NULL) {
		if (sc->rx_big.dmat != NULL) {
			for (i = 0; i <= sc->rx_big.mask; i++) {
				bus_dmamap_destroy(sc->rx_big.dmat,
						   sc->rx_big.info[i].map);
			}
			bus_dmamap_destroy(sc->rx_big.dmat,
					   sc->rx_big.extra_map);
			bus_dma_tag_destroy(sc->rx_big.dmat);
		}
		free(sc->rx_big.info, M_DEVBUF);
	}
}
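
/*
 * Note on the send request copy block allocated below: it is
 * over-allocated by 8 bytes so that req_list can be rounded up to an
 * 8 byte boundary with the usual idiom
 *
 *	aligned = (addr + 7) & ~7UL;
 *
 * which makes any malloc() result usable regardless of its natural
 * alignment.
 */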

static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int i, err;
	unsigned long bytes;

	/* get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine ring sizes\n");
		goto abort_with_nothing;
	}

	rx_ring_size = cmd.data0;

	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
	rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t);
	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
	IFQ_SET_READY(&sc->ifp->if_snd);

	sc->tx.mask = tx_ring_entries - 1;
	sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1;

	err = ENOMEM;

	/* allocate the tx request copy block */
	bytes = 8 +
		sizeof (*sc->tx.req_list) * (MXGE_MAX_SEND_DESC + 4);
	sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	if (sc->tx.req_bytes == NULL)
		goto abort_with_nothing;
	/* ensure req_list entries are aligned to 8 bytes */
	sc->tx.req_list = (mcp_kreq_ether_send_t *)
		((unsigned long)(sc->tx.req_bytes + 7) & ~7UL);

	/* allocate the tx busdma segment list */
	bytes = sizeof (*sc->tx.seg_list) * MXGE_MAX_SEND_DESC;
	sc->tx.seg_list = (bus_dma_segment_t *)
		malloc(bytes, M_DEVBUF, M_WAITOK);
	if (sc->tx.seg_list == NULL)
		goto abort_with_alloc;

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow);
	sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.shadow == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow);
	sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.shadow == NULL)
		goto abort_with_alloc;

	/* allocate the host info rings */
	bytes = tx_ring_entries * sizeof (*sc->tx.info);
	sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->tx.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_small.info);
	sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.info);
	sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.info == NULL)
		goto abort_with_alloc;

	/* allocate the busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx.boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC/2,	/* num segs */
				 sc->tx.boundary,	/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 4096,			/* maxsize */
				 1,			/* num segs */
				 4096,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		goto abort_with_alloc;
	}
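
	/*
	 * Each ring slot gets a private bus_dmamap_t below, plus one
	 * spare ("extra_map") per receive ring.  On receive, a
	 * replacement buffer is mapped through the spare first; only
	 * if that succeeds is the filled buffer unmapped and the two
	 * maps swapped (see mxge_rx_done_small() above).  A failed
	 * allocation therefore never leaves a slot without a mapped
	 * buffer.
	 */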

	/* now use these tags to setup dmamaps for each slot
	   in each ring */
	for (i = 0; i <= sc->tx.mask; i++) {
		err = bus_dmamap_create(sc->tx.dmat, 0,
					&sc->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d tx dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	for (i = 0; i <= sc->rx_small.mask; i++) {
		err = bus_dmamap_create(sc->rx_small.dmat, 0,
					&sc->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_small dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	err = bus_dmamap_create(sc->rx_small.dmat, 0,
				&sc->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
		goto abort_with_alloc;
	}

	for (i = 0; i <= sc->rx_big.mask; i++) {
		err = bus_dmamap_create(sc->rx_big.dmat, 0,
					&sc->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_big dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	err = bus_dmamap_create(sc->rx_big.dmat, 0,
				&sc->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
		goto abort_with_alloc;
	}
	return 0;

abort_with_alloc:
	mxge_free_rings(sc);

abort_with_nothing:
	return err;
}

static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int i, err;
	bus_dmamap_t map;
	bus_addr_t bus;

	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}
	bzero(sc->rx_done.entry,
	      mxge_max_intr_slots * sizeof(*sc->rx_done.entry));

	if (MCLBYTES >=
	    sc->ifp->if_mtu + ETHER_HDR_LEN + MXGEFW_PAD)
		sc->big_bytes = MCLBYTES;
	else
		sc->big_bytes = MJUMPAGESIZE;

	/* get the lanai pointers to the send and receive rings */

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	sc->tx.lanai =
		(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
	err |= mxge_send_cmd(sc,
			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	sc->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	sc->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	if (sc->wc) {
		sc->tx.wc_fifo = sc->sram + MXGEFW_ETH_SEND_4;
		sc->rx_small.wc_fifo = sc->sram + MXGEFW_ETH_RECV_SMALL;
		sc->rx_big.wc_fifo = sc->sram + MXGEFW_ETH_RECV_BIG;
	} else {
		sc->tx.wc_fifo = 0;
		sc->rx_small.wc_fifo = 0;
		sc->rx_big.wc_fifo = 0;
	}

	/* stock receive rings */
	for (i = 0; i <= sc->rx_small.mask; i++) {
		map = sc->rx_small.info[i].map;
		err = mxge_get_buf_small(sc, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, sc->rx_small.mask + 1);
			goto abort;
		}
	}
	for (i = 0; i <= sc->rx_big.mask; i++) {
		map = sc->rx_big.info[i].map;
		err = mxge_get_buf_big(sc, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, sc->rx_big.mask + 1);
			goto abort;
		}
	}
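
	/*
	 * Receive path sizing: frames of at most MHLEN - MXGEFW_PAD
	 * bytes are delivered to the small ring (plain mbufs), larger
	 * frames to the big ring, whose buffers are MCLBYTES when one
	 * cluster can hold the current MTU and MJUMPAGESIZE otherwise
	 * (see the big_bytes selection earlier in this function).
	 */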

	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two.  Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN - MXGEFW_PAD;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = sc->big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Now give the firmware the pointer to the stats block */
	cmd.data0 = MXGE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr);
	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr);
	cmd.data2 = sizeof(struct mcp_irq_data);
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);

	if (err != 0) {
		bus = sc->fw_stats_dma.bus_addr;
		bus += offsetof(struct mcp_irq_data, send_done_count);
		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
		err = mxge_send_cmd(sc,
				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
				    &cmd);
		/* Firmware cannot support multicast without STATS_DMA_V2 */
		sc->fw_multicast_support = 0;
	} else {
		sc->fw_multicast_support = 1;
	}

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	return 0;

abort:
	mxge_free_mbufs(sc);

	return err;
}

static int
mxge_close(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, old_down_cnt;

	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	old_down_cnt = sc->down_cnt;
	mb();
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring down link\n");
	}
	if (old_down_cnt == sc->down_cnt) {
		/* wait for down irq */
		DELAY(10 * sc->intr_coal_delay);
	}
	if (old_down_cnt == sc->down_cnt) {
		device_printf(sc->dev, "never got down irq\n");
	}

	mxge_free_mbufs(sc);

	return 0;
}

static void
mxge_setup_cfg_space(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg;
	uint16_t cmd, lnk, pectl;

	/* find the PCIe link width and set max read request to 4KB */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		pectl = pci_read_config(dev, reg + 0x8, 2);
		pectl = (pectl & ~0x7000) | (5 << 12);
		pci_write_config(dev, reg + 0x8, pectl, 2);
	}

	/* Enable DMA and Memory space access */
	pci_enable_busmaster(dev);
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	cmd |= PCIM_CMD_MEMEN;
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
}
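
/*
 * Reading the reboot status goes through the NIC's PCI vendor-specific
 * capability: a write of 0x3 to vs + 0x10 selects 32-bit read mode,
 * vs + 0x18 takes the target address (0xfffffff0 here, presumably the
 * reboot status register in the NIC's own address space), and the
 * data comes back at vs + 0x14.
 */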

static uint32_t
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;

	/* find the vendor specific offset */
	if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
		device_printf(sc->dev,
			      "could not find vendor specific offset\n");
		return (uint32_t)-1;
	}
	/* enable read32 mode */
	pci_write_config(dev, vs + 0x10, 0x3, 1);
	/* tell NIC which register to read */
	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
	return (pci_read_config(dev, vs + 0x14, 4));
}

static void
mxge_watchdog_reset(mxge_softc_t *sc)
{
	int err;
	uint32_t reboot;
	uint16_t cmd;

	err = ENXIO;

	device_printf(sc->dev, "Watchdog reset!\n");

	/*
	 * check to see if the NIC rebooted.  If it did, then all of
	 * PCI config space has been reset, and things like the
	 * busmaster bit will be zero.  If this is the case, then we
	 * must restore PCI config space before the NIC can be used
	 * again
	 */
	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
	if (cmd == 0xffff) {
		/*
		 * maybe the watchdog caught the NIC rebooting; wait
		 * up to 100ms for it to finish.  If it does not come
		 * back, then give up
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff) {
			device_printf(sc->dev, "NIC disappeared!\n");
			goto abort;
		}
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* print the reboot status */
		reboot = mxge_read_reboot(sc);
		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
			      reboot);
		/* restore PCI configuration space */

		/* XXXX waiting for pci_cfg_restore() to be exported */
		goto abort; /* just abort for now */

		/* and redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);
	} else {
		device_printf(sc->dev, "NIC did not reboot, ring state:\n");
		device_printf(sc->dev, "tx.req=%d tx.done=%d\n",
			      sc->tx.req, sc->tx.done);
		device_printf(sc->dev, "pkt_done=%d fw=%d\n",
			      sc->tx.pkt_done,
			      be32toh(sc->fw_stats->send_done_count));
	}

	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}

abort:
	/*
	 * stop the watchdog if the nic is dead, to avoid spamming the
	 * console
	 */
	if (err != 0) {
		callout_stop(&sc->co_hdl);
	}
}

static void
mxge_watchdog(mxge_softc_t *sc)
{
	mxge_tx_buf_t *tx = &sc->tx;

	/* see if we have outstanding transmits, which
	   have been pending for more than mxge_ticks */
	if (tx->req != tx->done &&
	    tx->watchdog_req != tx->watchdog_done &&
	    tx->done == tx->watchdog_done)
		mxge_watchdog_reset(sc);

	tx->watchdog_req = tx->req;
	tx->watchdog_done = tx->done;
}
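
/*
 * The reset above fires only when all three watchdog conditions hold
 * at tick time: the send ring is non-empty (req != done), it was also
 * non-empty at the previous tick (watchdog_req != watchdog_done), and
 * nothing completed in between (done == watchdog_done) -- i.e. a
 * transmit has been stuck for at least one full mxge_ticks interval.
 */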

static void
mxge_tick(void *arg)
{
	mxge_softc_t *sc = arg;

	/* Synchronize with possible callout reset/stop. */
	if (callout_pending(&sc->co_hdl) ||
	    !callout_active(&sc->co_hdl)) {
		mtx_unlock(&sc->driver_mtx);
		return;
	}

	callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	mxge_watchdog(sc);
}

static int
mxge_media_change(struct ifnet *ifp)
{
	return EINVAL;
}

static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;

	real_mtu = mtu + ETHER_HDR_LEN;
	if ((real_mtu > MXGE_MAX_ETHER_MTU) ||
	    real_mtu < 60)
		return EINVAL;
	mtx_lock(&sc->driver_mtx);
	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		callout_stop(&sc->co_hdl);
		mxge_close(sc);
		err = mxge_open(sc);
		if (err != 0) {
			ifp->if_mtu = old_mtu;
			mxge_close(sc);
			(void) mxge_open(sc);
		}
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	}
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = ifp->if_softc;

	if (sc == NULL)
		return;
	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_status |= sc->fw_stats->link_up ? IFM_ACTIVE : 0;
	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
	ifmr->ifm_active |= sc->fw_stats->link_up ? IFM_FDX : 0;
}
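
/*
 * Capability toggling note for the SIOCSIFCAP case below: TSO4
 * depends on transmit checksum offload, so clearing IFCAP_TXCSUM also
 * clears IFCAP_TSO4, and IFCAP_TSO4 may only be enabled while
 * IFCAP_TXCSUM is set.
 */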

static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	err = 0;
	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		err = ether_ioctl(ifp, command, data);
		break;

	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		mtx_lock(&sc->driver_mtx);
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				err = mxge_open(sc);
				callout_reset(&sc->co_hdl, mxge_ticks,
					      mxge_tick, sc);
			} else {
				/* take care of promisc and allmulti
				   flag changes */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				mxge_close(sc);
				callout_stop(&sc->co_hdl);
			}
		}
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mtx_lock(&sc->driver_mtx);
		mxge_set_multicast_list(sc);
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCSIFCAP:
		mtx_lock(&sc->driver_mtx);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
						      | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		}
		/* handle RXCSUM even when TXCSUM was toggled in the
		   same request */
		if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
				sc->csum_flag = 0;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
				sc->csum_flag = 1;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				printf("mxge requires tx checksum offload"
				       " to be enabled to use TSO\n");
				err = EINVAL;
			}
		}
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCGIFMEDIA:
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
		break;

	default:
		err = ENOTTY;
	}
	return err;
}

static void
mxge_fetch_tunables(mxge_softc_t *sc)
{

	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
			  &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
			  &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
			  &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
			  &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
			  &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
			  &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);

	if (bootverbose)
		mxge_verbose = 1;
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	if (mxge_ticks == 0)
		mxge_ticks = hz;
	sc->pause = mxge_flow_control;
}
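
/*
 * The knobs above are loader tunables, so they may be set in
 * /boot/loader.conf before the driver loads; illustrative (not
 * default) values:
 *
 *	hw.mxge.intr_coal_delay="100"
 *	hw.mxge.flow_control_enabled="0"
 *
 * Out-of-range intr_coal_delay values are clamped back to 30 above.
 */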

static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	size_t bytes;
	int count, rid, err;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 4096,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_nothing;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
		 device_get_nameunit(dev));
	mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF);
	snprintf(sc->tx_mtx_name, sizeof(sc->tx_mtx_name), "%s:tx",
		 device_get_nameunit(dev));
	mtx_init(&sc->tx_mtx, sc->tx_mtx_name, NULL, MTX_DEF);
	snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name),
		 "%s:drv", device_get_nameunit(dev));
	mtx_init(&sc->driver_mtx, sc->driver_mtx_name,
		 MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->fw_stats_dma,
			     sizeof (*sc->fw_stats), 64);
	if (err != 0)
		goto abort_with_zeropad_dma;
	sc->fw_stats = (mcp_irq_data_t *)sc->fw_stats_dma.addr;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_fw_stats;

	/* allocate interrupt queues */
	bytes = mxge_max_intr_slots * sizeof (*sc->rx_done.entry);
	err = mxge_dma_alloc(sc, &sc->rx_done.dma, bytes, 4096);
	if (err != 0)
		goto abort_with_dmabench;
	sc->rx_done.entry = sc->rx_done.dma.addr;
	bzero(sc->rx_done.entry, bytes);
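
	/*
	 * Interrupt resource selection: prefer a single MSI vector
	 * when the device advertises exactly one, falling back to a
	 * shared legacy INTx line otherwise.  MSI uses rid 1, legacy
	 * interrupts rid 0.
	 */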
"MSI" : "INTx", 3027 rman_get_start(sc->irq_res)); 3028 /* load the firmware */ 3029 mxge_select_firmware(sc); 3030 3031 err = mxge_load_firmware(sc); 3032 if (err != 0) 3033 goto abort_with_irq_res; 3034 sc->intr_coal_delay = mxge_intr_coal_delay; 3035 err = mxge_reset(sc); 3036 if (err != 0) 3037 goto abort_with_irq_res; 3038 3039 err = mxge_alloc_rings(sc); 3040 if (err != 0) { 3041 device_printf(sc->dev, "failed to allocate rings\n"); 3042 goto abort_with_irq_res; 3043 } 3044 3045 err = bus_setup_intr(sc->dev, sc->irq_res, 3046 INTR_TYPE_NET | INTR_MPSAFE, 3047 NULL, mxge_intr, sc, &sc->ih); 3048 if (err != 0) { 3049 goto abort_with_rings; 3050 } 3051 /* hook into the network stack */ 3052 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 3053 ifp->if_baudrate = 100000000; 3054 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 3055 IFCAP_JUMBO_MTU; 3056 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 3057 ifp->if_capenable = ifp->if_capabilities; 3058 sc->csum_flag = 1; 3059 ifp->if_init = mxge_init; 3060 ifp->if_softc = sc; 3061 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 3062 ifp->if_ioctl = mxge_ioctl; 3063 ifp->if_start = mxge_start; 3064 ether_ifattach(ifp, sc->mac_addr); 3065 /* ether_ifattach sets mtu to 1500 */ 3066 ifp->if_mtu = MXGE_MAX_ETHER_MTU - ETHER_HDR_LEN; 3067 3068 /* Initialise the ifmedia structure */ 3069 ifmedia_init(&sc->media, 0, mxge_media_change, 3070 mxge_media_status); 3071 ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL); 3072 mxge_add_sysctls(sc); 3073 return 0; 3074 3075 abort_with_rings: 3076 mxge_free_rings(sc); 3077 abort_with_irq_res: 3078 bus_release_resource(dev, SYS_RES_IRQ, 3079 sc->msi_enabled ? 1 : 0, sc->irq_res); 3080 if (sc->msi_enabled) 3081 pci_release_msi(dev); 3082 abort_with_rx_done: 3083 sc->rx_done.entry = NULL; 3084 mxge_dma_free(&sc->rx_done.dma); 3085 abort_with_dmabench: 3086 mxge_dma_free(&sc->dmabench_dma); 3087 abort_with_fw_stats: 3088 mxge_dma_free(&sc->fw_stats_dma); 3089 abort_with_zeropad_dma: 3090 mxge_dma_free(&sc->zeropad_dma); 3091 abort_with_cmd_dma: 3092 mxge_dma_free(&sc->cmd_dma); 3093 abort_with_mem_res: 3094 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 3095 abort_with_lock: 3096 pci_disable_busmaster(dev); 3097 mtx_destroy(&sc->cmd_mtx); 3098 mtx_destroy(&sc->tx_mtx); 3099 mtx_destroy(&sc->driver_mtx); 3100 if_free(ifp); 3101 abort_with_parent_dmat: 3102 bus_dma_tag_destroy(sc->parent_dmat); 3103 3104 abort_with_nothing: 3105 return err; 3106 } 3107 3108 static int 3109 mxge_detach(device_t dev) 3110 { 3111 mxge_softc_t *sc = device_get_softc(dev); 3112 3113 mtx_lock(&sc->driver_mtx); 3114 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 3115 mxge_close(sc); 3116 callout_stop(&sc->co_hdl); 3117 mtx_unlock(&sc->driver_mtx); 3118 ether_ifdetach(sc->ifp); 3119 ifmedia_removeall(&sc->media); 3120 mxge_dummy_rdma(sc, 0); 3121 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 3122 mxge_free_rings(sc); 3123 bus_release_resource(dev, SYS_RES_IRQ, 3124 sc->msi_enabled ? 

static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	mtx_lock(&sc->driver_mtx);
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc);
	callout_stop(&sc->co_hdl);
	mtx_unlock(&sc->driver_mtx);
	ether_ifdetach(sc->ifp);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	mxge_free_rings(sc);
	bus_release_resource(dev, SYS_RES_IRQ,
			     sc->msi_enabled ? 1 : 0, sc->irq_res);
	if (sc->msi_enabled)
		pci_release_msi(dev);

	sc->rx_done.entry = NULL;
	mxge_dma_free(&sc->rx_done.dma);
	mxge_dma_free(&sc->fw_stats_dma);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_mtx);
	mtx_destroy(&sc->tx_mtx);
	mtx_destroy(&sc->driver_mtx);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}

/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/