1 /************************************************************************* 2 * myri10ge.c: Myricom Myri-10G Ethernet driver. 3 * 4 * Copyright (C) 2005 - 2011 Myricom, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of Myricom, Inc. nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 * 31 * 32 * If the eeprom on your board is not recent enough, you will need to get a 33 * newer firmware image at: 34 * http://www.myri.com/scs/download-Myri10GE.html 35 * 36 * Contact Information: 37 * <help@myri.com> 38 * Myricom, Inc., 325N Santa Anita Avenue, Arcadia, CA 91006 39 *************************************************************************/ 40 41 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 42 43 #include <linux/tcp.h> 44 #include <linux/netdevice.h> 45 #include <linux/skbuff.h> 46 #include <linux/string.h> 47 #include <linux/module.h> 48 #include <linux/pci.h> 49 #include <linux/dma-mapping.h> 50 #include <linux/etherdevice.h> 51 #include <linux/if_ether.h> 52 #include <linux/if_vlan.h> 53 #include <linux/inet_lro.h> 54 #include <linux/dca.h> 55 #include <linux/ip.h> 56 #include <linux/inet.h> 57 #include <linux/in.h> 58 #include <linux/ethtool.h> 59 #include <linux/firmware.h> 60 #include <linux/delay.h> 61 #include <linux/timer.h> 62 #include <linux/vmalloc.h> 63 #include <linux/crc32.h> 64 #include <linux/moduleparam.h> 65 #include <linux/io.h> 66 #include <linux/log2.h> 67 #include <linux/slab.h> 68 #include <linux/prefetch.h> 69 #include <net/checksum.h> 70 #include <net/ip.h> 71 #include <net/tcp.h> 72 #include <asm/byteorder.h> 73 #include <asm/io.h> 74 #include <asm/processor.h> 75 #ifdef CONFIG_MTRR 76 #include <asm/mtrr.h> 77 #endif 78 79 #include "myri10ge_mcp.h" 80 #include "myri10ge_mcp_gen_header.h" 81 82 #define MYRI10GE_VERSION_STR "1.5.3-1.534" 83 84 MODULE_DESCRIPTION("Myricom 10G driver (10GbE)"); 85 MODULE_AUTHOR("Maintainer: help@myri.com"); 86 MODULE_VERSION(MYRI10GE_VERSION_STR); 87 MODULE_LICENSE("Dual BSD/GPL"); 88 89 #define MYRI10GE_MAX_ETHER_MTU 9014 90 91 #define MYRI10GE_ETH_STOPPED 0 92 #define MYRI10GE_ETH_STOPPING 1 93 #define MYRI10GE_ETH_STARTING 2 94 #define MYRI10GE_ETH_RUNNING 3 95 #define MYRI10GE_ETH_OPEN_FAILED 4 96 97 #define MYRI10GE_EEPROM_STRINGS_SIZE 256 98 #define MYRI10GE_MAX_SEND_DESC_TSO ((65536 / 2048) * 2) 99 #define MYRI10GE_MAX_LRO_DESCRIPTORS 8 100 #define MYRI10GE_LRO_MAX_PKTS 64 101 102 #define MYRI10GE_NO_CONFIRM_DATA htonl(0xffffffff) 103 #define MYRI10GE_NO_RESPONSE_RESULT 0xffffffff 104 105 #define MYRI10GE_ALLOC_ORDER 0 106 #define MYRI10GE_ALLOC_SIZE ((1 << MYRI10GE_ALLOC_ORDER) * PAGE_SIZE) 107 #define MYRI10GE_MAX_FRAGS_PER_FRAME (MYRI10GE_MAX_ETHER_MTU/MYRI10GE_ALLOC_SIZE + 1) 108 109 #define MYRI10GE_MAX_SLICES 32 110 111 struct myri10ge_rx_buffer_state { 112 struct page *page; 113 int page_offset; 114 DEFINE_DMA_UNMAP_ADDR(bus); 115 DEFINE_DMA_UNMAP_LEN(len); 116 }; 117 118 struct myri10ge_tx_buffer_state { 119 struct sk_buff *skb; 120 int last; 121 DEFINE_DMA_UNMAP_ADDR(bus); 122 DEFINE_DMA_UNMAP_LEN(len); 123 }; 124 125 struct myri10ge_cmd { 126 u32 data0; 127 u32 data1; 128 u32 data2; 129 }; 130 131 struct myri10ge_rx_buf { 132 struct mcp_kreq_ether_recv __iomem *lanai; /* lanai ptr for recv ring */ 133 struct mcp_kreq_ether_recv *shadow; /* host shadow of recv ring */ 134 struct myri10ge_rx_buffer_state *info; 135 struct page *page; 136 dma_addr_t bus; 137 int page_offset; 138 int cnt; 139 int fill_cnt; 140 int alloc_fail; 141 int mask; /* number of rx slots -1 */ 142 int watchdog_needed; 143 }; 144 145 struct myri10ge_tx_buf { 146 struct mcp_kreq_ether_send __iomem *lanai; /* lanai ptr for sendq */ 147 __be32 __iomem *send_go; /* "go" doorbell ptr */ 148 __be32 __iomem *send_stop; /* "stop" doorbell ptr */ 149 struct mcp_kreq_ether_send *req_list; /* host shadow of sendq */ 150 char *req_bytes; 151 struct myri10ge_tx_buffer_state *info; 152 int mask; /* number of transmit slots -1 */ 153 int req ____cacheline_aligned; /* transmit slots submitted */ 154 int pkt_start; /* packets started */ 155 int stop_queue; 156 int linearized; 157 int done ____cacheline_aligned; /* transmit slots completed */ 158 int pkt_done; /* packets completed */ 159 int wake_queue; 160 int queue_active; 161 }; 162 163 struct myri10ge_rx_done { 164 struct mcp_slot *entry; 165 dma_addr_t bus; 166 int cnt; 167 int idx; 168 struct net_lro_mgr lro_mgr; 169 struct net_lro_desc lro_desc[MYRI10GE_MAX_LRO_DESCRIPTORS]; 170 }; 171 172 struct myri10ge_slice_netstats { 173 unsigned long rx_packets; 174 unsigned long tx_packets; 175 unsigned long rx_bytes; 176 unsigned long tx_bytes; 177 unsigned long rx_dropped; 178 unsigned long tx_dropped; 179 }; 180 181 struct myri10ge_slice_state { 182 struct myri10ge_tx_buf tx; /* transmit ring */ 183 struct myri10ge_rx_buf rx_small; 184 struct myri10ge_rx_buf rx_big; 185 struct myri10ge_rx_done rx_done; 186 struct net_device *dev; 187 struct napi_struct napi; 188 struct myri10ge_priv *mgp; 189 struct myri10ge_slice_netstats stats; 190 __be32 __iomem *irq_claim; 191 struct mcp_irq_data *fw_stats; 192 dma_addr_t fw_stats_bus; 193 int watchdog_tx_done; 194 int watchdog_tx_req; 195 int watchdog_rx_done; 196 int stuck; 197 #ifdef CONFIG_MYRI10GE_DCA 198 int cached_dca_tag; 199 int cpu; 200 __be32 __iomem *dca_tag; 201 #endif 202 char irq_desc[32]; 203 }; 204 205 struct myri10ge_priv { 206 struct myri10ge_slice_state *ss; 207 int tx_boundary; /* boundary transmits cannot cross */ 208 int num_slices; 209 int running; /* running? */ 210 int small_bytes; 211 int big_bytes; 212 int max_intr_slots; 213 struct net_device *dev; 214 u8 __iomem *sram; 215 int sram_size; 216 unsigned long board_span; 217 unsigned long iomem_base; 218 __be32 __iomem *irq_deassert; 219 char *mac_addr_string; 220 struct mcp_cmd_response *cmd; 221 dma_addr_t cmd_bus; 222 struct pci_dev *pdev; 223 int msi_enabled; 224 int msix_enabled; 225 struct msix_entry *msix_vectors; 226 #ifdef CONFIG_MYRI10GE_DCA 227 int dca_enabled; 228 int relaxed_order; 229 #endif 230 u32 link_state; 231 unsigned int rdma_tags_available; 232 int intr_coal_delay; 233 __be32 __iomem *intr_coal_delay_ptr; 234 int mtrr; 235 int wc_enabled; 236 int down_cnt; 237 wait_queue_head_t down_wq; 238 struct work_struct watchdog_work; 239 struct timer_list watchdog_timer; 240 int watchdog_resets; 241 int watchdog_pause; 242 int pause; 243 bool fw_name_allocated; 244 char *fw_name; 245 char eeprom_strings[MYRI10GE_EEPROM_STRINGS_SIZE]; 246 char *product_code_string; 247 char fw_version[128]; 248 int fw_ver_major; 249 int fw_ver_minor; 250 int fw_ver_tiny; 251 int adopted_rx_filter_bug; 252 u8 mac_addr[6]; /* eeprom mac address */ 253 unsigned long serial_number; 254 int vendor_specific_offset; 255 int fw_multicast_support; 256 u32 features; 257 u32 max_tso6; 258 u32 read_dma; 259 u32 write_dma; 260 u32 read_write_dma; 261 u32 link_changes; 262 u32 msg_enable; 263 unsigned int board_number; 264 int rebooted; 265 }; 266 267 static char *myri10ge_fw_unaligned = "myri10ge_ethp_z8e.dat"; 268 static char *myri10ge_fw_aligned = "myri10ge_eth_z8e.dat"; 269 static char *myri10ge_fw_rss_unaligned = "myri10ge_rss_ethp_z8e.dat"; 270 static char *myri10ge_fw_rss_aligned = "myri10ge_rss_eth_z8e.dat"; 271 MODULE_FIRMWARE("myri10ge_ethp_z8e.dat"); 272 MODULE_FIRMWARE("myri10ge_eth_z8e.dat"); 273 MODULE_FIRMWARE("myri10ge_rss_ethp_z8e.dat"); 274 MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat"); 275 276 /* Careful: must be accessed under kparam_block_sysfs_write */ 277 static char *myri10ge_fw_name = NULL; 278 module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR); 279 MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name"); 280 281 #define MYRI10GE_MAX_BOARDS 8 282 static char *myri10ge_fw_names[MYRI10GE_MAX_BOARDS] = 283 {[0 ... (MYRI10GE_MAX_BOARDS - 1)] = NULL }; 284 module_param_array_named(myri10ge_fw_names, myri10ge_fw_names, charp, NULL, 285 0444); 286 MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image names per board"); 287 288 static int myri10ge_ecrc_enable = 1; 289 module_param(myri10ge_ecrc_enable, int, S_IRUGO); 290 MODULE_PARM_DESC(myri10ge_ecrc_enable, "Enable Extended CRC on PCI-E"); 291 292 static int myri10ge_small_bytes = -1; /* -1 == auto */ 293 module_param(myri10ge_small_bytes, int, S_IRUGO | S_IWUSR); 294 MODULE_PARM_DESC(myri10ge_small_bytes, "Threshold of small packets"); 295 296 static int myri10ge_msi = 1; /* enable msi by default */ 297 module_param(myri10ge_msi, int, S_IRUGO | S_IWUSR); 298 MODULE_PARM_DESC(myri10ge_msi, "Enable Message Signalled Interrupts"); 299 300 static int myri10ge_intr_coal_delay = 75; 301 module_param(myri10ge_intr_coal_delay, int, S_IRUGO); 302 MODULE_PARM_DESC(myri10ge_intr_coal_delay, "Interrupt coalescing delay"); 303 304 static int myri10ge_flow_control = 1; 305 module_param(myri10ge_flow_control, int, S_IRUGO); 306 MODULE_PARM_DESC(myri10ge_flow_control, "Pause parameter"); 307 308 static int myri10ge_deassert_wait = 1; 309 module_param(myri10ge_deassert_wait, int, S_IRUGO | S_IWUSR); 310 MODULE_PARM_DESC(myri10ge_deassert_wait, 311 "Wait when deasserting legacy interrupts"); 312 313 static int myri10ge_force_firmware = 0; 314 module_param(myri10ge_force_firmware, int, S_IRUGO); 315 MODULE_PARM_DESC(myri10ge_force_firmware, 316 "Force firmware to assume aligned completions"); 317 318 static int myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN; 319 module_param(myri10ge_initial_mtu, int, S_IRUGO); 320 MODULE_PARM_DESC(myri10ge_initial_mtu, "Initial MTU"); 321 322 static int myri10ge_napi_weight = 64; 323 module_param(myri10ge_napi_weight, int, S_IRUGO); 324 MODULE_PARM_DESC(myri10ge_napi_weight, "Set NAPI weight"); 325 326 static int myri10ge_watchdog_timeout = 1; 327 module_param(myri10ge_watchdog_timeout, int, S_IRUGO); 328 MODULE_PARM_DESC(myri10ge_watchdog_timeout, "Set watchdog timeout"); 329 330 static int myri10ge_max_irq_loops = 1048576; 331 module_param(myri10ge_max_irq_loops, int, S_IRUGO); 332 MODULE_PARM_DESC(myri10ge_max_irq_loops, 333 "Set stuck legacy IRQ detection threshold"); 334 335 #define MYRI10GE_MSG_DEFAULT NETIF_MSG_LINK 336 337 static int myri10ge_debug = -1; /* defaults above */ 338 module_param(myri10ge_debug, int, 0); 339 MODULE_PARM_DESC(myri10ge_debug, "Debug level (0=none,...,16=all)"); 340 341 static int myri10ge_lro_max_pkts = MYRI10GE_LRO_MAX_PKTS; 342 module_param(myri10ge_lro_max_pkts, int, S_IRUGO); 343 MODULE_PARM_DESC(myri10ge_lro_max_pkts, 344 "Number of LRO packets to be aggregated"); 345 346 static int myri10ge_fill_thresh = 256; 347 module_param(myri10ge_fill_thresh, int, S_IRUGO | S_IWUSR); 348 MODULE_PARM_DESC(myri10ge_fill_thresh, "Number of empty rx slots allowed"); 349 350 static int myri10ge_reset_recover = 1; 351 352 static int myri10ge_max_slices = 1; 353 module_param(myri10ge_max_slices, int, S_IRUGO); 354 MODULE_PARM_DESC(myri10ge_max_slices, "Max tx/rx queues"); 355 356 static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 357 module_param(myri10ge_rss_hash, int, S_IRUGO); 358 MODULE_PARM_DESC(myri10ge_rss_hash, "Type of RSS hashing to do"); 359 360 static int myri10ge_dca = 1; 361 module_param(myri10ge_dca, int, S_IRUGO); 362 MODULE_PARM_DESC(myri10ge_dca, "Enable DCA if possible"); 363 364 #define MYRI10GE_FW_OFFSET 1024*1024 365 #define MYRI10GE_HIGHPART_TO_U32(X) \ 366 (sizeof (X) == 8) ? ((u32)((u64)(X) >> 32)) : (0) 367 #define MYRI10GE_LOWPART_TO_U32(X) ((u32)(X)) 368 369 #define myri10ge_pio_copy(to,from,size) __iowrite64_copy(to,from,size/8) 370 371 static void myri10ge_set_multicast_list(struct net_device *dev); 372 static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, 373 struct net_device *dev); 374 375 static inline void put_be32(__be32 val, __be32 __iomem * p) 376 { 377 __raw_writel((__force __u32) val, (__force void __iomem *)p); 378 } 379 380 static struct rtnl_link_stats64 *myri10ge_get_stats(struct net_device *dev, 381 struct rtnl_link_stats64 *stats); 382 383 static void set_fw_name(struct myri10ge_priv *mgp, char *name, bool allocated) 384 { 385 if (mgp->fw_name_allocated) 386 kfree(mgp->fw_name); 387 mgp->fw_name = name; 388 mgp->fw_name_allocated = allocated; 389 } 390 391 static int 392 myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd, 393 struct myri10ge_cmd *data, int atomic) 394 { 395 struct mcp_cmd *buf; 396 char buf_bytes[sizeof(*buf) + 8]; 397 struct mcp_cmd_response *response = mgp->cmd; 398 char __iomem *cmd_addr = mgp->sram + MXGEFW_ETH_CMD; 399 u32 dma_low, dma_high, result, value; 400 int sleep_total = 0; 401 402 /* ensure buf is aligned to 8 bytes */ 403 buf = (struct mcp_cmd *)ALIGN((unsigned long)buf_bytes, 8); 404 405 buf->data0 = htonl(data->data0); 406 buf->data1 = htonl(data->data1); 407 buf->data2 = htonl(data->data2); 408 buf->cmd = htonl(cmd); 409 dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus); 410 dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus); 411 412 buf->response_addr.low = htonl(dma_low); 413 buf->response_addr.high = htonl(dma_high); 414 response->result = htonl(MYRI10GE_NO_RESPONSE_RESULT); 415 mb(); 416 myri10ge_pio_copy(cmd_addr, buf, sizeof(*buf)); 417 418 /* wait up to 15ms. Longest command is the DMA benchmark, 419 * which is capped at 5ms, but runs from a timeout handler 420 * that runs every 7.8ms. So a 15ms timeout leaves us with 421 * a 2.2ms margin 422 */ 423 if (atomic) { 424 /* if atomic is set, do not sleep, 425 * and try to get the completion quickly 426 * (1ms will be enough for those commands) */ 427 for (sleep_total = 0; 428 sleep_total < 1000 && 429 response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT); 430 sleep_total += 10) { 431 udelay(10); 432 mb(); 433 } 434 } else { 435 /* use msleep for most command */ 436 for (sleep_total = 0; 437 sleep_total < 15 && 438 response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT); 439 sleep_total++) 440 msleep(1); 441 } 442 443 result = ntohl(response->result); 444 value = ntohl(response->data); 445 if (result != MYRI10GE_NO_RESPONSE_RESULT) { 446 if (result == 0) { 447 data->data0 = value; 448 return 0; 449 } else if (result == MXGEFW_CMD_UNKNOWN) { 450 return -ENOSYS; 451 } else if (result == MXGEFW_CMD_ERROR_UNALIGNED) { 452 return -E2BIG; 453 } else if (result == MXGEFW_CMD_ERROR_RANGE && 454 cmd == MXGEFW_CMD_ENABLE_RSS_QUEUES && 455 (data-> 456 data1 & MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES) != 457 0) { 458 return -ERANGE; 459 } else { 460 dev_err(&mgp->pdev->dev, 461 "command %d failed, result = %d\n", 462 cmd, result); 463 return -ENXIO; 464 } 465 } 466 467 dev_err(&mgp->pdev->dev, "command %d timed out, result = %d\n", 468 cmd, result); 469 return -EAGAIN; 470 } 471 472 /* 473 * The eeprom strings on the lanaiX have the format 474 * SN=x\0 475 * MAC=x:x:x:x:x:x\0 476 * PT:ddd mmm xx xx:xx:xx xx\0 477 * PV:ddd mmm xx xx:xx:xx xx\0 478 */ 479 static int myri10ge_read_mac_addr(struct myri10ge_priv *mgp) 480 { 481 char *ptr, *limit; 482 int i; 483 484 ptr = mgp->eeprom_strings; 485 limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE; 486 487 while (*ptr != '\0' && ptr < limit) { 488 if (memcmp(ptr, "MAC=", 4) == 0) { 489 ptr += 4; 490 mgp->mac_addr_string = ptr; 491 for (i = 0; i < 6; i++) { 492 if ((ptr + 2) > limit) 493 goto abort; 494 mgp->mac_addr[i] = 495 simple_strtoul(ptr, &ptr, 16); 496 ptr += 1; 497 } 498 } 499 if (memcmp(ptr, "PC=", 3) == 0) { 500 ptr += 3; 501 mgp->product_code_string = ptr; 502 } 503 if (memcmp((const void *)ptr, "SN=", 3) == 0) { 504 ptr += 3; 505 mgp->serial_number = simple_strtoul(ptr, &ptr, 10); 506 } 507 while (ptr < limit && *ptr++) ; 508 } 509 510 return 0; 511 512 abort: 513 dev_err(&mgp->pdev->dev, "failed to parse eeprom_strings\n"); 514 return -ENXIO; 515 } 516 517 /* 518 * Enable or disable periodic RDMAs from the host to make certain 519 * chipsets resend dropped PCIe messages 520 */ 521 522 static void myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable) 523 { 524 char __iomem *submit; 525 __be32 buf[16] __attribute__ ((__aligned__(8))); 526 u32 dma_low, dma_high; 527 int i; 528 529 /* clear confirmation addr */ 530 mgp->cmd->data = 0; 531 mb(); 532 533 /* send a rdma command to the PCIe engine, and wait for the 534 * response in the confirmation address. The firmware should 535 * write a -1 there to indicate it is alive and well 536 */ 537 dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus); 538 dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus); 539 540 buf[0] = htonl(dma_high); /* confirm addr MSW */ 541 buf[1] = htonl(dma_low); /* confirm addr LSW */ 542 buf[2] = MYRI10GE_NO_CONFIRM_DATA; /* confirm data */ 543 buf[3] = htonl(dma_high); /* dummy addr MSW */ 544 buf[4] = htonl(dma_low); /* dummy addr LSW */ 545 buf[5] = htonl(enable); /* enable? */ 546 547 submit = mgp->sram + MXGEFW_BOOT_DUMMY_RDMA; 548 549 myri10ge_pio_copy(submit, &buf, sizeof(buf)); 550 for (i = 0; mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA && i < 20; i++) 551 msleep(1); 552 if (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA) 553 dev_err(&mgp->pdev->dev, "dummy rdma %s failed\n", 554 (enable ? "enable" : "disable")); 555 } 556 557 static int 558 myri10ge_validate_firmware(struct myri10ge_priv *mgp, 559 struct mcp_gen_header *hdr) 560 { 561 struct device *dev = &mgp->pdev->dev; 562 563 /* check firmware type */ 564 if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) { 565 dev_err(dev, "Bad firmware type: 0x%x\n", ntohl(hdr->mcp_type)); 566 return -EINVAL; 567 } 568 569 /* save firmware version for ethtool */ 570 strncpy(mgp->fw_version, hdr->version, sizeof(mgp->fw_version)); 571 572 sscanf(mgp->fw_version, "%d.%d.%d", &mgp->fw_ver_major, 573 &mgp->fw_ver_minor, &mgp->fw_ver_tiny); 574 575 if (!(mgp->fw_ver_major == MXGEFW_VERSION_MAJOR && 576 mgp->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 577 dev_err(dev, "Found firmware version %s\n", mgp->fw_version); 578 dev_err(dev, "Driver needs %d.%d\n", MXGEFW_VERSION_MAJOR, 579 MXGEFW_VERSION_MINOR); 580 return -EINVAL; 581 } 582 return 0; 583 } 584 585 static int myri10ge_load_hotplug_firmware(struct myri10ge_priv *mgp, u32 * size) 586 { 587 unsigned crc, reread_crc; 588 const struct firmware *fw; 589 struct device *dev = &mgp->pdev->dev; 590 unsigned char *fw_readback; 591 struct mcp_gen_header *hdr; 592 size_t hdr_offset; 593 int status; 594 unsigned i; 595 596 if ((status = request_firmware(&fw, mgp->fw_name, dev)) < 0) { 597 dev_err(dev, "Unable to load %s firmware image via hotplug\n", 598 mgp->fw_name); 599 status = -EINVAL; 600 goto abort_with_nothing; 601 } 602 603 /* check size */ 604 605 if (fw->size >= mgp->sram_size - MYRI10GE_FW_OFFSET || 606 fw->size < MCP_HEADER_PTR_OFFSET + 4) { 607 dev_err(dev, "Firmware size invalid:%d\n", (int)fw->size); 608 status = -EINVAL; 609 goto abort_with_fw; 610 } 611 612 /* check id */ 613 hdr_offset = ntohl(*(__be32 *) (fw->data + MCP_HEADER_PTR_OFFSET)); 614 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->size) { 615 dev_err(dev, "Bad firmware file\n"); 616 status = -EINVAL; 617 goto abort_with_fw; 618 } 619 hdr = (void *)(fw->data + hdr_offset); 620 621 status = myri10ge_validate_firmware(mgp, hdr); 622 if (status != 0) 623 goto abort_with_fw; 624 625 crc = crc32(~0, fw->data, fw->size); 626 for (i = 0; i < fw->size; i += 256) { 627 myri10ge_pio_copy(mgp->sram + MYRI10GE_FW_OFFSET + i, 628 fw->data + i, 629 min(256U, (unsigned)(fw->size - i))); 630 mb(); 631 readb(mgp->sram); 632 } 633 fw_readback = vmalloc(fw->size); 634 if (!fw_readback) { 635 status = -ENOMEM; 636 goto abort_with_fw; 637 } 638 /* corruption checking is good for parity recovery and buggy chipset */ 639 memcpy_fromio(fw_readback, mgp->sram + MYRI10GE_FW_OFFSET, fw->size); 640 reread_crc = crc32(~0, fw_readback, fw->size); 641 vfree(fw_readback); 642 if (crc != reread_crc) { 643 dev_err(dev, "CRC failed(fw-len=%u), got 0x%x (expect 0x%x)\n", 644 (unsigned)fw->size, reread_crc, crc); 645 status = -EIO; 646 goto abort_with_fw; 647 } 648 *size = (u32) fw->size; 649 650 abort_with_fw: 651 release_firmware(fw); 652 653 abort_with_nothing: 654 return status; 655 } 656 657 static int myri10ge_adopt_running_firmware(struct myri10ge_priv *mgp) 658 { 659 struct mcp_gen_header *hdr; 660 struct device *dev = &mgp->pdev->dev; 661 const size_t bytes = sizeof(struct mcp_gen_header); 662 size_t hdr_offset; 663 int status; 664 665 /* find running firmware header */ 666 hdr_offset = swab32(readl(mgp->sram + MCP_HEADER_PTR_OFFSET)); 667 668 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > mgp->sram_size) { 669 dev_err(dev, "Running firmware has bad header offset (%d)\n", 670 (int)hdr_offset); 671 return -EIO; 672 } 673 674 /* copy header of running firmware from SRAM to host memory to 675 * validate firmware */ 676 hdr = kmalloc(bytes, GFP_KERNEL); 677 if (hdr == NULL) { 678 dev_err(dev, "could not malloc firmware hdr\n"); 679 return -ENOMEM; 680 } 681 memcpy_fromio(hdr, mgp->sram + hdr_offset, bytes); 682 status = myri10ge_validate_firmware(mgp, hdr); 683 kfree(hdr); 684 685 /* check to see if adopted firmware has bug where adopting 686 * it will cause broadcasts to be filtered unless the NIC 687 * is kept in ALLMULTI mode */ 688 if (mgp->fw_ver_major == 1 && mgp->fw_ver_minor == 4 && 689 mgp->fw_ver_tiny >= 4 && mgp->fw_ver_tiny <= 11) { 690 mgp->adopted_rx_filter_bug = 1; 691 dev_warn(dev, "Adopting fw %d.%d.%d: " 692 "working around rx filter bug\n", 693 mgp->fw_ver_major, mgp->fw_ver_minor, 694 mgp->fw_ver_tiny); 695 } 696 return status; 697 } 698 699 static int myri10ge_get_firmware_capabilities(struct myri10ge_priv *mgp) 700 { 701 struct myri10ge_cmd cmd; 702 int status; 703 704 /* probe for IPv6 TSO support */ 705 mgp->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO; 706 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, 707 &cmd, 0); 708 if (status == 0) { 709 mgp->max_tso6 = cmd.data0; 710 mgp->features |= NETIF_F_TSO6; 711 } 712 713 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0); 714 if (status != 0) { 715 dev_err(&mgp->pdev->dev, 716 "failed MXGEFW_CMD_GET_RX_RING_SIZE\n"); 717 return -ENXIO; 718 } 719 720 mgp->max_intr_slots = 2 * (cmd.data0 / sizeof(struct mcp_dma_addr)); 721 722 return 0; 723 } 724 725 static int myri10ge_load_firmware(struct myri10ge_priv *mgp, int adopt) 726 { 727 char __iomem *submit; 728 __be32 buf[16] __attribute__ ((__aligned__(8))); 729 u32 dma_low, dma_high, size; 730 int status, i; 731 732 size = 0; 733 status = myri10ge_load_hotplug_firmware(mgp, &size); 734 if (status) { 735 if (!adopt) 736 return status; 737 dev_warn(&mgp->pdev->dev, "hotplug firmware loading failed\n"); 738 739 /* Do not attempt to adopt firmware if there 740 * was a bad crc */ 741 if (status == -EIO) 742 return status; 743 744 status = myri10ge_adopt_running_firmware(mgp); 745 if (status != 0) { 746 dev_err(&mgp->pdev->dev, 747 "failed to adopt running firmware\n"); 748 return status; 749 } 750 dev_info(&mgp->pdev->dev, 751 "Successfully adopted running firmware\n"); 752 if (mgp->tx_boundary == 4096) { 753 dev_warn(&mgp->pdev->dev, 754 "Using firmware currently running on NIC" 755 ". For optimal\n"); 756 dev_warn(&mgp->pdev->dev, 757 "performance consider loading optimized " 758 "firmware\n"); 759 dev_warn(&mgp->pdev->dev, "via hotplug\n"); 760 } 761 762 set_fw_name(mgp, "adopted", false); 763 mgp->tx_boundary = 2048; 764 myri10ge_dummy_rdma(mgp, 1); 765 status = myri10ge_get_firmware_capabilities(mgp); 766 return status; 767 } 768 769 /* clear confirmation addr */ 770 mgp->cmd->data = 0; 771 mb(); 772 773 /* send a reload command to the bootstrap MCP, and wait for the 774 * response in the confirmation address. The firmware should 775 * write a -1 there to indicate it is alive and well 776 */ 777 dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus); 778 dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus); 779 780 buf[0] = htonl(dma_high); /* confirm addr MSW */ 781 buf[1] = htonl(dma_low); /* confirm addr LSW */ 782 buf[2] = MYRI10GE_NO_CONFIRM_DATA; /* confirm data */ 783 784 /* FIX: All newest firmware should un-protect the bottom of 785 * the sram before handoff. However, the very first interfaces 786 * do not. Therefore the handoff copy must skip the first 8 bytes 787 */ 788 buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */ 789 buf[4] = htonl(size - 8); /* length of code */ 790 buf[5] = htonl(8); /* where to copy to */ 791 buf[6] = htonl(0); /* where to jump to */ 792 793 submit = mgp->sram + MXGEFW_BOOT_HANDOFF; 794 795 myri10ge_pio_copy(submit, &buf, sizeof(buf)); 796 mb(); 797 msleep(1); 798 mb(); 799 i = 0; 800 while (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA && i < 9) { 801 msleep(1 << i); 802 i++; 803 } 804 if (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA) { 805 dev_err(&mgp->pdev->dev, "handoff failed\n"); 806 return -ENXIO; 807 } 808 myri10ge_dummy_rdma(mgp, 1); 809 status = myri10ge_get_firmware_capabilities(mgp); 810 811 return status; 812 } 813 814 static int myri10ge_update_mac_address(struct myri10ge_priv *mgp, u8 * addr) 815 { 816 struct myri10ge_cmd cmd; 817 int status; 818 819 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 820 | (addr[2] << 8) | addr[3]); 821 822 cmd.data1 = ((addr[4] << 8) | (addr[5])); 823 824 status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd, 0); 825 return status; 826 } 827 828 static int myri10ge_change_pause(struct myri10ge_priv *mgp, int pause) 829 { 830 struct myri10ge_cmd cmd; 831 int status, ctl; 832 833 ctl = pause ? MXGEFW_ENABLE_FLOW_CONTROL : MXGEFW_DISABLE_FLOW_CONTROL; 834 status = myri10ge_send_cmd(mgp, ctl, &cmd, 0); 835 836 if (status) { 837 netdev_err(mgp->dev, "Failed to set flow control mode\n"); 838 return status; 839 } 840 mgp->pause = pause; 841 return 0; 842 } 843 844 static void 845 myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc, int atomic) 846 { 847 struct myri10ge_cmd cmd; 848 int status, ctl; 849 850 ctl = promisc ? MXGEFW_ENABLE_PROMISC : MXGEFW_DISABLE_PROMISC; 851 status = myri10ge_send_cmd(mgp, ctl, &cmd, atomic); 852 if (status) 853 netdev_err(mgp->dev, "Failed to set promisc mode\n"); 854 } 855 856 static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type) 857 { 858 struct myri10ge_cmd cmd; 859 int status; 860 u32 len; 861 struct page *dmatest_page; 862 dma_addr_t dmatest_bus; 863 char *test = " "; 864 865 dmatest_page = alloc_page(GFP_KERNEL); 866 if (!dmatest_page) 867 return -ENOMEM; 868 dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE, 869 DMA_BIDIRECTIONAL); 870 871 /* Run a small DMA test. 872 * The magic multipliers to the length tell the firmware 873 * to do DMA read, write, or read+write tests. The 874 * results are returned in cmd.data0. The upper 16 875 * bits or the return is the number of transfers completed. 876 * The lower 16 bits is the time in 0.5us ticks that the 877 * transfers took to complete. 878 */ 879 880 len = mgp->tx_boundary; 881 882 cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus); 883 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus); 884 cmd.data2 = len * 0x10000; 885 status = myri10ge_send_cmd(mgp, test_type, &cmd, 0); 886 if (status != 0) { 887 test = "read"; 888 goto abort; 889 } 890 mgp->read_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff); 891 cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus); 892 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus); 893 cmd.data2 = len * 0x1; 894 status = myri10ge_send_cmd(mgp, test_type, &cmd, 0); 895 if (status != 0) { 896 test = "write"; 897 goto abort; 898 } 899 mgp->write_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff); 900 901 cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus); 902 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus); 903 cmd.data2 = len * 0x10001; 904 status = myri10ge_send_cmd(mgp, test_type, &cmd, 0); 905 if (status != 0) { 906 test = "read/write"; 907 goto abort; 908 } 909 mgp->read_write_dma = ((cmd.data0 >> 16) * len * 2 * 2) / 910 (cmd.data0 & 0xffff); 911 912 abort: 913 pci_unmap_page(mgp->pdev, dmatest_bus, PAGE_SIZE, DMA_BIDIRECTIONAL); 914 put_page(dmatest_page); 915 916 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 917 dev_warn(&mgp->pdev->dev, "DMA %s benchmark failed: %d\n", 918 test, status); 919 920 return status; 921 } 922 923 static int myri10ge_reset(struct myri10ge_priv *mgp) 924 { 925 struct myri10ge_cmd cmd; 926 struct myri10ge_slice_state *ss; 927 int i, status; 928 size_t bytes; 929 #ifdef CONFIG_MYRI10GE_DCA 930 unsigned long dca_tag_off; 931 #endif 932 933 /* try to send a reset command to the card to see if it 934 * is alive */ 935 memset(&cmd, 0, sizeof(cmd)); 936 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd, 0); 937 if (status != 0) { 938 dev_err(&mgp->pdev->dev, "failed reset\n"); 939 return -ENXIO; 940 } 941 942 (void)myri10ge_dma_test(mgp, MXGEFW_DMA_TEST); 943 /* 944 * Use non-ndis mcp_slot (eg, 4 bytes total, 945 * no toeplitz hash value returned. Older firmware will 946 * not understand this command, but will use the correct 947 * sized mcp_slot, so we ignore error returns 948 */ 949 cmd.data0 = MXGEFW_RSS_MCP_SLOT_TYPE_MIN; 950 (void)myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_MCP_SLOT_TYPE, &cmd, 0); 951 952 /* Now exchange information about interrupts */ 953 954 bytes = mgp->max_intr_slots * sizeof(*mgp->ss[0].rx_done.entry); 955 cmd.data0 = (u32) bytes; 956 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0); 957 958 /* 959 * Even though we already know how many slices are supported 960 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES 961 * has magic side effects, and must be called after a reset. 962 * It must be called prior to calling any RSS related cmds, 963 * including assigning an interrupt queue for anything but 964 * slice 0. It must also be called *after* 965 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 966 * the firmware to compute offsets. 967 */ 968 969 if (mgp->num_slices > 1) { 970 971 /* ask the maximum number of slices it supports */ 972 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 973 &cmd, 0); 974 if (status != 0) { 975 dev_err(&mgp->pdev->dev, 976 "failed to get number of slices\n"); 977 } 978 979 /* 980 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 981 * to setting up the interrupt queue DMA 982 */ 983 984 cmd.data0 = mgp->num_slices; 985 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 986 if (mgp->dev->real_num_tx_queues > 1) 987 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 988 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 989 &cmd, 0); 990 991 /* Firmware older than 1.4.32 only supports multiple 992 * RX queues, so if we get an error, first retry using a 993 * single TX queue before giving up */ 994 if (status != 0 && mgp->dev->real_num_tx_queues > 1) { 995 netif_set_real_num_tx_queues(mgp->dev, 1); 996 cmd.data0 = mgp->num_slices; 997 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 998 status = myri10ge_send_cmd(mgp, 999 MXGEFW_CMD_ENABLE_RSS_QUEUES, 1000 &cmd, 0); 1001 } 1002 1003 if (status != 0) { 1004 dev_err(&mgp->pdev->dev, 1005 "failed to set number of slices\n"); 1006 1007 return status; 1008 } 1009 } 1010 for (i = 0; i < mgp->num_slices; i++) { 1011 ss = &mgp->ss[i]; 1012 cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->rx_done.bus); 1013 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->rx_done.bus); 1014 cmd.data2 = i; 1015 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA, 1016 &cmd, 0); 1017 } 1018 1019 status |= 1020 myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd, 0); 1021 for (i = 0; i < mgp->num_slices; i++) { 1022 ss = &mgp->ss[i]; 1023 ss->irq_claim = 1024 (__iomem __be32 *) (mgp->sram + cmd.data0 + 8 * i); 1025 } 1026 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1027 &cmd, 0); 1028 mgp->irq_deassert = (__iomem __be32 *) (mgp->sram + cmd.data0); 1029 1030 status |= myri10ge_send_cmd 1031 (mgp, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd, 0); 1032 mgp->intr_coal_delay_ptr = (__iomem __be32 *) (mgp->sram + cmd.data0); 1033 if (status != 0) { 1034 dev_err(&mgp->pdev->dev, "failed set interrupt parameters\n"); 1035 return status; 1036 } 1037 put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr); 1038 1039 #ifdef CONFIG_MYRI10GE_DCA 1040 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_DCA_OFFSET, &cmd, 0); 1041 dca_tag_off = cmd.data0; 1042 for (i = 0; i < mgp->num_slices; i++) { 1043 ss = &mgp->ss[i]; 1044 if (status == 0) { 1045 ss->dca_tag = (__iomem __be32 *) 1046 (mgp->sram + dca_tag_off + 4 * i); 1047 } else { 1048 ss->dca_tag = NULL; 1049 } 1050 } 1051 #endif /* CONFIG_MYRI10GE_DCA */ 1052 1053 /* reset mcp/driver shared state back to 0 */ 1054 1055 mgp->link_changes = 0; 1056 for (i = 0; i < mgp->num_slices; i++) { 1057 ss = &mgp->ss[i]; 1058 1059 memset(ss->rx_done.entry, 0, bytes); 1060 ss->tx.req = 0; 1061 ss->tx.done = 0; 1062 ss->tx.pkt_start = 0; 1063 ss->tx.pkt_done = 0; 1064 ss->rx_big.cnt = 0; 1065 ss->rx_small.cnt = 0; 1066 ss->rx_done.idx = 0; 1067 ss->rx_done.cnt = 0; 1068 ss->tx.wake_queue = 0; 1069 ss->tx.stop_queue = 0; 1070 } 1071 1072 status = myri10ge_update_mac_address(mgp, mgp->dev->dev_addr); 1073 myri10ge_change_pause(mgp, mgp->pause); 1074 myri10ge_set_multicast_list(mgp->dev); 1075 return status; 1076 } 1077 1078 #ifdef CONFIG_MYRI10GE_DCA 1079 static int myri10ge_toggle_relaxed(struct pci_dev *pdev, int on) 1080 { 1081 int ret; 1082 u16 ctl; 1083 1084 pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &ctl); 1085 1086 ret = (ctl & PCI_EXP_DEVCTL_RELAX_EN) >> 4; 1087 if (ret != on) { 1088 ctl &= ~PCI_EXP_DEVCTL_RELAX_EN; 1089 ctl |= (on << 4); 1090 pcie_capability_write_word(pdev, PCI_EXP_DEVCTL, ctl); 1091 } 1092 return ret; 1093 } 1094 1095 static void 1096 myri10ge_write_dca(struct myri10ge_slice_state *ss, int cpu, int tag) 1097 { 1098 ss->cached_dca_tag = tag; 1099 put_be32(htonl(tag), ss->dca_tag); 1100 } 1101 1102 static inline void myri10ge_update_dca(struct myri10ge_slice_state *ss) 1103 { 1104 int cpu = get_cpu(); 1105 int tag; 1106 1107 if (cpu != ss->cpu) { 1108 tag = dca3_get_tag(&ss->mgp->pdev->dev, cpu); 1109 if (ss->cached_dca_tag != tag) 1110 myri10ge_write_dca(ss, cpu, tag); 1111 ss->cpu = cpu; 1112 } 1113 put_cpu(); 1114 } 1115 1116 static void myri10ge_setup_dca(struct myri10ge_priv *mgp) 1117 { 1118 int err, i; 1119 struct pci_dev *pdev = mgp->pdev; 1120 1121 if (mgp->ss[0].dca_tag == NULL || mgp->dca_enabled) 1122 return; 1123 if (!myri10ge_dca) { 1124 dev_err(&pdev->dev, "dca disabled by administrator\n"); 1125 return; 1126 } 1127 err = dca_add_requester(&pdev->dev); 1128 if (err) { 1129 if (err != -ENODEV) 1130 dev_err(&pdev->dev, 1131 "dca_add_requester() failed, err=%d\n", err); 1132 return; 1133 } 1134 mgp->relaxed_order = myri10ge_toggle_relaxed(pdev, 0); 1135 mgp->dca_enabled = 1; 1136 for (i = 0; i < mgp->num_slices; i++) { 1137 mgp->ss[i].cpu = -1; 1138 mgp->ss[i].cached_dca_tag = -1; 1139 myri10ge_update_dca(&mgp->ss[i]); 1140 } 1141 } 1142 1143 static void myri10ge_teardown_dca(struct myri10ge_priv *mgp) 1144 { 1145 struct pci_dev *pdev = mgp->pdev; 1146 1147 if (!mgp->dca_enabled) 1148 return; 1149 mgp->dca_enabled = 0; 1150 if (mgp->relaxed_order) 1151 myri10ge_toggle_relaxed(pdev, 1); 1152 dca_remove_requester(&pdev->dev); 1153 } 1154 1155 static int myri10ge_notify_dca_device(struct device *dev, void *data) 1156 { 1157 struct myri10ge_priv *mgp; 1158 unsigned long event; 1159 1160 mgp = dev_get_drvdata(dev); 1161 event = *(unsigned long *)data; 1162 1163 if (event == DCA_PROVIDER_ADD) 1164 myri10ge_setup_dca(mgp); 1165 else if (event == DCA_PROVIDER_REMOVE) 1166 myri10ge_teardown_dca(mgp); 1167 return 0; 1168 } 1169 #endif /* CONFIG_MYRI10GE_DCA */ 1170 1171 static inline void 1172 myri10ge_submit_8rx(struct mcp_kreq_ether_recv __iomem * dst, 1173 struct mcp_kreq_ether_recv *src) 1174 { 1175 __be32 low; 1176 1177 low = src->addr_low; 1178 src->addr_low = htonl(DMA_BIT_MASK(32)); 1179 myri10ge_pio_copy(dst, src, 4 * sizeof(*src)); 1180 mb(); 1181 myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof(*src)); 1182 mb(); 1183 src->addr_low = low; 1184 put_be32(low, &dst->addr_low); 1185 mb(); 1186 } 1187 1188 static inline void myri10ge_vlan_ip_csum(struct sk_buff *skb, __wsum hw_csum) 1189 { 1190 struct vlan_hdr *vh = (struct vlan_hdr *)(skb->data); 1191 1192 if ((skb->protocol == htons(ETH_P_8021Q)) && 1193 (vh->h_vlan_encapsulated_proto == htons(ETH_P_IP) || 1194 vh->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))) { 1195 skb->csum = hw_csum; 1196 skb->ip_summed = CHECKSUM_COMPLETE; 1197 } 1198 } 1199 1200 static inline void 1201 myri10ge_rx_skb_build(struct sk_buff *skb, u8 * va, 1202 struct skb_frag_struct *rx_frags, int len, int hlen) 1203 { 1204 struct skb_frag_struct *skb_frags; 1205 1206 skb->len = skb->data_len = len; 1207 /* attach the page(s) */ 1208 1209 skb_frags = skb_shinfo(skb)->frags; 1210 while (len > 0) { 1211 memcpy(skb_frags, rx_frags, sizeof(*skb_frags)); 1212 len -= skb_frag_size(rx_frags); 1213 skb_frags++; 1214 rx_frags++; 1215 skb_shinfo(skb)->nr_frags++; 1216 } 1217 1218 /* pskb_may_pull is not available in irq context, but 1219 * skb_pull() (for ether_pad and eth_type_trans()) requires 1220 * the beginning of the packet in skb_headlen(), move it 1221 * manually */ 1222 skb_copy_to_linear_data(skb, va, hlen); 1223 skb_shinfo(skb)->frags[0].page_offset += hlen; 1224 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hlen); 1225 skb->data_len -= hlen; 1226 skb->tail += hlen; 1227 skb_pull(skb, MXGEFW_PAD); 1228 } 1229 1230 static void 1231 myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, 1232 int bytes, int watchdog) 1233 { 1234 struct page *page; 1235 int idx; 1236 #if MYRI10GE_ALLOC_SIZE > 4096 1237 int end_offset; 1238 #endif 1239 1240 if (unlikely(rx->watchdog_needed && !watchdog)) 1241 return; 1242 1243 /* try to refill entire ring */ 1244 while (rx->fill_cnt != (rx->cnt + rx->mask + 1)) { 1245 idx = rx->fill_cnt & rx->mask; 1246 if (rx->page_offset + bytes <= MYRI10GE_ALLOC_SIZE) { 1247 /* we can use part of previous page */ 1248 get_page(rx->page); 1249 } else { 1250 /* we need a new page */ 1251 page = 1252 alloc_pages(GFP_ATOMIC | __GFP_COMP, 1253 MYRI10GE_ALLOC_ORDER); 1254 if (unlikely(page == NULL)) { 1255 if (rx->fill_cnt - rx->cnt < 16) 1256 rx->watchdog_needed = 1; 1257 return; 1258 } 1259 rx->page = page; 1260 rx->page_offset = 0; 1261 rx->bus = pci_map_page(mgp->pdev, page, 0, 1262 MYRI10GE_ALLOC_SIZE, 1263 PCI_DMA_FROMDEVICE); 1264 } 1265 rx->info[idx].page = rx->page; 1266 rx->info[idx].page_offset = rx->page_offset; 1267 /* note that this is the address of the start of the 1268 * page */ 1269 dma_unmap_addr_set(&rx->info[idx], bus, rx->bus); 1270 rx->shadow[idx].addr_low = 1271 htonl(MYRI10GE_LOWPART_TO_U32(rx->bus) + rx->page_offset); 1272 rx->shadow[idx].addr_high = 1273 htonl(MYRI10GE_HIGHPART_TO_U32(rx->bus)); 1274 1275 /* start next packet on a cacheline boundary */ 1276 rx->page_offset += SKB_DATA_ALIGN(bytes); 1277 1278 #if MYRI10GE_ALLOC_SIZE > 4096 1279 /* don't cross a 4KB boundary */ 1280 end_offset = rx->page_offset + bytes - 1; 1281 if ((unsigned)(rx->page_offset ^ end_offset) > 4095) 1282 rx->page_offset = end_offset & ~4095; 1283 #endif 1284 rx->fill_cnt++; 1285 1286 /* copy 8 descriptors to the firmware at a time */ 1287 if ((idx & 7) == 7) { 1288 myri10ge_submit_8rx(&rx->lanai[idx - 7], 1289 &rx->shadow[idx - 7]); 1290 } 1291 } 1292 } 1293 1294 static inline void 1295 myri10ge_unmap_rx_page(struct pci_dev *pdev, 1296 struct myri10ge_rx_buffer_state *info, int bytes) 1297 { 1298 /* unmap the recvd page if we're the only or last user of it */ 1299 if (bytes >= MYRI10GE_ALLOC_SIZE / 2 || 1300 (info->page_offset + 2 * bytes) > MYRI10GE_ALLOC_SIZE) { 1301 pci_unmap_page(pdev, (dma_unmap_addr(info, bus) 1302 & ~(MYRI10GE_ALLOC_SIZE - 1)), 1303 MYRI10GE_ALLOC_SIZE, PCI_DMA_FROMDEVICE); 1304 } 1305 } 1306 1307 #define MYRI10GE_HLEN 64 /* The number of bytes to copy from a 1308 * page into an skb */ 1309 1310 static inline int 1311 myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum, 1312 bool lro_enabled) 1313 { 1314 struct myri10ge_priv *mgp = ss->mgp; 1315 struct sk_buff *skb; 1316 struct skb_frag_struct rx_frags[MYRI10GE_MAX_FRAGS_PER_FRAME]; 1317 struct myri10ge_rx_buf *rx; 1318 int i, idx, hlen, remainder, bytes; 1319 struct pci_dev *pdev = mgp->pdev; 1320 struct net_device *dev = mgp->dev; 1321 u8 *va; 1322 1323 if (len <= mgp->small_bytes) { 1324 rx = &ss->rx_small; 1325 bytes = mgp->small_bytes; 1326 } else { 1327 rx = &ss->rx_big; 1328 bytes = mgp->big_bytes; 1329 } 1330 1331 len += MXGEFW_PAD; 1332 idx = rx->cnt & rx->mask; 1333 va = page_address(rx->info[idx].page) + rx->info[idx].page_offset; 1334 prefetch(va); 1335 /* Fill skb_frag_struct(s) with data from our receive */ 1336 for (i = 0, remainder = len; remainder > 0; i++) { 1337 myri10ge_unmap_rx_page(pdev, &rx->info[idx], bytes); 1338 __skb_frag_set_page(&rx_frags[i], rx->info[idx].page); 1339 rx_frags[i].page_offset = rx->info[idx].page_offset; 1340 if (remainder < MYRI10GE_ALLOC_SIZE) 1341 skb_frag_size_set(&rx_frags[i], remainder); 1342 else 1343 skb_frag_size_set(&rx_frags[i], MYRI10GE_ALLOC_SIZE); 1344 rx->cnt++; 1345 idx = rx->cnt & rx->mask; 1346 remainder -= MYRI10GE_ALLOC_SIZE; 1347 } 1348 1349 if (lro_enabled) { 1350 rx_frags[0].page_offset += MXGEFW_PAD; 1351 skb_frag_size_sub(&rx_frags[0], MXGEFW_PAD); 1352 len -= MXGEFW_PAD; 1353 lro_receive_frags(&ss->rx_done.lro_mgr, rx_frags, 1354 /* opaque, will come back in get_frag_header */ 1355 len, len, 1356 (void *)(__force unsigned long)csum, csum); 1357 1358 return 1; 1359 } 1360 1361 hlen = MYRI10GE_HLEN > len ? len : MYRI10GE_HLEN; 1362 1363 /* allocate an skb to attach the page(s) to. This is done 1364 * after trying LRO, so as to avoid skb allocation overheads */ 1365 1366 skb = netdev_alloc_skb(dev, MYRI10GE_HLEN + 16); 1367 if (unlikely(skb == NULL)) { 1368 ss->stats.rx_dropped++; 1369 do { 1370 i--; 1371 __skb_frag_unref(&rx_frags[i]); 1372 } while (i != 0); 1373 return 0; 1374 } 1375 1376 /* Attach the pages to the skb, and trim off any padding */ 1377 myri10ge_rx_skb_build(skb, va, rx_frags, len, hlen); 1378 if (skb_frag_size(&skb_shinfo(skb)->frags[0]) <= 0) { 1379 skb_frag_unref(skb, 0); 1380 skb_shinfo(skb)->nr_frags = 0; 1381 } else { 1382 skb->truesize += bytes * skb_shinfo(skb)->nr_frags; 1383 } 1384 skb->protocol = eth_type_trans(skb, dev); 1385 skb_record_rx_queue(skb, ss - &mgp->ss[0]); 1386 1387 if (dev->features & NETIF_F_RXCSUM) { 1388 if ((skb->protocol == htons(ETH_P_IP)) || 1389 (skb->protocol == htons(ETH_P_IPV6))) { 1390 skb->csum = csum; 1391 skb->ip_summed = CHECKSUM_COMPLETE; 1392 } else 1393 myri10ge_vlan_ip_csum(skb, csum); 1394 } 1395 netif_receive_skb(skb); 1396 return 1; 1397 } 1398 1399 static inline void 1400 myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index) 1401 { 1402 struct pci_dev *pdev = ss->mgp->pdev; 1403 struct myri10ge_tx_buf *tx = &ss->tx; 1404 struct netdev_queue *dev_queue; 1405 struct sk_buff *skb; 1406 int idx, len; 1407 1408 while (tx->pkt_done != mcp_index) { 1409 idx = tx->done & tx->mask; 1410 skb = tx->info[idx].skb; 1411 1412 /* Mark as free */ 1413 tx->info[idx].skb = NULL; 1414 if (tx->info[idx].last) { 1415 tx->pkt_done++; 1416 tx->info[idx].last = 0; 1417 } 1418 tx->done++; 1419 len = dma_unmap_len(&tx->info[idx], len); 1420 dma_unmap_len_set(&tx->info[idx], len, 0); 1421 if (skb) { 1422 ss->stats.tx_bytes += skb->len; 1423 ss->stats.tx_packets++; 1424 dev_kfree_skb_irq(skb); 1425 if (len) 1426 pci_unmap_single(pdev, 1427 dma_unmap_addr(&tx->info[idx], 1428 bus), len, 1429 PCI_DMA_TODEVICE); 1430 } else { 1431 if (len) 1432 pci_unmap_page(pdev, 1433 dma_unmap_addr(&tx->info[idx], 1434 bus), len, 1435 PCI_DMA_TODEVICE); 1436 } 1437 } 1438 1439 dev_queue = netdev_get_tx_queue(ss->dev, ss - ss->mgp->ss); 1440 /* 1441 * Make a minimal effort to prevent the NIC from polling an 1442 * idle tx queue. If we can't get the lock we leave the queue 1443 * active. In this case, either a thread was about to start 1444 * using the queue anyway, or we lost a race and the NIC will 1445 * waste some of its resources polling an inactive queue for a 1446 * while. 1447 */ 1448 1449 if ((ss->mgp->dev->real_num_tx_queues > 1) && 1450 __netif_tx_trylock(dev_queue)) { 1451 if (tx->req == tx->done) { 1452 tx->queue_active = 0; 1453 put_be32(htonl(1), tx->send_stop); 1454 mb(); 1455 mmiowb(); 1456 } 1457 __netif_tx_unlock(dev_queue); 1458 } 1459 1460 /* start the queue if we've stopped it */ 1461 if (netif_tx_queue_stopped(dev_queue) && 1462 tx->req - tx->done < (tx->mask >> 1) && 1463 ss->mgp->running == MYRI10GE_ETH_RUNNING) { 1464 tx->wake_queue++; 1465 netif_tx_wake_queue(dev_queue); 1466 } 1467 } 1468 1469 static inline int 1470 myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget) 1471 { 1472 struct myri10ge_rx_done *rx_done = &ss->rx_done; 1473 struct myri10ge_priv *mgp = ss->mgp; 1474 unsigned long rx_bytes = 0; 1475 unsigned long rx_packets = 0; 1476 unsigned long rx_ok; 1477 int idx = rx_done->idx; 1478 int cnt = rx_done->cnt; 1479 int work_done = 0; 1480 u16 length; 1481 __wsum checksum; 1482 1483 /* 1484 * Prevent compiler from generating more than one ->features memory 1485 * access to avoid theoretical race condition with functions that 1486 * change NETIF_F_LRO flag at runtime. 1487 */ 1488 bool lro_enabled = !!(ACCESS_ONCE(mgp->dev->features) & NETIF_F_LRO); 1489 1490 while (rx_done->entry[idx].length != 0 && work_done < budget) { 1491 length = ntohs(rx_done->entry[idx].length); 1492 rx_done->entry[idx].length = 0; 1493 checksum = csum_unfold(rx_done->entry[idx].checksum); 1494 rx_ok = myri10ge_rx_done(ss, length, checksum, lro_enabled); 1495 rx_packets += rx_ok; 1496 rx_bytes += rx_ok * (unsigned long)length; 1497 cnt++; 1498 idx = cnt & (mgp->max_intr_slots - 1); 1499 work_done++; 1500 } 1501 rx_done->idx = idx; 1502 rx_done->cnt = cnt; 1503 ss->stats.rx_packets += rx_packets; 1504 ss->stats.rx_bytes += rx_bytes; 1505 1506 if (lro_enabled) 1507 lro_flush_all(&rx_done->lro_mgr); 1508 1509 /* restock receive rings if needed */ 1510 if (ss->rx_small.fill_cnt - ss->rx_small.cnt < myri10ge_fill_thresh) 1511 myri10ge_alloc_rx_pages(mgp, &ss->rx_small, 1512 mgp->small_bytes + MXGEFW_PAD, 0); 1513 if (ss->rx_big.fill_cnt - ss->rx_big.cnt < myri10ge_fill_thresh) 1514 myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0); 1515 1516 return work_done; 1517 } 1518 1519 static inline void myri10ge_check_statblock(struct myri10ge_priv *mgp) 1520 { 1521 struct mcp_irq_data *stats = mgp->ss[0].fw_stats; 1522 1523 if (unlikely(stats->stats_updated)) { 1524 unsigned link_up = ntohl(stats->link_up); 1525 if (mgp->link_state != link_up) { 1526 mgp->link_state = link_up; 1527 1528 if (mgp->link_state == MXGEFW_LINK_UP) { 1529 netif_info(mgp, link, mgp->dev, "link up\n"); 1530 netif_carrier_on(mgp->dev); 1531 mgp->link_changes++; 1532 } else { 1533 netif_info(mgp, link, mgp->dev, "link %s\n", 1534 (link_up == MXGEFW_LINK_MYRINET ? 1535 "mismatch (Myrinet detected)" : 1536 "down")); 1537 netif_carrier_off(mgp->dev); 1538 mgp->link_changes++; 1539 } 1540 } 1541 if (mgp->rdma_tags_available != 1542 ntohl(stats->rdma_tags_available)) { 1543 mgp->rdma_tags_available = 1544 ntohl(stats->rdma_tags_available); 1545 netdev_warn(mgp->dev, "RDMA timed out! %d tags left\n", 1546 mgp->rdma_tags_available); 1547 } 1548 mgp->down_cnt += stats->link_down; 1549 if (stats->link_down) 1550 wake_up(&mgp->down_wq); 1551 } 1552 } 1553 1554 static int myri10ge_poll(struct napi_struct *napi, int budget) 1555 { 1556 struct myri10ge_slice_state *ss = 1557 container_of(napi, struct myri10ge_slice_state, napi); 1558 int work_done; 1559 1560 #ifdef CONFIG_MYRI10GE_DCA 1561 if (ss->mgp->dca_enabled) 1562 myri10ge_update_dca(ss); 1563 #endif 1564 1565 /* process as many rx events as NAPI will allow */ 1566 work_done = myri10ge_clean_rx_done(ss, budget); 1567 1568 if (work_done < budget) { 1569 napi_complete(napi); 1570 put_be32(htonl(3), ss->irq_claim); 1571 } 1572 return work_done; 1573 } 1574 1575 static irqreturn_t myri10ge_intr(int irq, void *arg) 1576 { 1577 struct myri10ge_slice_state *ss = arg; 1578 struct myri10ge_priv *mgp = ss->mgp; 1579 struct mcp_irq_data *stats = ss->fw_stats; 1580 struct myri10ge_tx_buf *tx = &ss->tx; 1581 u32 send_done_count; 1582 int i; 1583 1584 /* an interrupt on a non-zero receive-only slice is implicitly 1585 * valid since MSI-X irqs are not shared */ 1586 if ((mgp->dev->real_num_tx_queues == 1) && (ss != mgp->ss)) { 1587 napi_schedule(&ss->napi); 1588 return IRQ_HANDLED; 1589 } 1590 1591 /* make sure it is our IRQ, and that the DMA has finished */ 1592 if (unlikely(!stats->valid)) 1593 return IRQ_NONE; 1594 1595 /* low bit indicates receives are present, so schedule 1596 * napi poll handler */ 1597 if (stats->valid & 1) 1598 napi_schedule(&ss->napi); 1599 1600 if (!mgp->msi_enabled && !mgp->msix_enabled) { 1601 put_be32(0, mgp->irq_deassert); 1602 if (!myri10ge_deassert_wait) 1603 stats->valid = 0; 1604 mb(); 1605 } else 1606 stats->valid = 0; 1607 1608 /* Wait for IRQ line to go low, if using INTx */ 1609 i = 0; 1610 while (1) { 1611 i++; 1612 /* check for transmit completes and receives */ 1613 send_done_count = ntohl(stats->send_done_count); 1614 if (send_done_count != tx->pkt_done) 1615 myri10ge_tx_done(ss, (int)send_done_count); 1616 if (unlikely(i > myri10ge_max_irq_loops)) { 1617 netdev_warn(mgp->dev, "irq stuck?\n"); 1618 stats->valid = 0; 1619 schedule_work(&mgp->watchdog_work); 1620 } 1621 if (likely(stats->valid == 0)) 1622 break; 1623 cpu_relax(); 1624 barrier(); 1625 } 1626 1627 /* Only slice 0 updates stats */ 1628 if (ss == mgp->ss) 1629 myri10ge_check_statblock(mgp); 1630 1631 put_be32(htonl(3), ss->irq_claim + 1); 1632 return IRQ_HANDLED; 1633 } 1634 1635 static int 1636 myri10ge_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd) 1637 { 1638 struct myri10ge_priv *mgp = netdev_priv(netdev); 1639 char *ptr; 1640 int i; 1641 1642 cmd->autoneg = AUTONEG_DISABLE; 1643 ethtool_cmd_speed_set(cmd, SPEED_10000); 1644 cmd->duplex = DUPLEX_FULL; 1645 1646 /* 1647 * parse the product code to deterimine the interface type 1648 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 1649 * after the 3rd dash in the driver's cached copy of the 1650 * EEPROM's product code string. 1651 */ 1652 ptr = mgp->product_code_string; 1653 if (ptr == NULL) { 1654 netdev_err(netdev, "Missing product code\n"); 1655 return 0; 1656 } 1657 for (i = 0; i < 3; i++, ptr++) { 1658 ptr = strchr(ptr, '-'); 1659 if (ptr == NULL) { 1660 netdev_err(netdev, "Invalid product code %s\n", 1661 mgp->product_code_string); 1662 return 0; 1663 } 1664 } 1665 if (*ptr == '2') 1666 ptr++; 1667 if (*ptr == 'R' || *ptr == 'Q' || *ptr == 'S') { 1668 /* We've found either an XFP, quad ribbon fiber, or SFP+ */ 1669 cmd->port = PORT_FIBRE; 1670 cmd->supported |= SUPPORTED_FIBRE; 1671 cmd->advertising |= ADVERTISED_FIBRE; 1672 } else { 1673 cmd->port = PORT_OTHER; 1674 } 1675 if (*ptr == 'R' || *ptr == 'S') 1676 cmd->transceiver = XCVR_EXTERNAL; 1677 else 1678 cmd->transceiver = XCVR_INTERNAL; 1679 1680 return 0; 1681 } 1682 1683 static void 1684 myri10ge_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) 1685 { 1686 struct myri10ge_priv *mgp = netdev_priv(netdev); 1687 1688 strlcpy(info->driver, "myri10ge", sizeof(info->driver)); 1689 strlcpy(info->version, MYRI10GE_VERSION_STR, sizeof(info->version)); 1690 strlcpy(info->fw_version, mgp->fw_version, sizeof(info->fw_version)); 1691 strlcpy(info->bus_info, pci_name(mgp->pdev), sizeof(info->bus_info)); 1692 } 1693 1694 static int 1695 myri10ge_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal) 1696 { 1697 struct myri10ge_priv *mgp = netdev_priv(netdev); 1698 1699 coal->rx_coalesce_usecs = mgp->intr_coal_delay; 1700 return 0; 1701 } 1702 1703 static int 1704 myri10ge_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal) 1705 { 1706 struct myri10ge_priv *mgp = netdev_priv(netdev); 1707 1708 mgp->intr_coal_delay = coal->rx_coalesce_usecs; 1709 put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr); 1710 return 0; 1711 } 1712 1713 static void 1714 myri10ge_get_pauseparam(struct net_device *netdev, 1715 struct ethtool_pauseparam *pause) 1716 { 1717 struct myri10ge_priv *mgp = netdev_priv(netdev); 1718 1719 pause->autoneg = 0; 1720 pause->rx_pause = mgp->pause; 1721 pause->tx_pause = mgp->pause; 1722 } 1723 1724 static int 1725 myri10ge_set_pauseparam(struct net_device *netdev, 1726 struct ethtool_pauseparam *pause) 1727 { 1728 struct myri10ge_priv *mgp = netdev_priv(netdev); 1729 1730 if (pause->tx_pause != mgp->pause) 1731 return myri10ge_change_pause(mgp, pause->tx_pause); 1732 if (pause->rx_pause != mgp->pause) 1733 return myri10ge_change_pause(mgp, pause->rx_pause); 1734 if (pause->autoneg != 0) 1735 return -EINVAL; 1736 return 0; 1737 } 1738 1739 static void 1740 myri10ge_get_ringparam(struct net_device *netdev, 1741 struct ethtool_ringparam *ring) 1742 { 1743 struct myri10ge_priv *mgp = netdev_priv(netdev); 1744 1745 ring->rx_mini_max_pending = mgp->ss[0].rx_small.mask + 1; 1746 ring->rx_max_pending = mgp->ss[0].rx_big.mask + 1; 1747 ring->rx_jumbo_max_pending = 0; 1748 ring->tx_max_pending = mgp->ss[0].tx.mask + 1; 1749 ring->rx_mini_pending = ring->rx_mini_max_pending; 1750 ring->rx_pending = ring->rx_max_pending; 1751 ring->rx_jumbo_pending = ring->rx_jumbo_max_pending; 1752 ring->tx_pending = ring->tx_max_pending; 1753 } 1754 1755 static const char myri10ge_gstrings_main_stats[][ETH_GSTRING_LEN] = { 1756 "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors", 1757 "tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions", 1758 "rx_length_errors", "rx_over_errors", "rx_crc_errors", 1759 "rx_frame_errors", "rx_fifo_errors", "rx_missed_errors", 1760 "tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors", 1761 "tx_heartbeat_errors", "tx_window_errors", 1762 /* device-specific stats */ 1763 "tx_boundary", "WC", "irq", "MSI", "MSIX", 1764 "read_dma_bw_MBs", "write_dma_bw_MBs", "read_write_dma_bw_MBs", 1765 "serial_number", "watchdog_resets", 1766 #ifdef CONFIG_MYRI10GE_DCA 1767 "dca_capable_firmware", "dca_device_present", 1768 #endif 1769 "link_changes", "link_up", "dropped_link_overflow", 1770 "dropped_link_error_or_filtered", 1771 "dropped_pause", "dropped_bad_phy", "dropped_bad_crc32", 1772 "dropped_unicast_filtered", "dropped_multicast_filtered", 1773 "dropped_runt", "dropped_overrun", "dropped_no_small_buffer", 1774 "dropped_no_big_buffer" 1775 }; 1776 1777 static const char myri10ge_gstrings_slice_stats[][ETH_GSTRING_LEN] = { 1778 "----------- slice ---------", 1779 "tx_pkt_start", "tx_pkt_done", "tx_req", "tx_done", 1780 "rx_small_cnt", "rx_big_cnt", 1781 "wake_queue", "stop_queue", "tx_linearized", 1782 "LRO aggregated", "LRO flushed", "LRO avg aggr", "LRO no_desc", 1783 }; 1784 1785 #define MYRI10GE_NET_STATS_LEN 21 1786 #define MYRI10GE_MAIN_STATS_LEN ARRAY_SIZE(myri10ge_gstrings_main_stats) 1787 #define MYRI10GE_SLICE_STATS_LEN ARRAY_SIZE(myri10ge_gstrings_slice_stats) 1788 1789 static void 1790 myri10ge_get_strings(struct net_device *netdev, u32 stringset, u8 * data) 1791 { 1792 struct myri10ge_priv *mgp = netdev_priv(netdev); 1793 int i; 1794 1795 switch (stringset) { 1796 case ETH_SS_STATS: 1797 memcpy(data, *myri10ge_gstrings_main_stats, 1798 sizeof(myri10ge_gstrings_main_stats)); 1799 data += sizeof(myri10ge_gstrings_main_stats); 1800 for (i = 0; i < mgp->num_slices; i++) { 1801 memcpy(data, *myri10ge_gstrings_slice_stats, 1802 sizeof(myri10ge_gstrings_slice_stats)); 1803 data += sizeof(myri10ge_gstrings_slice_stats); 1804 } 1805 break; 1806 } 1807 } 1808 1809 static int myri10ge_get_sset_count(struct net_device *netdev, int sset) 1810 { 1811 struct myri10ge_priv *mgp = netdev_priv(netdev); 1812 1813 switch (sset) { 1814 case ETH_SS_STATS: 1815 return MYRI10GE_MAIN_STATS_LEN + 1816 mgp->num_slices * MYRI10GE_SLICE_STATS_LEN; 1817 default: 1818 return -EOPNOTSUPP; 1819 } 1820 } 1821 1822 static void 1823 myri10ge_get_ethtool_stats(struct net_device *netdev, 1824 struct ethtool_stats *stats, u64 * data) 1825 { 1826 struct myri10ge_priv *mgp = netdev_priv(netdev); 1827 struct myri10ge_slice_state *ss; 1828 struct rtnl_link_stats64 link_stats; 1829 int slice; 1830 int i; 1831 1832 /* force stats update */ 1833 memset(&link_stats, 0, sizeof(link_stats)); 1834 (void)myri10ge_get_stats(netdev, &link_stats); 1835 for (i = 0; i < MYRI10GE_NET_STATS_LEN; i++) 1836 data[i] = ((u64 *)&link_stats)[i]; 1837 1838 data[i++] = (unsigned int)mgp->tx_boundary; 1839 data[i++] = (unsigned int)mgp->wc_enabled; 1840 data[i++] = (unsigned int)mgp->pdev->irq; 1841 data[i++] = (unsigned int)mgp->msi_enabled; 1842 data[i++] = (unsigned int)mgp->msix_enabled; 1843 data[i++] = (unsigned int)mgp->read_dma; 1844 data[i++] = (unsigned int)mgp->write_dma; 1845 data[i++] = (unsigned int)mgp->read_write_dma; 1846 data[i++] = (unsigned int)mgp->serial_number; 1847 data[i++] = (unsigned int)mgp->watchdog_resets; 1848 #ifdef CONFIG_MYRI10GE_DCA 1849 data[i++] = (unsigned int)(mgp->ss[0].dca_tag != NULL); 1850 data[i++] = (unsigned int)(mgp->dca_enabled); 1851 #endif 1852 data[i++] = (unsigned int)mgp->link_changes; 1853 1854 /* firmware stats are useful only in the first slice */ 1855 ss = &mgp->ss[0]; 1856 data[i++] = (unsigned int)ntohl(ss->fw_stats->link_up); 1857 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_link_overflow); 1858 data[i++] = 1859 (unsigned int)ntohl(ss->fw_stats->dropped_link_error_or_filtered); 1860 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_pause); 1861 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_bad_phy); 1862 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_bad_crc32); 1863 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_unicast_filtered); 1864 data[i++] = 1865 (unsigned int)ntohl(ss->fw_stats->dropped_multicast_filtered); 1866 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_runt); 1867 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_overrun); 1868 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_small_buffer); 1869 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_big_buffer); 1870 1871 for (slice = 0; slice < mgp->num_slices; slice++) { 1872 ss = &mgp->ss[slice]; 1873 data[i++] = slice; 1874 data[i++] = (unsigned int)ss->tx.pkt_start; 1875 data[i++] = (unsigned int)ss->tx.pkt_done; 1876 data[i++] = (unsigned int)ss->tx.req; 1877 data[i++] = (unsigned int)ss->tx.done; 1878 data[i++] = (unsigned int)ss->rx_small.cnt; 1879 data[i++] = (unsigned int)ss->rx_big.cnt; 1880 data[i++] = (unsigned int)ss->tx.wake_queue; 1881 data[i++] = (unsigned int)ss->tx.stop_queue; 1882 data[i++] = (unsigned int)ss->tx.linearized; 1883 data[i++] = ss->rx_done.lro_mgr.stats.aggregated; 1884 data[i++] = ss->rx_done.lro_mgr.stats.flushed; 1885 if (ss->rx_done.lro_mgr.stats.flushed) 1886 data[i++] = ss->rx_done.lro_mgr.stats.aggregated / 1887 ss->rx_done.lro_mgr.stats.flushed; 1888 else 1889 data[i++] = 0; 1890 data[i++] = ss->rx_done.lro_mgr.stats.no_desc; 1891 } 1892 } 1893 1894 static void myri10ge_set_msglevel(struct net_device *netdev, u32 value) 1895 { 1896 struct myri10ge_priv *mgp = netdev_priv(netdev); 1897 mgp->msg_enable = value; 1898 } 1899 1900 static u32 myri10ge_get_msglevel(struct net_device *netdev) 1901 { 1902 struct myri10ge_priv *mgp = netdev_priv(netdev); 1903 return mgp->msg_enable; 1904 } 1905 1906 /* 1907 * Use a low-level command to change the LED behavior. Rather than 1908 * blinking (which is the normal case), when identify is used, the 1909 * yellow LED turns solid. 1910 */ 1911 static int myri10ge_led(struct myri10ge_priv *mgp, int on) 1912 { 1913 struct mcp_gen_header *hdr; 1914 struct device *dev = &mgp->pdev->dev; 1915 size_t hdr_off, pattern_off, hdr_len; 1916 u32 pattern = 0xfffffffe; 1917 1918 /* find running firmware header */ 1919 hdr_off = swab32(readl(mgp->sram + MCP_HEADER_PTR_OFFSET)); 1920 if ((hdr_off & 3) || hdr_off + sizeof(*hdr) > mgp->sram_size) { 1921 dev_err(dev, "Running firmware has bad header offset (%d)\n", 1922 (int)hdr_off); 1923 return -EIO; 1924 } 1925 hdr_len = swab32(readl(mgp->sram + hdr_off + 1926 offsetof(struct mcp_gen_header, header_length))); 1927 pattern_off = hdr_off + offsetof(struct mcp_gen_header, led_pattern); 1928 if (pattern_off >= (hdr_len + hdr_off)) { 1929 dev_info(dev, "Firmware does not support LED identification\n"); 1930 return -EINVAL; 1931 } 1932 if (!on) 1933 pattern = swab32(readl(mgp->sram + pattern_off + 4)); 1934 writel(htonl(pattern), mgp->sram + pattern_off); 1935 return 0; 1936 } 1937 1938 static int 1939 myri10ge_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) 1940 { 1941 struct myri10ge_priv *mgp = netdev_priv(netdev); 1942 int rc; 1943 1944 switch (state) { 1945 case ETHTOOL_ID_ACTIVE: 1946 rc = myri10ge_led(mgp, 1); 1947 break; 1948 1949 case ETHTOOL_ID_INACTIVE: 1950 rc = myri10ge_led(mgp, 0); 1951 break; 1952 1953 default: 1954 rc = -EINVAL; 1955 } 1956 1957 return rc; 1958 } 1959 1960 static const struct ethtool_ops myri10ge_ethtool_ops = { 1961 .get_settings = myri10ge_get_settings, 1962 .get_drvinfo = myri10ge_get_drvinfo, 1963 .get_coalesce = myri10ge_get_coalesce, 1964 .set_coalesce = myri10ge_set_coalesce, 1965 .get_pauseparam = myri10ge_get_pauseparam, 1966 .set_pauseparam = myri10ge_set_pauseparam, 1967 .get_ringparam = myri10ge_get_ringparam, 1968 .get_link = ethtool_op_get_link, 1969 .get_strings = myri10ge_get_strings, 1970 .get_sset_count = myri10ge_get_sset_count, 1971 .get_ethtool_stats = myri10ge_get_ethtool_stats, 1972 .set_msglevel = myri10ge_set_msglevel, 1973 .get_msglevel = myri10ge_get_msglevel, 1974 .set_phys_id = myri10ge_phys_id, 1975 }; 1976 1977 static int myri10ge_allocate_rings(struct myri10ge_slice_state *ss) 1978 { 1979 struct myri10ge_priv *mgp = ss->mgp; 1980 struct myri10ge_cmd cmd; 1981 struct net_device *dev = mgp->dev; 1982 int tx_ring_size, rx_ring_size; 1983 int tx_ring_entries, rx_ring_entries; 1984 int i, slice, status; 1985 size_t bytes; 1986 1987 /* get ring sizes */ 1988 slice = ss - mgp->ss; 1989 cmd.data0 = slice; 1990 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd, 0); 1991 tx_ring_size = cmd.data0; 1992 cmd.data0 = slice; 1993 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0); 1994 if (status != 0) 1995 return status; 1996 rx_ring_size = cmd.data0; 1997 1998 tx_ring_entries = tx_ring_size / sizeof(struct mcp_kreq_ether_send); 1999 rx_ring_entries = rx_ring_size / sizeof(struct mcp_dma_addr); 2000 ss->tx.mask = tx_ring_entries - 1; 2001 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 2002 2003 status = -ENOMEM; 2004 2005 /* allocate the host shadow rings */ 2006 2007 bytes = 8 + (MYRI10GE_MAX_SEND_DESC_TSO + 4) 2008 * sizeof(*ss->tx.req_list); 2009 ss->tx.req_bytes = kzalloc(bytes, GFP_KERNEL); 2010 if (ss->tx.req_bytes == NULL) 2011 goto abort_with_nothing; 2012 2013 /* ensure req_list entries are aligned to 8 bytes */ 2014 ss->tx.req_list = (struct mcp_kreq_ether_send *) 2015 ALIGN((unsigned long)ss->tx.req_bytes, 8); 2016 ss->tx.queue_active = 0; 2017 2018 bytes = rx_ring_entries * sizeof(*ss->rx_small.shadow); 2019 ss->rx_small.shadow = kzalloc(bytes, GFP_KERNEL); 2020 if (ss->rx_small.shadow == NULL) 2021 goto abort_with_tx_req_bytes; 2022 2023 bytes = rx_ring_entries * sizeof(*ss->rx_big.shadow); 2024 ss->rx_big.shadow = kzalloc(bytes, GFP_KERNEL); 2025 if (ss->rx_big.shadow == NULL) 2026 goto abort_with_rx_small_shadow; 2027 2028 /* allocate the host info rings */ 2029 2030 bytes = tx_ring_entries * sizeof(*ss->tx.info); 2031 ss->tx.info = kzalloc(bytes, GFP_KERNEL); 2032 if (ss->tx.info == NULL) 2033 goto abort_with_rx_big_shadow; 2034 2035 bytes = rx_ring_entries * sizeof(*ss->rx_small.info); 2036 ss->rx_small.info = kzalloc(bytes, GFP_KERNEL); 2037 if (ss->rx_small.info == NULL) 2038 goto abort_with_tx_info; 2039 2040 bytes = rx_ring_entries * sizeof(*ss->rx_big.info); 2041 ss->rx_big.info = kzalloc(bytes, GFP_KERNEL); 2042 if (ss->rx_big.info == NULL) 2043 goto abort_with_rx_small_info; 2044 2045 /* Fill the receive rings */ 2046 ss->rx_big.cnt = 0; 2047 ss->rx_small.cnt = 0; 2048 ss->rx_big.fill_cnt = 0; 2049 ss->rx_small.fill_cnt = 0; 2050 ss->rx_small.page_offset = MYRI10GE_ALLOC_SIZE; 2051 ss->rx_big.page_offset = MYRI10GE_ALLOC_SIZE; 2052 ss->rx_small.watchdog_needed = 0; 2053 ss->rx_big.watchdog_needed = 0; 2054 if (mgp->small_bytes == 0) { 2055 ss->rx_small.fill_cnt = ss->rx_small.mask + 1; 2056 } else { 2057 myri10ge_alloc_rx_pages(mgp, &ss->rx_small, 2058 mgp->small_bytes + MXGEFW_PAD, 0); 2059 } 2060 2061 if (ss->rx_small.fill_cnt < ss->rx_small.mask + 1) { 2062 netdev_err(dev, "slice-%d: alloced only %d small bufs\n", 2063 slice, ss->rx_small.fill_cnt); 2064 goto abort_with_rx_small_ring; 2065 } 2066 2067 myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0); 2068 if (ss->rx_big.fill_cnt < ss->rx_big.mask + 1) { 2069 netdev_err(dev, "slice-%d: alloced only %d big bufs\n", 2070 slice, ss->rx_big.fill_cnt); 2071 goto abort_with_rx_big_ring; 2072 } 2073 2074 return 0; 2075 2076 abort_with_rx_big_ring: 2077 for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) { 2078 int idx = i & ss->rx_big.mask; 2079 myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx], 2080 mgp->big_bytes); 2081 put_page(ss->rx_big.info[idx].page); 2082 } 2083 2084 abort_with_rx_small_ring: 2085 if (mgp->small_bytes == 0) 2086 ss->rx_small.fill_cnt = ss->rx_small.cnt; 2087 for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) { 2088 int idx = i & ss->rx_small.mask; 2089 myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx], 2090 mgp->small_bytes + MXGEFW_PAD); 2091 put_page(ss->rx_small.info[idx].page); 2092 } 2093 2094 kfree(ss->rx_big.info); 2095 2096 abort_with_rx_small_info: 2097 kfree(ss->rx_small.info); 2098 2099 abort_with_tx_info: 2100 kfree(ss->tx.info); 2101 2102 abort_with_rx_big_shadow: 2103 kfree(ss->rx_big.shadow); 2104 2105 abort_with_rx_small_shadow: 2106 kfree(ss->rx_small.shadow); 2107 2108 abort_with_tx_req_bytes: 2109 kfree(ss->tx.req_bytes); 2110 ss->tx.req_bytes = NULL; 2111 ss->tx.req_list = NULL; 2112 2113 abort_with_nothing: 2114 return status; 2115 } 2116 2117 static void myri10ge_free_rings(struct myri10ge_slice_state *ss) 2118 { 2119 struct myri10ge_priv *mgp = ss->mgp; 2120 struct sk_buff *skb; 2121 struct myri10ge_tx_buf *tx; 2122 int i, len, idx; 2123 2124 /* If not allocated, skip it */ 2125 if (ss->tx.req_list == NULL) 2126 return; 2127 2128 for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) { 2129 idx = i & ss->rx_big.mask; 2130 if (i == ss->rx_big.fill_cnt - 1) 2131 ss->rx_big.info[idx].page_offset = MYRI10GE_ALLOC_SIZE; 2132 myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx], 2133 mgp->big_bytes); 2134 put_page(ss->rx_big.info[idx].page); 2135 } 2136 2137 if (mgp->small_bytes == 0) 2138 ss->rx_small.fill_cnt = ss->rx_small.cnt; 2139 for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) { 2140 idx = i & ss->rx_small.mask; 2141 if (i == ss->rx_small.fill_cnt - 1) 2142 ss->rx_small.info[idx].page_offset = 2143 MYRI10GE_ALLOC_SIZE; 2144 myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx], 2145 mgp->small_bytes + MXGEFW_PAD); 2146 put_page(ss->rx_small.info[idx].page); 2147 } 2148 tx = &ss->tx; 2149 while (tx->done != tx->req) { 2150 idx = tx->done & tx->mask; 2151 skb = tx->info[idx].skb; 2152 2153 /* Mark as free */ 2154 tx->info[idx].skb = NULL; 2155 tx->done++; 2156 len = dma_unmap_len(&tx->info[idx], len); 2157 dma_unmap_len_set(&tx->info[idx], len, 0); 2158 if (skb) { 2159 ss->stats.tx_dropped++; 2160 dev_kfree_skb_any(skb); 2161 if (len) 2162 pci_unmap_single(mgp->pdev, 2163 dma_unmap_addr(&tx->info[idx], 2164 bus), len, 2165 PCI_DMA_TODEVICE); 2166 } else { 2167 if (len) 2168 pci_unmap_page(mgp->pdev, 2169 dma_unmap_addr(&tx->info[idx], 2170 bus), len, 2171 PCI_DMA_TODEVICE); 2172 } 2173 } 2174 kfree(ss->rx_big.info); 2175 2176 kfree(ss->rx_small.info); 2177 2178 kfree(ss->tx.info); 2179 2180 kfree(ss->rx_big.shadow); 2181 2182 kfree(ss->rx_small.shadow); 2183 2184 kfree(ss->tx.req_bytes); 2185 ss->tx.req_bytes = NULL; 2186 ss->tx.req_list = NULL; 2187 } 2188 2189 static int myri10ge_request_irq(struct myri10ge_priv *mgp) 2190 { 2191 struct pci_dev *pdev = mgp->pdev; 2192 struct myri10ge_slice_state *ss; 2193 struct net_device *netdev = mgp->dev; 2194 int i; 2195 int status; 2196 2197 mgp->msi_enabled = 0; 2198 mgp->msix_enabled = 0; 2199 status = 0; 2200 if (myri10ge_msi) { 2201 if (mgp->num_slices > 1) { 2202 status = 2203 pci_enable_msix(pdev, mgp->msix_vectors, 2204 mgp->num_slices); 2205 if (status == 0) { 2206 mgp->msix_enabled = 1; 2207 } else { 2208 dev_err(&pdev->dev, 2209 "Error %d setting up MSI-X\n", status); 2210 return status; 2211 } 2212 } 2213 if (mgp->msix_enabled == 0) { 2214 status = pci_enable_msi(pdev); 2215 if (status != 0) { 2216 dev_err(&pdev->dev, 2217 "Error %d setting up MSI; falling back to xPIC\n", 2218 status); 2219 } else { 2220 mgp->msi_enabled = 1; 2221 } 2222 } 2223 } 2224 if (mgp->msix_enabled) { 2225 for (i = 0; i < mgp->num_slices; i++) { 2226 ss = &mgp->ss[i]; 2227 snprintf(ss->irq_desc, sizeof(ss->irq_desc), 2228 "%s:slice-%d", netdev->name, i); 2229 status = request_irq(mgp->msix_vectors[i].vector, 2230 myri10ge_intr, 0, ss->irq_desc, 2231 ss); 2232 if (status != 0) { 2233 dev_err(&pdev->dev, 2234 "slice %d failed to allocate IRQ\n", i); 2235 i--; 2236 while (i >= 0) { 2237 free_irq(mgp->msix_vectors[i].vector, 2238 &mgp->ss[i]); 2239 i--; 2240 } 2241 pci_disable_msix(pdev); 2242 return status; 2243 } 2244 } 2245 } else { 2246 status = request_irq(pdev->irq, myri10ge_intr, IRQF_SHARED, 2247 mgp->dev->name, &mgp->ss[0]); 2248 if (status != 0) { 2249 dev_err(&pdev->dev, "failed to allocate IRQ\n"); 2250 if (mgp->msi_enabled) 2251 pci_disable_msi(pdev); 2252 } 2253 } 2254 return status; 2255 } 2256 2257 static void myri10ge_free_irq(struct myri10ge_priv *mgp) 2258 { 2259 struct pci_dev *pdev = mgp->pdev; 2260 int i; 2261 2262 if (mgp->msix_enabled) { 2263 for (i = 0; i < mgp->num_slices; i++) 2264 free_irq(mgp->msix_vectors[i].vector, &mgp->ss[i]); 2265 } else { 2266 free_irq(pdev->irq, &mgp->ss[0]); 2267 } 2268 if (mgp->msi_enabled) 2269 pci_disable_msi(pdev); 2270 if (mgp->msix_enabled) 2271 pci_disable_msix(pdev); 2272 } 2273 2274 static int 2275 myri10ge_get_frag_header(struct skb_frag_struct *frag, void **mac_hdr, 2276 void **ip_hdr, void **tcpudp_hdr, 2277 u64 * hdr_flags, void *priv) 2278 { 2279 struct ethhdr *eh; 2280 struct vlan_ethhdr *veh; 2281 struct iphdr *iph; 2282 u8 *va = skb_frag_address(frag); 2283 unsigned long ll_hlen; 2284 /* passed opaque through lro_receive_frags() */ 2285 __wsum csum = (__force __wsum) (unsigned long)priv; 2286 2287 /* find the mac header, aborting if not IPv4 */ 2288 2289 eh = (struct ethhdr *)va; 2290 *mac_hdr = eh; 2291 ll_hlen = ETH_HLEN; 2292 if (eh->h_proto != htons(ETH_P_IP)) { 2293 if (eh->h_proto == htons(ETH_P_8021Q)) { 2294 veh = (struct vlan_ethhdr *)va; 2295 if (veh->h_vlan_encapsulated_proto != htons(ETH_P_IP)) 2296 return -1; 2297 2298 ll_hlen += VLAN_HLEN; 2299 2300 /* 2301 * HW checksum starts ETH_HLEN bytes into 2302 * frame, so we must subtract off the VLAN 2303 * header's checksum before csum can be used 2304 */ 2305 csum = csum_sub(csum, csum_partial(va + ETH_HLEN, 2306 VLAN_HLEN, 0)); 2307 } else { 2308 return -1; 2309 } 2310 } 2311 *hdr_flags = LRO_IPV4; 2312 2313 iph = (struct iphdr *)(va + ll_hlen); 2314 *ip_hdr = iph; 2315 if (iph->protocol != IPPROTO_TCP) 2316 return -1; 2317 if (ip_is_fragment(iph)) 2318 return -1; 2319 *hdr_flags |= LRO_TCP; 2320 *tcpudp_hdr = (u8 *) (*ip_hdr) + (iph->ihl << 2); 2321 2322 /* verify the IP checksum */ 2323 if (unlikely(ip_fast_csum((u8 *) iph, iph->ihl))) 2324 return -1; 2325 2326 /* verify the checksum */ 2327 if (unlikely(csum_tcpudp_magic(iph->saddr, iph->daddr, 2328 ntohs(iph->tot_len) - (iph->ihl << 2), 2329 IPPROTO_TCP, csum))) 2330 return -1; 2331 2332 return 0; 2333 } 2334 2335 static int myri10ge_get_txrx(struct myri10ge_priv *mgp, int slice) 2336 { 2337 struct myri10ge_cmd cmd; 2338 struct myri10ge_slice_state *ss; 2339 int status; 2340 2341 ss = &mgp->ss[slice]; 2342 status = 0; 2343 if (slice == 0 || (mgp->dev->real_num_tx_queues > 1)) { 2344 cmd.data0 = slice; 2345 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, 2346 &cmd, 0); 2347 ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *) 2348 (mgp->sram + cmd.data0); 2349 } 2350 cmd.data0 = slice; 2351 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, 2352 &cmd, 0); 2353 ss->rx_small.lanai = (struct mcp_kreq_ether_recv __iomem *) 2354 (mgp->sram + cmd.data0); 2355 2356 cmd.data0 = slice; 2357 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd, 0); 2358 ss->rx_big.lanai = (struct mcp_kreq_ether_recv __iomem *) 2359 (mgp->sram + cmd.data0); 2360 2361 ss->tx.send_go = (__iomem __be32 *) 2362 (mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 2363 ss->tx.send_stop = (__iomem __be32 *) 2364 (mgp->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 2365 return status; 2366 2367 } 2368 2369 static int myri10ge_set_stats(struct myri10ge_priv *mgp, int slice) 2370 { 2371 struct myri10ge_cmd cmd; 2372 struct myri10ge_slice_state *ss; 2373 int status; 2374 2375 ss = &mgp->ss[slice]; 2376 cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->fw_stats_bus); 2377 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->fw_stats_bus); 2378 cmd.data2 = sizeof(struct mcp_irq_data) | (slice << 16); 2379 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0); 2380 if (status == -ENOSYS) { 2381 dma_addr_t bus = ss->fw_stats_bus; 2382 if (slice != 0) 2383 return -EINVAL; 2384 bus += offsetof(struct mcp_irq_data, send_done_count); 2385 cmd.data0 = MYRI10GE_LOWPART_TO_U32(bus); 2386 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(bus); 2387 status = myri10ge_send_cmd(mgp, 2388 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 2389 &cmd, 0); 2390 /* Firmware cannot support multicast without STATS_DMA_V2 */ 2391 mgp->fw_multicast_support = 0; 2392 } else { 2393 mgp->fw_multicast_support = 1; 2394 } 2395 return 0; 2396 } 2397 2398 static int myri10ge_open(struct net_device *dev) 2399 { 2400 struct myri10ge_slice_state *ss; 2401 struct myri10ge_priv *mgp = netdev_priv(dev); 2402 struct myri10ge_cmd cmd; 2403 int i, status, big_pow2, slice; 2404 u8 *itable; 2405 struct net_lro_mgr *lro_mgr; 2406 2407 if (mgp->running != MYRI10GE_ETH_STOPPED) 2408 return -EBUSY; 2409 2410 mgp->running = MYRI10GE_ETH_STARTING; 2411 status = myri10ge_reset(mgp); 2412 if (status != 0) { 2413 netdev_err(dev, "failed reset\n"); 2414 goto abort_with_nothing; 2415 } 2416 2417 if (mgp->num_slices > 1) { 2418 cmd.data0 = mgp->num_slices; 2419 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 2420 if (mgp->dev->real_num_tx_queues > 1) 2421 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 2422 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 2423 &cmd, 0); 2424 if (status != 0) { 2425 netdev_err(dev, "failed to set number of slices\n"); 2426 goto abort_with_nothing; 2427 } 2428 /* setup the indirection table */ 2429 cmd.data0 = mgp->num_slices; 2430 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 2431 &cmd, 0); 2432 2433 status |= myri10ge_send_cmd(mgp, 2434 MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 2435 &cmd, 0); 2436 if (status != 0) { 2437 netdev_err(dev, "failed to setup rss tables\n"); 2438 goto abort_with_nothing; 2439 } 2440 2441 /* just enable an identity mapping */ 2442 itable = mgp->sram + cmd.data0; 2443 for (i = 0; i < mgp->num_slices; i++) 2444 __raw_writeb(i, &itable[i]); 2445 2446 cmd.data0 = 1; 2447 cmd.data1 = myri10ge_rss_hash; 2448 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE, 2449 &cmd, 0); 2450 if (status != 0) { 2451 netdev_err(dev, "failed to enable slices\n"); 2452 goto abort_with_nothing; 2453 } 2454 } 2455 2456 status = myri10ge_request_irq(mgp); 2457 if (status != 0) 2458 goto abort_with_nothing; 2459 2460 /* decide what small buffer size to use. For good TCP rx 2461 * performance, it is important to not receive 1514 byte 2462 * frames into jumbo buffers, as it confuses the socket buffer 2463 * accounting code, leading to drops and erratic performance. 2464 */ 2465 2466 if (dev->mtu <= ETH_DATA_LEN) 2467 /* enough for a TCP header */ 2468 mgp->small_bytes = (128 > SMP_CACHE_BYTES) 2469 ? (128 - MXGEFW_PAD) 2470 : (SMP_CACHE_BYTES - MXGEFW_PAD); 2471 else 2472 /* enough for a vlan encapsulated ETH_DATA_LEN frame */ 2473 mgp->small_bytes = VLAN_ETH_FRAME_LEN; 2474 2475 /* Override the small buffer size? */ 2476 if (myri10ge_small_bytes >= 0) 2477 mgp->small_bytes = myri10ge_small_bytes; 2478 2479 /* Firmware needs the big buff size as a power of 2. Lie and 2480 * tell him the buffer is larger, because we only use 1 2481 * buffer/pkt, and the mtu will prevent overruns. 2482 */ 2483 big_pow2 = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD; 2484 if (big_pow2 < MYRI10GE_ALLOC_SIZE / 2) { 2485 while (!is_power_of_2(big_pow2)) 2486 big_pow2++; 2487 mgp->big_bytes = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD; 2488 } else { 2489 big_pow2 = MYRI10GE_ALLOC_SIZE; 2490 mgp->big_bytes = big_pow2; 2491 } 2492 2493 /* setup the per-slice data structures */ 2494 for (slice = 0; slice < mgp->num_slices; slice++) { 2495 ss = &mgp->ss[slice]; 2496 2497 status = myri10ge_get_txrx(mgp, slice); 2498 if (status != 0) { 2499 netdev_err(dev, "failed to get ring sizes or locations\n"); 2500 goto abort_with_rings; 2501 } 2502 status = myri10ge_allocate_rings(ss); 2503 if (status != 0) 2504 goto abort_with_rings; 2505 2506 /* only firmware which supports multiple TX queues 2507 * supports setting up the tx stats on non-zero 2508 * slices */ 2509 if (slice == 0 || mgp->dev->real_num_tx_queues > 1) 2510 status = myri10ge_set_stats(mgp, slice); 2511 if (status) { 2512 netdev_err(dev, "Couldn't set stats DMA\n"); 2513 goto abort_with_rings; 2514 } 2515 2516 lro_mgr = &ss->rx_done.lro_mgr; 2517 lro_mgr->dev = dev; 2518 lro_mgr->features = LRO_F_NAPI; 2519 lro_mgr->ip_summed = CHECKSUM_COMPLETE; 2520 lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY; 2521 lro_mgr->max_desc = MYRI10GE_MAX_LRO_DESCRIPTORS; 2522 lro_mgr->lro_arr = ss->rx_done.lro_desc; 2523 lro_mgr->get_frag_header = myri10ge_get_frag_header; 2524 lro_mgr->max_aggr = myri10ge_lro_max_pkts; 2525 lro_mgr->frag_align_pad = 2; 2526 if (lro_mgr->max_aggr > MAX_SKB_FRAGS) 2527 lro_mgr->max_aggr = MAX_SKB_FRAGS; 2528 2529 /* must happen prior to any irq */ 2530 napi_enable(&(ss)->napi); 2531 } 2532 2533 /* now give firmware buffers sizes, and MTU */ 2534 cmd.data0 = dev->mtu + ETH_HLEN + VLAN_HLEN; 2535 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd, 0); 2536 cmd.data0 = mgp->small_bytes; 2537 status |= 2538 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd, 0); 2539 cmd.data0 = big_pow2; 2540 status |= 2541 myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd, 0); 2542 if (status) { 2543 netdev_err(dev, "Couldn't set buffer sizes\n"); 2544 goto abort_with_rings; 2545 } 2546 2547 /* 2548 * Set Linux style TSO mode; this is needed only on newer 2549 * firmware versions. Older versions default to Linux 2550 * style TSO 2551 */ 2552 cmd.data0 = 0; 2553 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd, 0); 2554 if (status && status != -ENOSYS) { 2555 netdev_err(dev, "Couldn't set TSO mode\n"); 2556 goto abort_with_rings; 2557 } 2558 2559 mgp->link_state = ~0U; 2560 mgp->rdma_tags_available = 15; 2561 2562 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd, 0); 2563 if (status) { 2564 netdev_err(dev, "Couldn't bring up link\n"); 2565 goto abort_with_rings; 2566 } 2567 2568 mgp->running = MYRI10GE_ETH_RUNNING; 2569 mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ; 2570 add_timer(&mgp->watchdog_timer); 2571 netif_tx_wake_all_queues(dev); 2572 2573 return 0; 2574 2575 abort_with_rings: 2576 while (slice) { 2577 slice--; 2578 napi_disable(&mgp->ss[slice].napi); 2579 } 2580 for (i = 0; i < mgp->num_slices; i++) 2581 myri10ge_free_rings(&mgp->ss[i]); 2582 2583 myri10ge_free_irq(mgp); 2584 2585 abort_with_nothing: 2586 mgp->running = MYRI10GE_ETH_STOPPED; 2587 return -ENOMEM; 2588 } 2589 2590 static int myri10ge_close(struct net_device *dev) 2591 { 2592 struct myri10ge_priv *mgp = netdev_priv(dev); 2593 struct myri10ge_cmd cmd; 2594 int status, old_down_cnt; 2595 int i; 2596 2597 if (mgp->running != MYRI10GE_ETH_RUNNING) 2598 return 0; 2599 2600 if (mgp->ss[0].tx.req_bytes == NULL) 2601 return 0; 2602 2603 del_timer_sync(&mgp->watchdog_timer); 2604 mgp->running = MYRI10GE_ETH_STOPPING; 2605 for (i = 0; i < mgp->num_slices; i++) { 2606 napi_disable(&mgp->ss[i].napi); 2607 } 2608 netif_carrier_off(dev); 2609 2610 netif_tx_stop_all_queues(dev); 2611 if (mgp->rebooted == 0) { 2612 old_down_cnt = mgp->down_cnt; 2613 mb(); 2614 status = 2615 myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd, 0); 2616 if (status) 2617 netdev_err(dev, "Couldn't bring down link\n"); 2618 2619 wait_event_timeout(mgp->down_wq, old_down_cnt != mgp->down_cnt, 2620 HZ); 2621 if (old_down_cnt == mgp->down_cnt) 2622 netdev_err(dev, "never got down irq\n"); 2623 } 2624 netif_tx_disable(dev); 2625 myri10ge_free_irq(mgp); 2626 for (i = 0; i < mgp->num_slices; i++) 2627 myri10ge_free_rings(&mgp->ss[i]); 2628 2629 mgp->running = MYRI10GE_ETH_STOPPED; 2630 return 0; 2631 } 2632 2633 /* copy an array of struct mcp_kreq_ether_send's to the mcp. Copy 2634 * backwards one at a time and handle ring wraps */ 2635 2636 static inline void 2637 myri10ge_submit_req_backwards(struct myri10ge_tx_buf *tx, 2638 struct mcp_kreq_ether_send *src, int cnt) 2639 { 2640 int idx, starting_slot; 2641 starting_slot = tx->req; 2642 while (cnt > 1) { 2643 cnt--; 2644 idx = (starting_slot + cnt) & tx->mask; 2645 myri10ge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src)); 2646 mb(); 2647 } 2648 } 2649 2650 /* 2651 * copy an array of struct mcp_kreq_ether_send's to the mcp. Copy 2652 * at most 32 bytes at a time, so as to avoid involving the software 2653 * pio handler in the nic. We re-write the first segment's flags 2654 * to mark them valid only after writing the entire chain. 2655 */ 2656 2657 static inline void 2658 myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src, 2659 int cnt) 2660 { 2661 int idx, i; 2662 struct mcp_kreq_ether_send __iomem *dstp, *dst; 2663 struct mcp_kreq_ether_send *srcp; 2664 u8 last_flags; 2665 2666 idx = tx->req & tx->mask; 2667 2668 last_flags = src->flags; 2669 src->flags = 0; 2670 mb(); 2671 dst = dstp = &tx->lanai[idx]; 2672 srcp = src; 2673 2674 if ((idx + cnt) < tx->mask) { 2675 for (i = 0; i < (cnt - 1); i += 2) { 2676 myri10ge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 2677 mb(); /* force write every 32 bytes */ 2678 srcp += 2; 2679 dstp += 2; 2680 } 2681 } else { 2682 /* submit all but the first request, and ensure 2683 * that it is submitted below */ 2684 myri10ge_submit_req_backwards(tx, src, cnt); 2685 i = 0; 2686 } 2687 if (i < cnt) { 2688 /* submit the first request */ 2689 myri10ge_pio_copy(dstp, srcp, sizeof(*src)); 2690 mb(); /* barrier before setting valid flag */ 2691 } 2692 2693 /* re-write the last 32-bits with the valid flags */ 2694 src->flags = last_flags; 2695 put_be32(*((__be32 *) src + 3), (__be32 __iomem *) dst + 3); 2696 tx->req += cnt; 2697 mb(); 2698 } 2699 2700 /* 2701 * Transmit a packet. We need to split the packet so that a single 2702 * segment does not cross myri10ge->tx_boundary, so this makes segment 2703 * counting tricky. So rather than try to count segments up front, we 2704 * just give up if there are too few segments to hold a reasonably 2705 * fragmented packet currently available. If we run 2706 * out of segments while preparing a packet for DMA, we just linearize 2707 * it and try again. 2708 */ 2709 2710 static netdev_tx_t myri10ge_xmit(struct sk_buff *skb, 2711 struct net_device *dev) 2712 { 2713 struct myri10ge_priv *mgp = netdev_priv(dev); 2714 struct myri10ge_slice_state *ss; 2715 struct mcp_kreq_ether_send *req; 2716 struct myri10ge_tx_buf *tx; 2717 struct skb_frag_struct *frag; 2718 struct netdev_queue *netdev_queue; 2719 dma_addr_t bus; 2720 u32 low; 2721 __be32 high_swapped; 2722 unsigned int len; 2723 int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments; 2724 u16 pseudo_hdr_offset, cksum_offset, queue; 2725 int cum_len, seglen, boundary, rdma_count; 2726 u8 flags, odd_flag; 2727 2728 queue = skb_get_queue_mapping(skb); 2729 ss = &mgp->ss[queue]; 2730 netdev_queue = netdev_get_tx_queue(mgp->dev, queue); 2731 tx = &ss->tx; 2732 2733 again: 2734 req = tx->req_list; 2735 avail = tx->mask - 1 - (tx->req - tx->done); 2736 2737 mss = 0; 2738 max_segments = MXGEFW_MAX_SEND_DESC; 2739 2740 if (skb_is_gso(skb)) { 2741 mss = skb_shinfo(skb)->gso_size; 2742 max_segments = MYRI10GE_MAX_SEND_DESC_TSO; 2743 } 2744 2745 if ((unlikely(avail < max_segments))) { 2746 /* we are out of transmit resources */ 2747 tx->stop_queue++; 2748 netif_tx_stop_queue(netdev_queue); 2749 return NETDEV_TX_BUSY; 2750 } 2751 2752 /* Setup checksum offloading, if needed */ 2753 cksum_offset = 0; 2754 pseudo_hdr_offset = 0; 2755 odd_flag = 0; 2756 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST); 2757 if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { 2758 cksum_offset = skb_checksum_start_offset(skb); 2759 pseudo_hdr_offset = cksum_offset + skb->csum_offset; 2760 /* If the headers are excessively large, then we must 2761 * fall back to a software checksum */ 2762 if (unlikely(!mss && (cksum_offset > 255 || 2763 pseudo_hdr_offset > 127))) { 2764 if (skb_checksum_help(skb)) 2765 goto drop; 2766 cksum_offset = 0; 2767 pseudo_hdr_offset = 0; 2768 } else { 2769 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2770 flags |= MXGEFW_FLAGS_CKSUM; 2771 } 2772 } 2773 2774 cum_len = 0; 2775 2776 if (mss) { /* TSO */ 2777 /* this removes any CKSUM flag from before */ 2778 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST); 2779 2780 /* negative cum_len signifies to the 2781 * send loop that we are still in the 2782 * header portion of the TSO packet. 2783 * TSO header can be at most 1KB long */ 2784 cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb)); 2785 2786 /* for IPv6 TSO, the checksum offset stores the 2787 * TCP header length, to save the firmware from 2788 * the need to parse the headers */ 2789 if (skb_is_gso_v6(skb)) { 2790 cksum_offset = tcp_hdrlen(skb); 2791 /* Can only handle headers <= max_tso6 long */ 2792 if (unlikely(-cum_len > mgp->max_tso6)) 2793 return myri10ge_sw_tso(skb, dev); 2794 } 2795 /* for TSO, pseudo_hdr_offset holds mss. 2796 * The firmware figures out where to put 2797 * the checksum by parsing the header. */ 2798 pseudo_hdr_offset = mss; 2799 } else 2800 /* Mark small packets, and pad out tiny packets */ 2801 if (skb->len <= MXGEFW_SEND_SMALL_SIZE) { 2802 flags |= MXGEFW_FLAGS_SMALL; 2803 2804 /* pad frames to at least ETH_ZLEN bytes */ 2805 if (unlikely(skb->len < ETH_ZLEN)) { 2806 if (skb_padto(skb, ETH_ZLEN)) { 2807 /* The packet is gone, so we must 2808 * return 0 */ 2809 ss->stats.tx_dropped += 1; 2810 return NETDEV_TX_OK; 2811 } 2812 /* adjust the len to account for the zero pad 2813 * so that the nic can know how long it is */ 2814 skb->len = ETH_ZLEN; 2815 } 2816 } 2817 2818 /* map the skb for DMA */ 2819 len = skb_headlen(skb); 2820 idx = tx->req & tx->mask; 2821 tx->info[idx].skb = skb; 2822 bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE); 2823 dma_unmap_addr_set(&tx->info[idx], bus, bus); 2824 dma_unmap_len_set(&tx->info[idx], len, len); 2825 2826 frag_cnt = skb_shinfo(skb)->nr_frags; 2827 frag_idx = 0; 2828 count = 0; 2829 rdma_count = 0; 2830 2831 /* "rdma_count" is the number of RDMAs belonging to the 2832 * current packet BEFORE the current send request. For 2833 * non-TSO packets, this is equal to "count". 2834 * For TSO packets, rdma_count needs to be reset 2835 * to 0 after a segment cut. 2836 * 2837 * The rdma_count field of the send request is 2838 * the number of RDMAs of the packet starting at 2839 * that request. For TSO send requests with one ore more cuts 2840 * in the middle, this is the number of RDMAs starting 2841 * after the last cut in the request. All previous 2842 * segments before the last cut implicitly have 1 RDMA. 2843 * 2844 * Since the number of RDMAs is not known beforehand, 2845 * it must be filled-in retroactively - after each 2846 * segmentation cut or at the end of the entire packet. 2847 */ 2848 2849 while (1) { 2850 /* Break the SKB or Fragment up into pieces which 2851 * do not cross mgp->tx_boundary */ 2852 low = MYRI10GE_LOWPART_TO_U32(bus); 2853 high_swapped = htonl(MYRI10GE_HIGHPART_TO_U32(bus)); 2854 while (len) { 2855 u8 flags_next; 2856 int cum_len_next; 2857 2858 if (unlikely(count == max_segments)) 2859 goto abort_linearize; 2860 2861 boundary = 2862 (low + mgp->tx_boundary) & ~(mgp->tx_boundary - 1); 2863 seglen = boundary - low; 2864 if (seglen > len) 2865 seglen = len; 2866 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 2867 cum_len_next = cum_len + seglen; 2868 if (mss) { /* TSO */ 2869 (req - rdma_count)->rdma_count = rdma_count + 1; 2870 2871 if (likely(cum_len >= 0)) { /* payload */ 2872 int next_is_first, chop; 2873 2874 chop = (cum_len_next > mss); 2875 cum_len_next = cum_len_next % mss; 2876 next_is_first = (cum_len_next == 0); 2877 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 2878 flags_next |= next_is_first * 2879 MXGEFW_FLAGS_FIRST; 2880 rdma_count |= -(chop | next_is_first); 2881 rdma_count += chop & !next_is_first; 2882 } else if (likely(cum_len_next >= 0)) { /* header ends */ 2883 int small; 2884 2885 rdma_count = -1; 2886 cum_len_next = 0; 2887 seglen = -cum_len; 2888 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 2889 flags_next = MXGEFW_FLAGS_TSO_PLD | 2890 MXGEFW_FLAGS_FIRST | 2891 (small * MXGEFW_FLAGS_SMALL); 2892 } 2893 } 2894 req->addr_high = high_swapped; 2895 req->addr_low = htonl(low); 2896 req->pseudo_hdr_offset = htons(pseudo_hdr_offset); 2897 req->pad = 0; /* complete solid 16-byte block; does this matter? */ 2898 req->rdma_count = 1; 2899 req->length = htons(seglen); 2900 req->cksum_offset = cksum_offset; 2901 req->flags = flags | ((cum_len & 1) * odd_flag); 2902 2903 low += seglen; 2904 len -= seglen; 2905 cum_len = cum_len_next; 2906 flags = flags_next; 2907 req++; 2908 count++; 2909 rdma_count++; 2910 if (cksum_offset != 0 && !(mss && skb_is_gso_v6(skb))) { 2911 if (unlikely(cksum_offset > seglen)) 2912 cksum_offset -= seglen; 2913 else 2914 cksum_offset = 0; 2915 } 2916 } 2917 if (frag_idx == frag_cnt) 2918 break; 2919 2920 /* map next fragment for DMA */ 2921 idx = (count + tx->req) & tx->mask; 2922 frag = &skb_shinfo(skb)->frags[frag_idx]; 2923 frag_idx++; 2924 len = skb_frag_size(frag); 2925 bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len, 2926 DMA_TO_DEVICE); 2927 dma_unmap_addr_set(&tx->info[idx], bus, bus); 2928 dma_unmap_len_set(&tx->info[idx], len, len); 2929 } 2930 2931 (req - rdma_count)->rdma_count = rdma_count; 2932 if (mss) 2933 do { 2934 req--; 2935 req->flags |= MXGEFW_FLAGS_TSO_LAST; 2936 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | 2937 MXGEFW_FLAGS_FIRST))); 2938 idx = ((count - 1) + tx->req) & tx->mask; 2939 tx->info[idx].last = 1; 2940 myri10ge_submit_req(tx, tx->req_list, count); 2941 /* if using multiple tx queues, make sure NIC polls the 2942 * current slice */ 2943 if ((mgp->dev->real_num_tx_queues > 1) && tx->queue_active == 0) { 2944 tx->queue_active = 1; 2945 put_be32(htonl(1), tx->send_go); 2946 mb(); 2947 mmiowb(); 2948 } 2949 tx->pkt_start++; 2950 if ((avail - count) < MXGEFW_MAX_SEND_DESC) { 2951 tx->stop_queue++; 2952 netif_tx_stop_queue(netdev_queue); 2953 } 2954 return NETDEV_TX_OK; 2955 2956 abort_linearize: 2957 /* Free any DMA resources we've alloced and clear out the skb 2958 * slot so as to not trip up assertions, and to avoid a 2959 * double-free if linearizing fails */ 2960 2961 last_idx = (idx + 1) & tx->mask; 2962 idx = tx->req & tx->mask; 2963 tx->info[idx].skb = NULL; 2964 do { 2965 len = dma_unmap_len(&tx->info[idx], len); 2966 if (len) { 2967 if (tx->info[idx].skb != NULL) 2968 pci_unmap_single(mgp->pdev, 2969 dma_unmap_addr(&tx->info[idx], 2970 bus), len, 2971 PCI_DMA_TODEVICE); 2972 else 2973 pci_unmap_page(mgp->pdev, 2974 dma_unmap_addr(&tx->info[idx], 2975 bus), len, 2976 PCI_DMA_TODEVICE); 2977 dma_unmap_len_set(&tx->info[idx], len, 0); 2978 tx->info[idx].skb = NULL; 2979 } 2980 idx = (idx + 1) & tx->mask; 2981 } while (idx != last_idx); 2982 if (skb_is_gso(skb)) { 2983 netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n"); 2984 goto drop; 2985 } 2986 2987 if (skb_linearize(skb)) 2988 goto drop; 2989 2990 tx->linearized++; 2991 goto again; 2992 2993 drop: 2994 dev_kfree_skb_any(skb); 2995 ss->stats.tx_dropped += 1; 2996 return NETDEV_TX_OK; 2997 2998 } 2999 3000 static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, 3001 struct net_device *dev) 3002 { 3003 struct sk_buff *segs, *curr; 3004 struct myri10ge_priv *mgp = netdev_priv(dev); 3005 struct myri10ge_slice_state *ss; 3006 netdev_tx_t status; 3007 3008 segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO6); 3009 if (IS_ERR(segs)) 3010 goto drop; 3011 3012 while (segs) { 3013 curr = segs; 3014 segs = segs->next; 3015 curr->next = NULL; 3016 status = myri10ge_xmit(curr, dev); 3017 if (status != 0) { 3018 dev_kfree_skb_any(curr); 3019 if (segs != NULL) { 3020 curr = segs; 3021 segs = segs->next; 3022 curr->next = NULL; 3023 dev_kfree_skb_any(segs); 3024 } 3025 goto drop; 3026 } 3027 } 3028 dev_kfree_skb_any(skb); 3029 return NETDEV_TX_OK; 3030 3031 drop: 3032 ss = &mgp->ss[skb_get_queue_mapping(skb)]; 3033 dev_kfree_skb_any(skb); 3034 ss->stats.tx_dropped += 1; 3035 return NETDEV_TX_OK; 3036 } 3037 3038 static struct rtnl_link_stats64 *myri10ge_get_stats(struct net_device *dev, 3039 struct rtnl_link_stats64 *stats) 3040 { 3041 const struct myri10ge_priv *mgp = netdev_priv(dev); 3042 const struct myri10ge_slice_netstats *slice_stats; 3043 int i; 3044 3045 for (i = 0; i < mgp->num_slices; i++) { 3046 slice_stats = &mgp->ss[i].stats; 3047 stats->rx_packets += slice_stats->rx_packets; 3048 stats->tx_packets += slice_stats->tx_packets; 3049 stats->rx_bytes += slice_stats->rx_bytes; 3050 stats->tx_bytes += slice_stats->tx_bytes; 3051 stats->rx_dropped += slice_stats->rx_dropped; 3052 stats->tx_dropped += slice_stats->tx_dropped; 3053 } 3054 return stats; 3055 } 3056 3057 static void myri10ge_set_multicast_list(struct net_device *dev) 3058 { 3059 struct myri10ge_priv *mgp = netdev_priv(dev); 3060 struct myri10ge_cmd cmd; 3061 struct netdev_hw_addr *ha; 3062 __be32 data[2] = { 0, 0 }; 3063 int err; 3064 3065 /* can be called from atomic contexts, 3066 * pass 1 to force atomicity in myri10ge_send_cmd() */ 3067 myri10ge_change_promisc(mgp, dev->flags & IFF_PROMISC, 1); 3068 3069 /* This firmware is known to not support multicast */ 3070 if (!mgp->fw_multicast_support) 3071 return; 3072 3073 /* Disable multicast filtering */ 3074 3075 err = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_ALLMULTI, &cmd, 1); 3076 if (err != 0) { 3077 netdev_err(dev, "Failed MXGEFW_ENABLE_ALLMULTI, error status: %d\n", 3078 err); 3079 goto abort; 3080 } 3081 3082 if ((dev->flags & IFF_ALLMULTI) || mgp->adopted_rx_filter_bug) { 3083 /* request to disable multicast filtering, so quit here */ 3084 return; 3085 } 3086 3087 /* Flush the filters */ 3088 3089 err = myri10ge_send_cmd(mgp, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, 3090 &cmd, 1); 3091 if (err != 0) { 3092 netdev_err(dev, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, error status: %d\n", 3093 err); 3094 goto abort; 3095 } 3096 3097 /* Walk the multicast list, and add each address */ 3098 netdev_for_each_mc_addr(ha, dev) { 3099 memcpy(data, &ha->addr, 6); 3100 cmd.data0 = ntohl(data[0]); 3101 cmd.data1 = ntohl(data[1]); 3102 err = myri10ge_send_cmd(mgp, MXGEFW_JOIN_MULTICAST_GROUP, 3103 &cmd, 1); 3104 3105 if (err != 0) { 3106 netdev_err(dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, error status:%d %pM\n", 3107 err, ha->addr); 3108 goto abort; 3109 } 3110 } 3111 /* Enable multicast filtering */ 3112 err = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_ALLMULTI, &cmd, 1); 3113 if (err != 0) { 3114 netdev_err(dev, "Failed MXGEFW_DISABLE_ALLMULTI, error status: %d\n", 3115 err); 3116 goto abort; 3117 } 3118 3119 return; 3120 3121 abort: 3122 return; 3123 } 3124 3125 static int myri10ge_set_mac_address(struct net_device *dev, void *addr) 3126 { 3127 struct sockaddr *sa = addr; 3128 struct myri10ge_priv *mgp = netdev_priv(dev); 3129 int status; 3130 3131 if (!is_valid_ether_addr(sa->sa_data)) 3132 return -EADDRNOTAVAIL; 3133 3134 status = myri10ge_update_mac_address(mgp, sa->sa_data); 3135 if (status != 0) { 3136 netdev_err(dev, "changing mac address failed with %d\n", 3137 status); 3138 return status; 3139 } 3140 3141 /* change the dev structure */ 3142 memcpy(dev->dev_addr, sa->sa_data, 6); 3143 return 0; 3144 } 3145 3146 static netdev_features_t myri10ge_fix_features(struct net_device *dev, 3147 netdev_features_t features) 3148 { 3149 if (!(features & NETIF_F_RXCSUM)) 3150 features &= ~NETIF_F_LRO; 3151 3152 return features; 3153 } 3154 3155 static int myri10ge_change_mtu(struct net_device *dev, int new_mtu) 3156 { 3157 struct myri10ge_priv *mgp = netdev_priv(dev); 3158 int error = 0; 3159 3160 if ((new_mtu < 68) || (ETH_HLEN + new_mtu > MYRI10GE_MAX_ETHER_MTU)) { 3161 netdev_err(dev, "new mtu (%d) is not valid\n", new_mtu); 3162 return -EINVAL; 3163 } 3164 netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu); 3165 if (mgp->running) { 3166 /* if we change the mtu on an active device, we must 3167 * reset the device so the firmware sees the change */ 3168 myri10ge_close(dev); 3169 dev->mtu = new_mtu; 3170 myri10ge_open(dev); 3171 } else 3172 dev->mtu = new_mtu; 3173 3174 return error; 3175 } 3176 3177 /* 3178 * Enable ECRC to align PCI-E Completion packets on an 8-byte boundary. 3179 * Only do it if the bridge is a root port since we don't want to disturb 3180 * any other device, except if forced with myri10ge_ecrc_enable > 1. 3181 */ 3182 3183 static void myri10ge_enable_ecrc(struct myri10ge_priv *mgp) 3184 { 3185 struct pci_dev *bridge = mgp->pdev->bus->self; 3186 struct device *dev = &mgp->pdev->dev; 3187 int cap; 3188 unsigned err_cap; 3189 int ret; 3190 3191 if (!myri10ge_ecrc_enable || !bridge) 3192 return; 3193 3194 /* check that the bridge is a root port */ 3195 if (pci_pcie_type(bridge) != PCI_EXP_TYPE_ROOT_PORT) { 3196 if (myri10ge_ecrc_enable > 1) { 3197 struct pci_dev *prev_bridge, *old_bridge = bridge; 3198 3199 /* Walk the hierarchy up to the root port 3200 * where ECRC has to be enabled */ 3201 do { 3202 prev_bridge = bridge; 3203 bridge = bridge->bus->self; 3204 if (!bridge || prev_bridge == bridge) { 3205 dev_err(dev, 3206 "Failed to find root port" 3207 " to force ECRC\n"); 3208 return; 3209 } 3210 } while (pci_pcie_type(bridge) != 3211 PCI_EXP_TYPE_ROOT_PORT); 3212 3213 dev_info(dev, 3214 "Forcing ECRC on non-root port %s" 3215 " (enabling on root port %s)\n", 3216 pci_name(old_bridge), pci_name(bridge)); 3217 } else { 3218 dev_err(dev, 3219 "Not enabling ECRC on non-root port %s\n", 3220 pci_name(bridge)); 3221 return; 3222 } 3223 } 3224 3225 cap = pci_find_ext_capability(bridge, PCI_EXT_CAP_ID_ERR); 3226 if (!cap) 3227 return; 3228 3229 ret = pci_read_config_dword(bridge, cap + PCI_ERR_CAP, &err_cap); 3230 if (ret) { 3231 dev_err(dev, "failed reading ext-conf-space of %s\n", 3232 pci_name(bridge)); 3233 dev_err(dev, "\t pci=nommconf in use? " 3234 "or buggy/incomplete/absent ACPI MCFG attr?\n"); 3235 return; 3236 } 3237 if (!(err_cap & PCI_ERR_CAP_ECRC_GENC)) 3238 return; 3239 3240 err_cap |= PCI_ERR_CAP_ECRC_GENE; 3241 pci_write_config_dword(bridge, cap + PCI_ERR_CAP, err_cap); 3242 dev_info(dev, "Enabled ECRC on upstream bridge %s\n", pci_name(bridge)); 3243 } 3244 3245 /* 3246 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 3247 * when the PCI-E Completion packets are aligned on an 8-byte 3248 * boundary. Some PCI-E chip sets always align Completion packets; on 3249 * the ones that do not, the alignment can be enforced by enabling 3250 * ECRC generation (if supported). 3251 * 3252 * When PCI-E Completion packets are not aligned, it is actually more 3253 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 3254 * 3255 * If the driver can neither enable ECRC nor verify that it has 3256 * already been enabled, then it must use a firmware image which works 3257 * around unaligned completion packets (myri10ge_rss_ethp_z8e.dat), and it 3258 * should also ensure that it never gives the device a Read-DMA which is 3259 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 3260 * enabled, then the driver should use the aligned (myri10ge_rss_eth_z8e.dat) 3261 * firmware image, and set tx_boundary to 4KB. 3262 */ 3263 3264 static void myri10ge_firmware_probe(struct myri10ge_priv *mgp) 3265 { 3266 struct pci_dev *pdev = mgp->pdev; 3267 struct device *dev = &pdev->dev; 3268 int status; 3269 3270 mgp->tx_boundary = 4096; 3271 /* 3272 * Verify the max read request size was set to 4KB 3273 * before trying the test with 4KB. 3274 */ 3275 status = pcie_get_readrq(pdev); 3276 if (status < 0) { 3277 dev_err(dev, "Couldn't read max read req size: %d\n", status); 3278 goto abort; 3279 } 3280 if (status != 4096) { 3281 dev_warn(dev, "Max Read Request size != 4096 (%d)\n", status); 3282 mgp->tx_boundary = 2048; 3283 } 3284 /* 3285 * load the optimized firmware (which assumes aligned PCIe 3286 * completions) in order to see if it works on this host. 3287 */ 3288 set_fw_name(mgp, myri10ge_fw_aligned, false); 3289 status = myri10ge_load_firmware(mgp, 1); 3290 if (status != 0) { 3291 goto abort; 3292 } 3293 3294 /* 3295 * Enable ECRC if possible 3296 */ 3297 myri10ge_enable_ecrc(mgp); 3298 3299 /* 3300 * Run a DMA test which watches for unaligned completions and 3301 * aborts on the first one seen. 3302 */ 3303 3304 status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST); 3305 if (status == 0) 3306 return; /* keep the aligned firmware */ 3307 3308 if (status != -E2BIG) 3309 dev_warn(dev, "DMA test failed: %d\n", status); 3310 if (status == -ENOSYS) 3311 dev_warn(dev, "Falling back to ethp! " 3312 "Please install up to date fw\n"); 3313 abort: 3314 /* fall back to using the unaligned firmware */ 3315 mgp->tx_boundary = 2048; 3316 set_fw_name(mgp, myri10ge_fw_unaligned, false); 3317 } 3318 3319 static void myri10ge_select_firmware(struct myri10ge_priv *mgp) 3320 { 3321 int overridden = 0; 3322 3323 if (myri10ge_force_firmware == 0) { 3324 int link_width; 3325 u16 lnk; 3326 3327 pcie_capability_read_word(mgp->pdev, PCI_EXP_LNKSTA, &lnk); 3328 link_width = (lnk >> 4) & 0x3f; 3329 3330 /* Check to see if Link is less than 8 or if the 3331 * upstream bridge is known to provide aligned 3332 * completions */ 3333 if (link_width < 8) { 3334 dev_info(&mgp->pdev->dev, "PCIE x%d Link\n", 3335 link_width); 3336 mgp->tx_boundary = 4096; 3337 set_fw_name(mgp, myri10ge_fw_aligned, false); 3338 } else { 3339 myri10ge_firmware_probe(mgp); 3340 } 3341 } else { 3342 if (myri10ge_force_firmware == 1) { 3343 dev_info(&mgp->pdev->dev, 3344 "Assuming aligned completions (forced)\n"); 3345 mgp->tx_boundary = 4096; 3346 set_fw_name(mgp, myri10ge_fw_aligned, false); 3347 } else { 3348 dev_info(&mgp->pdev->dev, 3349 "Assuming unaligned completions (forced)\n"); 3350 mgp->tx_boundary = 2048; 3351 set_fw_name(mgp, myri10ge_fw_unaligned, false); 3352 } 3353 } 3354 3355 kparam_block_sysfs_write(myri10ge_fw_name); 3356 if (myri10ge_fw_name != NULL) { 3357 char *fw_name = kstrdup(myri10ge_fw_name, GFP_KERNEL); 3358 if (fw_name) { 3359 overridden = 1; 3360 set_fw_name(mgp, fw_name, true); 3361 } 3362 } 3363 kparam_unblock_sysfs_write(myri10ge_fw_name); 3364 3365 if (mgp->board_number < MYRI10GE_MAX_BOARDS && 3366 myri10ge_fw_names[mgp->board_number] != NULL && 3367 strlen(myri10ge_fw_names[mgp->board_number])) { 3368 set_fw_name(mgp, myri10ge_fw_names[mgp->board_number], false); 3369 overridden = 1; 3370 } 3371 if (overridden) 3372 dev_info(&mgp->pdev->dev, "overriding firmware to %s\n", 3373 mgp->fw_name); 3374 } 3375 3376 static void myri10ge_mask_surprise_down(struct pci_dev *pdev) 3377 { 3378 struct pci_dev *bridge = pdev->bus->self; 3379 int cap; 3380 u32 mask; 3381 3382 if (bridge == NULL) 3383 return; 3384 3385 cap = pci_find_ext_capability(bridge, PCI_EXT_CAP_ID_ERR); 3386 if (cap) { 3387 /* a sram parity error can cause a surprise link 3388 * down; since we expect and can recover from sram 3389 * parity errors, mask surprise link down events */ 3390 pci_read_config_dword(bridge, cap + PCI_ERR_UNCOR_MASK, &mask); 3391 mask |= 0x20; 3392 pci_write_config_dword(bridge, cap + PCI_ERR_UNCOR_MASK, mask); 3393 } 3394 } 3395 3396 #ifdef CONFIG_PM 3397 static int myri10ge_suspend(struct pci_dev *pdev, pm_message_t state) 3398 { 3399 struct myri10ge_priv *mgp; 3400 struct net_device *netdev; 3401 3402 mgp = pci_get_drvdata(pdev); 3403 if (mgp == NULL) 3404 return -EINVAL; 3405 netdev = mgp->dev; 3406 3407 netif_device_detach(netdev); 3408 if (netif_running(netdev)) { 3409 netdev_info(netdev, "closing\n"); 3410 rtnl_lock(); 3411 myri10ge_close(netdev); 3412 rtnl_unlock(); 3413 } 3414 myri10ge_dummy_rdma(mgp, 0); 3415 pci_save_state(pdev); 3416 pci_disable_device(pdev); 3417 3418 return pci_set_power_state(pdev, pci_choose_state(pdev, state)); 3419 } 3420 3421 static int myri10ge_resume(struct pci_dev *pdev) 3422 { 3423 struct myri10ge_priv *mgp; 3424 struct net_device *netdev; 3425 int status; 3426 u16 vendor; 3427 3428 mgp = pci_get_drvdata(pdev); 3429 if (mgp == NULL) 3430 return -EINVAL; 3431 netdev = mgp->dev; 3432 pci_set_power_state(pdev, 0); /* zeros conf space as a side effect */ 3433 msleep(5); /* give card time to respond */ 3434 pci_read_config_word(mgp->pdev, PCI_VENDOR_ID, &vendor); 3435 if (vendor == 0xffff) { 3436 netdev_err(mgp->dev, "device disappeared!\n"); 3437 return -EIO; 3438 } 3439 3440 pci_restore_state(pdev); 3441 3442 status = pci_enable_device(pdev); 3443 if (status) { 3444 dev_err(&pdev->dev, "failed to enable device\n"); 3445 return status; 3446 } 3447 3448 pci_set_master(pdev); 3449 3450 myri10ge_reset(mgp); 3451 myri10ge_dummy_rdma(mgp, 1); 3452 3453 /* Save configuration space to be restored if the 3454 * nic resets due to a parity error */ 3455 pci_save_state(pdev); 3456 3457 if (netif_running(netdev)) { 3458 rtnl_lock(); 3459 status = myri10ge_open(netdev); 3460 rtnl_unlock(); 3461 if (status != 0) 3462 goto abort_with_enabled; 3463 3464 } 3465 netif_device_attach(netdev); 3466 3467 return 0; 3468 3469 abort_with_enabled: 3470 pci_disable_device(pdev); 3471 return -EIO; 3472 3473 } 3474 #endif /* CONFIG_PM */ 3475 3476 static u32 myri10ge_read_reboot(struct myri10ge_priv *mgp) 3477 { 3478 struct pci_dev *pdev = mgp->pdev; 3479 int vs = mgp->vendor_specific_offset; 3480 u32 reboot; 3481 3482 /*enter read32 mode */ 3483 pci_write_config_byte(pdev, vs + 0x10, 0x3); 3484 3485 /*read REBOOT_STATUS (0xfffffff0) */ 3486 pci_write_config_dword(pdev, vs + 0x18, 0xfffffff0); 3487 pci_read_config_dword(pdev, vs + 0x14, &reboot); 3488 return reboot; 3489 } 3490 3491 static void 3492 myri10ge_check_slice(struct myri10ge_slice_state *ss, int *reset_needed, 3493 int *busy_slice_cnt, u32 rx_pause_cnt) 3494 { 3495 struct myri10ge_priv *mgp = ss->mgp; 3496 int slice = ss - mgp->ss; 3497 3498 if (ss->tx.req != ss->tx.done && 3499 ss->tx.done == ss->watchdog_tx_done && 3500 ss->watchdog_tx_req != ss->watchdog_tx_done) { 3501 /* nic seems like it might be stuck.. */ 3502 if (rx_pause_cnt != mgp->watchdog_pause) { 3503 if (net_ratelimit()) 3504 netdev_warn(mgp->dev, "slice %d: TX paused, " 3505 "check link partner\n", slice); 3506 } else { 3507 netdev_warn(mgp->dev, 3508 "slice %d: TX stuck %d %d %d %d %d %d\n", 3509 slice, ss->tx.queue_active, ss->tx.req, 3510 ss->tx.done, ss->tx.pkt_start, 3511 ss->tx.pkt_done, 3512 (int)ntohl(mgp->ss[slice].fw_stats-> 3513 send_done_count)); 3514 *reset_needed = 1; 3515 ss->stuck = 1; 3516 } 3517 } 3518 if (ss->watchdog_tx_done != ss->tx.done || 3519 ss->watchdog_rx_done != ss->rx_done.cnt) { 3520 *busy_slice_cnt += 1; 3521 } 3522 ss->watchdog_tx_done = ss->tx.done; 3523 ss->watchdog_tx_req = ss->tx.req; 3524 ss->watchdog_rx_done = ss->rx_done.cnt; 3525 } 3526 3527 /* 3528 * This watchdog is used to check whether the board has suffered 3529 * from a parity error and needs to be recovered. 3530 */ 3531 static void myri10ge_watchdog(struct work_struct *work) 3532 { 3533 struct myri10ge_priv *mgp = 3534 container_of(work, struct myri10ge_priv, watchdog_work); 3535 struct myri10ge_slice_state *ss; 3536 u32 reboot, rx_pause_cnt; 3537 int status, rebooted; 3538 int i; 3539 int reset_needed = 0; 3540 int busy_slice_cnt = 0; 3541 u16 cmd, vendor; 3542 3543 mgp->watchdog_resets++; 3544 pci_read_config_word(mgp->pdev, PCI_COMMAND, &cmd); 3545 rebooted = 0; 3546 if ((cmd & PCI_COMMAND_MASTER) == 0) { 3547 /* Bus master DMA disabled? Check to see 3548 * if the card rebooted due to a parity error 3549 * For now, just report it */ 3550 reboot = myri10ge_read_reboot(mgp); 3551 netdev_err(mgp->dev, "NIC rebooted (0x%x),%s resetting\n", 3552 reboot, myri10ge_reset_recover ? "" : " not"); 3553 if (myri10ge_reset_recover == 0) 3554 return; 3555 rtnl_lock(); 3556 mgp->rebooted = 1; 3557 rebooted = 1; 3558 myri10ge_close(mgp->dev); 3559 myri10ge_reset_recover--; 3560 mgp->rebooted = 0; 3561 /* 3562 * A rebooted nic will come back with config space as 3563 * it was after power was applied to PCIe bus. 3564 * Attempt to restore config space which was saved 3565 * when the driver was loaded, or the last time the 3566 * nic was resumed from power saving mode. 3567 */ 3568 pci_restore_state(mgp->pdev); 3569 3570 /* save state again for accounting reasons */ 3571 pci_save_state(mgp->pdev); 3572 3573 } else { 3574 /* if we get back -1's from our slot, perhaps somebody 3575 * powered off our card. Don't try to reset it in 3576 * this case */ 3577 if (cmd == 0xffff) { 3578 pci_read_config_word(mgp->pdev, PCI_VENDOR_ID, &vendor); 3579 if (vendor == 0xffff) { 3580 netdev_err(mgp->dev, "device disappeared!\n"); 3581 return; 3582 } 3583 } 3584 /* Perhaps it is a software error. See if stuck slice 3585 * has recovered, reset if not */ 3586 rx_pause_cnt = ntohl(mgp->ss[0].fw_stats->dropped_pause); 3587 for (i = 0; i < mgp->num_slices; i++) { 3588 ss = mgp->ss; 3589 if (ss->stuck) { 3590 myri10ge_check_slice(ss, &reset_needed, 3591 &busy_slice_cnt, 3592 rx_pause_cnt); 3593 ss->stuck = 0; 3594 } 3595 } 3596 if (!reset_needed) { 3597 netdev_dbg(mgp->dev, "not resetting\n"); 3598 return; 3599 } 3600 3601 netdev_err(mgp->dev, "device timeout, resetting\n"); 3602 } 3603 3604 if (!rebooted) { 3605 rtnl_lock(); 3606 myri10ge_close(mgp->dev); 3607 } 3608 status = myri10ge_load_firmware(mgp, 1); 3609 if (status != 0) 3610 netdev_err(mgp->dev, "failed to load firmware\n"); 3611 else 3612 myri10ge_open(mgp->dev); 3613 rtnl_unlock(); 3614 } 3615 3616 /* 3617 * We use our own timer routine rather than relying upon 3618 * netdev->tx_timeout because we have a very large hardware transmit 3619 * queue. Due to the large queue, the netdev->tx_timeout function 3620 * cannot detect a NIC with a parity error in a timely fashion if the 3621 * NIC is lightly loaded. 3622 */ 3623 static void myri10ge_watchdog_timer(unsigned long arg) 3624 { 3625 struct myri10ge_priv *mgp; 3626 struct myri10ge_slice_state *ss; 3627 int i, reset_needed, busy_slice_cnt; 3628 u32 rx_pause_cnt; 3629 u16 cmd; 3630 3631 mgp = (struct myri10ge_priv *)arg; 3632 3633 rx_pause_cnt = ntohl(mgp->ss[0].fw_stats->dropped_pause); 3634 busy_slice_cnt = 0; 3635 for (i = 0, reset_needed = 0; 3636 i < mgp->num_slices && reset_needed == 0; ++i) { 3637 3638 ss = &mgp->ss[i]; 3639 if (ss->rx_small.watchdog_needed) { 3640 myri10ge_alloc_rx_pages(mgp, &ss->rx_small, 3641 mgp->small_bytes + MXGEFW_PAD, 3642 1); 3643 if (ss->rx_small.fill_cnt - ss->rx_small.cnt >= 3644 myri10ge_fill_thresh) 3645 ss->rx_small.watchdog_needed = 0; 3646 } 3647 if (ss->rx_big.watchdog_needed) { 3648 myri10ge_alloc_rx_pages(mgp, &ss->rx_big, 3649 mgp->big_bytes, 1); 3650 if (ss->rx_big.fill_cnt - ss->rx_big.cnt >= 3651 myri10ge_fill_thresh) 3652 ss->rx_big.watchdog_needed = 0; 3653 } 3654 myri10ge_check_slice(ss, &reset_needed, &busy_slice_cnt, 3655 rx_pause_cnt); 3656 } 3657 /* if we've sent or received no traffic, poll the NIC to 3658 * ensure it is still there. Otherwise, we risk not noticing 3659 * an error in a timely fashion */ 3660 if (busy_slice_cnt == 0) { 3661 pci_read_config_word(mgp->pdev, PCI_COMMAND, &cmd); 3662 if ((cmd & PCI_COMMAND_MASTER) == 0) { 3663 reset_needed = 1; 3664 } 3665 } 3666 mgp->watchdog_pause = rx_pause_cnt; 3667 3668 if (reset_needed) { 3669 schedule_work(&mgp->watchdog_work); 3670 } else { 3671 /* rearm timer */ 3672 mod_timer(&mgp->watchdog_timer, 3673 jiffies + myri10ge_watchdog_timeout * HZ); 3674 } 3675 } 3676 3677 static void myri10ge_free_slices(struct myri10ge_priv *mgp) 3678 { 3679 struct myri10ge_slice_state *ss; 3680 struct pci_dev *pdev = mgp->pdev; 3681 size_t bytes; 3682 int i; 3683 3684 if (mgp->ss == NULL) 3685 return; 3686 3687 for (i = 0; i < mgp->num_slices; i++) { 3688 ss = &mgp->ss[i]; 3689 if (ss->rx_done.entry != NULL) { 3690 bytes = mgp->max_intr_slots * 3691 sizeof(*ss->rx_done.entry); 3692 dma_free_coherent(&pdev->dev, bytes, 3693 ss->rx_done.entry, ss->rx_done.bus); 3694 ss->rx_done.entry = NULL; 3695 } 3696 if (ss->fw_stats != NULL) { 3697 bytes = sizeof(*ss->fw_stats); 3698 dma_free_coherent(&pdev->dev, bytes, 3699 ss->fw_stats, ss->fw_stats_bus); 3700 ss->fw_stats = NULL; 3701 } 3702 netif_napi_del(&ss->napi); 3703 } 3704 kfree(mgp->ss); 3705 mgp->ss = NULL; 3706 } 3707 3708 static int myri10ge_alloc_slices(struct myri10ge_priv *mgp) 3709 { 3710 struct myri10ge_slice_state *ss; 3711 struct pci_dev *pdev = mgp->pdev; 3712 size_t bytes; 3713 int i; 3714 3715 bytes = sizeof(*mgp->ss) * mgp->num_slices; 3716 mgp->ss = kzalloc(bytes, GFP_KERNEL); 3717 if (mgp->ss == NULL) { 3718 return -ENOMEM; 3719 } 3720 3721 for (i = 0; i < mgp->num_slices; i++) { 3722 ss = &mgp->ss[i]; 3723 bytes = mgp->max_intr_slots * sizeof(*ss->rx_done.entry); 3724 ss->rx_done.entry = dma_alloc_coherent(&pdev->dev, bytes, 3725 &ss->rx_done.bus, 3726 GFP_KERNEL); 3727 if (ss->rx_done.entry == NULL) 3728 goto abort; 3729 memset(ss->rx_done.entry, 0, bytes); 3730 bytes = sizeof(*ss->fw_stats); 3731 ss->fw_stats = dma_alloc_coherent(&pdev->dev, bytes, 3732 &ss->fw_stats_bus, 3733 GFP_KERNEL); 3734 if (ss->fw_stats == NULL) 3735 goto abort; 3736 ss->mgp = mgp; 3737 ss->dev = mgp->dev; 3738 netif_napi_add(ss->dev, &ss->napi, myri10ge_poll, 3739 myri10ge_napi_weight); 3740 } 3741 return 0; 3742 abort: 3743 myri10ge_free_slices(mgp); 3744 return -ENOMEM; 3745 } 3746 3747 /* 3748 * This function determines the number of slices supported. 3749 * The number slices is the minimum of the number of CPUS, 3750 * the number of MSI-X irqs supported, the number of slices 3751 * supported by the firmware 3752 */ 3753 static void myri10ge_probe_slices(struct myri10ge_priv *mgp) 3754 { 3755 struct myri10ge_cmd cmd; 3756 struct pci_dev *pdev = mgp->pdev; 3757 char *old_fw; 3758 bool old_allocated; 3759 int i, status, ncpus, msix_cap; 3760 3761 mgp->num_slices = 1; 3762 msix_cap = pci_find_capability(pdev, PCI_CAP_ID_MSIX); 3763 ncpus = netif_get_num_default_rss_queues(); 3764 3765 if (myri10ge_max_slices == 1 || msix_cap == 0 || 3766 (myri10ge_max_slices == -1 && ncpus < 2)) 3767 return; 3768 3769 /* try to load the slice aware rss firmware */ 3770 old_fw = mgp->fw_name; 3771 old_allocated = mgp->fw_name_allocated; 3772 /* don't free old_fw if we override it. */ 3773 mgp->fw_name_allocated = false; 3774 3775 if (myri10ge_fw_name != NULL) { 3776 dev_info(&mgp->pdev->dev, "overriding rss firmware to %s\n", 3777 myri10ge_fw_name); 3778 set_fw_name(mgp, myri10ge_fw_name, false); 3779 } else if (old_fw == myri10ge_fw_aligned) 3780 set_fw_name(mgp, myri10ge_fw_rss_aligned, false); 3781 else 3782 set_fw_name(mgp, myri10ge_fw_rss_unaligned, false); 3783 status = myri10ge_load_firmware(mgp, 0); 3784 if (status != 0) { 3785 dev_info(&pdev->dev, "Rss firmware not found\n"); 3786 if (old_allocated) 3787 kfree(old_fw); 3788 return; 3789 } 3790 3791 /* hit the board with a reset to ensure it is alive */ 3792 memset(&cmd, 0, sizeof(cmd)); 3793 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd, 0); 3794 if (status != 0) { 3795 dev_err(&mgp->pdev->dev, "failed reset\n"); 3796 goto abort_with_fw; 3797 } 3798 3799 mgp->max_intr_slots = cmd.data0 / sizeof(struct mcp_slot); 3800 3801 /* tell it the size of the interrupt queues */ 3802 cmd.data0 = mgp->max_intr_slots * sizeof(struct mcp_slot); 3803 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0); 3804 if (status != 0) { 3805 dev_err(&mgp->pdev->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 3806 goto abort_with_fw; 3807 } 3808 3809 /* ask the maximum number of slices it supports */ 3810 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd, 0); 3811 if (status != 0) 3812 goto abort_with_fw; 3813 else 3814 mgp->num_slices = cmd.data0; 3815 3816 /* Only allow multiple slices if MSI-X is usable */ 3817 if (!myri10ge_msi) { 3818 goto abort_with_fw; 3819 } 3820 3821 /* if the admin did not specify a limit to how many 3822 * slices we should use, cap it automatically to the 3823 * number of CPUs currently online */ 3824 if (myri10ge_max_slices == -1) 3825 myri10ge_max_slices = ncpus; 3826 3827 if (mgp->num_slices > myri10ge_max_slices) 3828 mgp->num_slices = myri10ge_max_slices; 3829 3830 /* Now try to allocate as many MSI-X vectors as we have 3831 * slices. We give up on MSI-X if we can only get a single 3832 * vector. */ 3833 3834 mgp->msix_vectors = kcalloc(mgp->num_slices, sizeof(*mgp->msix_vectors), 3835 GFP_KERNEL); 3836 if (mgp->msix_vectors == NULL) 3837 goto disable_msix; 3838 for (i = 0; i < mgp->num_slices; i++) { 3839 mgp->msix_vectors[i].entry = i; 3840 } 3841 3842 while (mgp->num_slices > 1) { 3843 /* make sure it is a power of two */ 3844 while (!is_power_of_2(mgp->num_slices)) 3845 mgp->num_slices--; 3846 if (mgp->num_slices == 1) 3847 goto disable_msix; 3848 status = pci_enable_msix(pdev, mgp->msix_vectors, 3849 mgp->num_slices); 3850 if (status == 0) { 3851 pci_disable_msix(pdev); 3852 if (old_allocated) 3853 kfree(old_fw); 3854 return; 3855 } 3856 if (status > 0) 3857 mgp->num_slices = status; 3858 else 3859 goto disable_msix; 3860 } 3861 3862 disable_msix: 3863 if (mgp->msix_vectors != NULL) { 3864 kfree(mgp->msix_vectors); 3865 mgp->msix_vectors = NULL; 3866 } 3867 3868 abort_with_fw: 3869 mgp->num_slices = 1; 3870 set_fw_name(mgp, old_fw, old_allocated); 3871 myri10ge_load_firmware(mgp, 0); 3872 } 3873 3874 static const struct net_device_ops myri10ge_netdev_ops = { 3875 .ndo_open = myri10ge_open, 3876 .ndo_stop = myri10ge_close, 3877 .ndo_start_xmit = myri10ge_xmit, 3878 .ndo_get_stats64 = myri10ge_get_stats, 3879 .ndo_validate_addr = eth_validate_addr, 3880 .ndo_change_mtu = myri10ge_change_mtu, 3881 .ndo_fix_features = myri10ge_fix_features, 3882 .ndo_set_rx_mode = myri10ge_set_multicast_list, 3883 .ndo_set_mac_address = myri10ge_set_mac_address, 3884 }; 3885 3886 static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 3887 { 3888 struct net_device *netdev; 3889 struct myri10ge_priv *mgp; 3890 struct device *dev = &pdev->dev; 3891 int i; 3892 int status = -ENXIO; 3893 int dac_enabled; 3894 unsigned hdr_offset, ss_offset; 3895 static int board_number; 3896 3897 netdev = alloc_etherdev_mq(sizeof(*mgp), MYRI10GE_MAX_SLICES); 3898 if (netdev == NULL) 3899 return -ENOMEM; 3900 3901 SET_NETDEV_DEV(netdev, &pdev->dev); 3902 3903 mgp = netdev_priv(netdev); 3904 mgp->dev = netdev; 3905 mgp->pdev = pdev; 3906 mgp->pause = myri10ge_flow_control; 3907 mgp->intr_coal_delay = myri10ge_intr_coal_delay; 3908 mgp->msg_enable = netif_msg_init(myri10ge_debug, MYRI10GE_MSG_DEFAULT); 3909 mgp->board_number = board_number; 3910 init_waitqueue_head(&mgp->down_wq); 3911 3912 if (pci_enable_device(pdev)) { 3913 dev_err(&pdev->dev, "pci_enable_device call failed\n"); 3914 status = -ENODEV; 3915 goto abort_with_netdev; 3916 } 3917 3918 /* Find the vendor-specific cap so we can check 3919 * the reboot register later on */ 3920 mgp->vendor_specific_offset 3921 = pci_find_capability(pdev, PCI_CAP_ID_VNDR); 3922 3923 /* Set our max read request to 4KB */ 3924 status = pcie_set_readrq(pdev, 4096); 3925 if (status != 0) { 3926 dev_err(&pdev->dev, "Error %d writing PCI_EXP_DEVCTL\n", 3927 status); 3928 goto abort_with_enabled; 3929 } 3930 3931 myri10ge_mask_surprise_down(pdev); 3932 pci_set_master(pdev); 3933 dac_enabled = 1; 3934 status = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 3935 if (status != 0) { 3936 dac_enabled = 0; 3937 dev_err(&pdev->dev, 3938 "64-bit pci address mask was refused, " 3939 "trying 32-bit\n"); 3940 status = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 3941 } 3942 if (status != 0) { 3943 dev_err(&pdev->dev, "Error %d setting DMA mask\n", status); 3944 goto abort_with_enabled; 3945 } 3946 (void)pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 3947 mgp->cmd = dma_alloc_coherent(&pdev->dev, sizeof(*mgp->cmd), 3948 &mgp->cmd_bus, GFP_KERNEL); 3949 if (mgp->cmd == NULL) 3950 goto abort_with_enabled; 3951 3952 mgp->board_span = pci_resource_len(pdev, 0); 3953 mgp->iomem_base = pci_resource_start(pdev, 0); 3954 mgp->mtrr = -1; 3955 mgp->wc_enabled = 0; 3956 #ifdef CONFIG_MTRR 3957 mgp->mtrr = mtrr_add(mgp->iomem_base, mgp->board_span, 3958 MTRR_TYPE_WRCOMB, 1); 3959 if (mgp->mtrr >= 0) 3960 mgp->wc_enabled = 1; 3961 #endif 3962 mgp->sram = ioremap_wc(mgp->iomem_base, mgp->board_span); 3963 if (mgp->sram == NULL) { 3964 dev_err(&pdev->dev, "ioremap failed for %ld bytes at 0x%lx\n", 3965 mgp->board_span, mgp->iomem_base); 3966 status = -ENXIO; 3967 goto abort_with_mtrr; 3968 } 3969 hdr_offset = 3970 ntohl(__raw_readl(mgp->sram + MCP_HEADER_PTR_OFFSET)) & 0xffffc; 3971 ss_offset = hdr_offset + offsetof(struct mcp_gen_header, string_specs); 3972 mgp->sram_size = ntohl(__raw_readl(mgp->sram + ss_offset)); 3973 if (mgp->sram_size > mgp->board_span || 3974 mgp->sram_size <= MYRI10GE_FW_OFFSET) { 3975 dev_err(&pdev->dev, 3976 "invalid sram_size %dB or board span %ldB\n", 3977 mgp->sram_size, mgp->board_span); 3978 goto abort_with_ioremap; 3979 } 3980 memcpy_fromio(mgp->eeprom_strings, 3981 mgp->sram + mgp->sram_size, MYRI10GE_EEPROM_STRINGS_SIZE); 3982 memset(mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2); 3983 status = myri10ge_read_mac_addr(mgp); 3984 if (status) 3985 goto abort_with_ioremap; 3986 3987 for (i = 0; i < ETH_ALEN; i++) 3988 netdev->dev_addr[i] = mgp->mac_addr[i]; 3989 3990 myri10ge_select_firmware(mgp); 3991 3992 status = myri10ge_load_firmware(mgp, 1); 3993 if (status != 0) { 3994 dev_err(&pdev->dev, "failed to load firmware\n"); 3995 goto abort_with_ioremap; 3996 } 3997 myri10ge_probe_slices(mgp); 3998 status = myri10ge_alloc_slices(mgp); 3999 if (status != 0) { 4000 dev_err(&pdev->dev, "failed to alloc slice state\n"); 4001 goto abort_with_firmware; 4002 } 4003 netif_set_real_num_tx_queues(netdev, mgp->num_slices); 4004 netif_set_real_num_rx_queues(netdev, mgp->num_slices); 4005 status = myri10ge_reset(mgp); 4006 if (status != 0) { 4007 dev_err(&pdev->dev, "failed reset\n"); 4008 goto abort_with_slices; 4009 } 4010 #ifdef CONFIG_MYRI10GE_DCA 4011 myri10ge_setup_dca(mgp); 4012 #endif 4013 pci_set_drvdata(pdev, mgp); 4014 if ((myri10ge_initial_mtu + ETH_HLEN) > MYRI10GE_MAX_ETHER_MTU) 4015 myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN; 4016 if ((myri10ge_initial_mtu + ETH_HLEN) < 68) 4017 myri10ge_initial_mtu = 68; 4018 4019 netdev->netdev_ops = &myri10ge_netdev_ops; 4020 netdev->mtu = myri10ge_initial_mtu; 4021 netdev->hw_features = mgp->features | NETIF_F_LRO | NETIF_F_RXCSUM; 4022 netdev->features = netdev->hw_features; 4023 4024 if (dac_enabled) 4025 netdev->features |= NETIF_F_HIGHDMA; 4026 4027 netdev->vlan_features |= mgp->features; 4028 if (mgp->fw_ver_tiny < 37) 4029 netdev->vlan_features &= ~NETIF_F_TSO6; 4030 if (mgp->fw_ver_tiny < 32) 4031 netdev->vlan_features &= ~NETIF_F_TSO; 4032 4033 /* make sure we can get an irq, and that MSI can be 4034 * setup (if available). */ 4035 status = myri10ge_request_irq(mgp); 4036 if (status != 0) 4037 goto abort_with_firmware; 4038 myri10ge_free_irq(mgp); 4039 4040 /* Save configuration space to be restored if the 4041 * nic resets due to a parity error */ 4042 pci_save_state(pdev); 4043 4044 /* Setup the watchdog timer */ 4045 setup_timer(&mgp->watchdog_timer, myri10ge_watchdog_timer, 4046 (unsigned long)mgp); 4047 4048 SET_ETHTOOL_OPS(netdev, &myri10ge_ethtool_ops); 4049 INIT_WORK(&mgp->watchdog_work, myri10ge_watchdog); 4050 status = register_netdev(netdev); 4051 if (status != 0) { 4052 dev_err(&pdev->dev, "register_netdev failed: %d\n", status); 4053 goto abort_with_state; 4054 } 4055 if (mgp->msix_enabled) 4056 dev_info(dev, "%d MSI-X IRQs, tx bndry %d, fw %s, WC %s\n", 4057 mgp->num_slices, mgp->tx_boundary, mgp->fw_name, 4058 (mgp->wc_enabled ? "Enabled" : "Disabled")); 4059 else 4060 dev_info(dev, "%s IRQ %d, tx bndry %d, fw %s, WC %s\n", 4061 mgp->msi_enabled ? "MSI" : "xPIC", 4062 pdev->irq, mgp->tx_boundary, mgp->fw_name, 4063 (mgp->wc_enabled ? "Enabled" : "Disabled")); 4064 4065 board_number++; 4066 return 0; 4067 4068 abort_with_state: 4069 pci_restore_state(pdev); 4070 4071 abort_with_slices: 4072 myri10ge_free_slices(mgp); 4073 4074 abort_with_firmware: 4075 myri10ge_dummy_rdma(mgp, 0); 4076 4077 abort_with_ioremap: 4078 if (mgp->mac_addr_string != NULL) 4079 dev_err(&pdev->dev, 4080 "myri10ge_probe() failed: MAC=%s, SN=%ld\n", 4081 mgp->mac_addr_string, mgp->serial_number); 4082 iounmap(mgp->sram); 4083 4084 abort_with_mtrr: 4085 #ifdef CONFIG_MTRR 4086 if (mgp->mtrr >= 0) 4087 mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span); 4088 #endif 4089 dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd), 4090 mgp->cmd, mgp->cmd_bus); 4091 4092 abort_with_enabled: 4093 pci_disable_device(pdev); 4094 4095 abort_with_netdev: 4096 set_fw_name(mgp, NULL, false); 4097 free_netdev(netdev); 4098 return status; 4099 } 4100 4101 /* 4102 * myri10ge_remove 4103 * 4104 * Does what is necessary to shutdown one Myrinet device. Called 4105 * once for each Myrinet card by the kernel when a module is 4106 * unloaded. 4107 */ 4108 static void myri10ge_remove(struct pci_dev *pdev) 4109 { 4110 struct myri10ge_priv *mgp; 4111 struct net_device *netdev; 4112 4113 mgp = pci_get_drvdata(pdev); 4114 if (mgp == NULL) 4115 return; 4116 4117 cancel_work_sync(&mgp->watchdog_work); 4118 netdev = mgp->dev; 4119 unregister_netdev(netdev); 4120 4121 #ifdef CONFIG_MYRI10GE_DCA 4122 myri10ge_teardown_dca(mgp); 4123 #endif 4124 myri10ge_dummy_rdma(mgp, 0); 4125 4126 /* avoid a memory leak */ 4127 pci_restore_state(pdev); 4128 4129 iounmap(mgp->sram); 4130 4131 #ifdef CONFIG_MTRR 4132 if (mgp->mtrr >= 0) 4133 mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span); 4134 #endif 4135 myri10ge_free_slices(mgp); 4136 if (mgp->msix_vectors != NULL) 4137 kfree(mgp->msix_vectors); 4138 dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd), 4139 mgp->cmd, mgp->cmd_bus); 4140 4141 set_fw_name(mgp, NULL, false); 4142 free_netdev(netdev); 4143 pci_disable_device(pdev); 4144 pci_set_drvdata(pdev, NULL); 4145 } 4146 4147 #define PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E 0x0008 4148 #define PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E_9 0x0009 4149 4150 static DEFINE_PCI_DEVICE_TABLE(myri10ge_pci_tbl) = { 4151 {PCI_DEVICE(PCI_VENDOR_ID_MYRICOM, PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E)}, 4152 {PCI_DEVICE 4153 (PCI_VENDOR_ID_MYRICOM, PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E_9)}, 4154 {0}, 4155 }; 4156 4157 MODULE_DEVICE_TABLE(pci, myri10ge_pci_tbl); 4158 4159 static struct pci_driver myri10ge_driver = { 4160 .name = "myri10ge", 4161 .probe = myri10ge_probe, 4162 .remove = myri10ge_remove, 4163 .id_table = myri10ge_pci_tbl, 4164 #ifdef CONFIG_PM 4165 .suspend = myri10ge_suspend, 4166 .resume = myri10ge_resume, 4167 #endif 4168 }; 4169 4170 #ifdef CONFIG_MYRI10GE_DCA 4171 static int 4172 myri10ge_notify_dca(struct notifier_block *nb, unsigned long event, void *p) 4173 { 4174 int err = driver_for_each_device(&myri10ge_driver.driver, 4175 NULL, &event, 4176 myri10ge_notify_dca_device); 4177 4178 if (err) 4179 return NOTIFY_BAD; 4180 return NOTIFY_DONE; 4181 } 4182 4183 static struct notifier_block myri10ge_dca_notifier = { 4184 .notifier_call = myri10ge_notify_dca, 4185 .next = NULL, 4186 .priority = 0, 4187 }; 4188 #endif /* CONFIG_MYRI10GE_DCA */ 4189 4190 static __init int myri10ge_init_module(void) 4191 { 4192 pr_info("Version %s\n", MYRI10GE_VERSION_STR); 4193 4194 if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX) { 4195 pr_err("Illegal rssh hash type %d, defaulting to source port\n", 4196 myri10ge_rss_hash); 4197 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 4198 } 4199 #ifdef CONFIG_MYRI10GE_DCA 4200 dca_register_notify(&myri10ge_dca_notifier); 4201 #endif 4202 if (myri10ge_max_slices > MYRI10GE_MAX_SLICES) 4203 myri10ge_max_slices = MYRI10GE_MAX_SLICES; 4204 4205 return pci_register_driver(&myri10ge_driver); 4206 } 4207 4208 module_init(myri10ge_init_module); 4209 4210 static __exit void myri10ge_cleanup_module(void) 4211 { 4212 #ifdef CONFIG_MYRI10GE_DCA 4213 dca_unregister_notify(&myri10ge_dca_notifier); 4214 #endif 4215 pci_unregister_driver(&myri10ge_driver); 4216 } 4217 4218 module_exit(myri10ge_cleanup_module); 4219