1 /************************************************************************* 2 * myri10ge.c: Myricom Myri-10G Ethernet driver. 3 * 4 * Copyright (C) 2005 - 2011 Myricom, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of Myricom, Inc. nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 
30 * 31 * 32 * If the eeprom on your board is not recent enough, you will need to get a 33 * newer firmware image at: 34 * http://www.myri.com/scs/download-Myri10GE.html 35 * 36 * Contact Information: 37 * <help@myri.com> 38 * Myricom, Inc., 325N Santa Anita Avenue, Arcadia, CA 91006 39 *************************************************************************/ 40 41 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 42 43 #include <linux/tcp.h> 44 #include <linux/netdevice.h> 45 #include <linux/skbuff.h> 46 #include <linux/string.h> 47 #include <linux/module.h> 48 #include <linux/pci.h> 49 #include <linux/dma-mapping.h> 50 #include <linux/etherdevice.h> 51 #include <linux/if_ether.h> 52 #include <linux/if_vlan.h> 53 #include <linux/dca.h> 54 #include <linux/ip.h> 55 #include <linux/inet.h> 56 #include <linux/in.h> 57 #include <linux/ethtool.h> 58 #include <linux/firmware.h> 59 #include <linux/delay.h> 60 #include <linux/timer.h> 61 #include <linux/vmalloc.h> 62 #include <linux/crc32.h> 63 #include <linux/moduleparam.h> 64 #include <linux/io.h> 65 #include <linux/log2.h> 66 #include <linux/slab.h> 67 #include <linux/prefetch.h> 68 #include <net/checksum.h> 69 #include <net/ip.h> 70 #include <net/tcp.h> 71 #include <asm/byteorder.h> 72 #include <asm/processor.h> 73 #include <net/busy_poll.h> 74 75 #include "myri10ge_mcp.h" 76 #include "myri10ge_mcp_gen_header.h" 77 78 #define MYRI10GE_VERSION_STR "1.5.3-1.534" 79 80 MODULE_DESCRIPTION("Myricom 10G driver (10GbE)"); 81 MODULE_AUTHOR("Maintainer: help@myri.com"); 82 MODULE_VERSION(MYRI10GE_VERSION_STR); 83 MODULE_LICENSE("Dual BSD/GPL"); 84 85 #define MYRI10GE_MAX_ETHER_MTU 9014 86 87 #define MYRI10GE_ETH_STOPPED 0 88 #define MYRI10GE_ETH_STOPPING 1 89 #define MYRI10GE_ETH_STARTING 2 90 #define MYRI10GE_ETH_RUNNING 3 91 #define MYRI10GE_ETH_OPEN_FAILED 4 92 93 #define MYRI10GE_EEPROM_STRINGS_SIZE 256 94 #define MYRI10GE_MAX_SEND_DESC_TSO ((65536 / 2048) * 2) 95 96 #define MYRI10GE_NO_CONFIRM_DATA htonl(0xffffffff) 97 #define 
MYRI10GE_NO_RESPONSE_RESULT 0xffffffff 98 99 #define MYRI10GE_ALLOC_ORDER 0 100 #define MYRI10GE_ALLOC_SIZE ((1 << MYRI10GE_ALLOC_ORDER) * PAGE_SIZE) 101 #define MYRI10GE_MAX_FRAGS_PER_FRAME (MYRI10GE_MAX_ETHER_MTU/MYRI10GE_ALLOC_SIZE + 1) 102 103 #define MYRI10GE_MAX_SLICES 32 104 105 struct myri10ge_rx_buffer_state { 106 struct page *page; 107 int page_offset; 108 DEFINE_DMA_UNMAP_ADDR(bus); 109 DEFINE_DMA_UNMAP_LEN(len); 110 }; 111 112 struct myri10ge_tx_buffer_state { 113 struct sk_buff *skb; 114 int last; 115 DEFINE_DMA_UNMAP_ADDR(bus); 116 DEFINE_DMA_UNMAP_LEN(len); 117 }; 118 119 struct myri10ge_cmd { 120 u32 data0; 121 u32 data1; 122 u32 data2; 123 }; 124 125 struct myri10ge_rx_buf { 126 struct mcp_kreq_ether_recv __iomem *lanai; /* lanai ptr for recv ring */ 127 struct mcp_kreq_ether_recv *shadow; /* host shadow of recv ring */ 128 struct myri10ge_rx_buffer_state *info; 129 struct page *page; 130 dma_addr_t bus; 131 int page_offset; 132 int cnt; 133 int fill_cnt; 134 int alloc_fail; 135 int mask; /* number of rx slots -1 */ 136 int watchdog_needed; 137 }; 138 139 struct myri10ge_tx_buf { 140 struct mcp_kreq_ether_send __iomem *lanai; /* lanai ptr for sendq */ 141 __be32 __iomem *send_go; /* "go" doorbell ptr */ 142 __be32 __iomem *send_stop; /* "stop" doorbell ptr */ 143 struct mcp_kreq_ether_send *req_list; /* host shadow of sendq */ 144 char *req_bytes; 145 struct myri10ge_tx_buffer_state *info; 146 int mask; /* number of transmit slots -1 */ 147 int req ____cacheline_aligned; /* transmit slots submitted */ 148 int pkt_start; /* packets started */ 149 int stop_queue; 150 int linearized; 151 int done ____cacheline_aligned; /* transmit slots completed */ 152 int pkt_done; /* packets completed */ 153 int wake_queue; 154 int queue_active; 155 }; 156 157 struct myri10ge_rx_done { 158 struct mcp_slot *entry; 159 dma_addr_t bus; 160 int cnt; 161 int idx; 162 }; 163 164 struct myri10ge_slice_netstats { 165 unsigned long rx_packets; 166 unsigned long tx_packets; 
167 unsigned long rx_bytes; 168 unsigned long tx_bytes; 169 unsigned long rx_dropped; 170 unsigned long tx_dropped; 171 }; 172 173 struct myri10ge_slice_state { 174 struct myri10ge_tx_buf tx; /* transmit ring */ 175 struct myri10ge_rx_buf rx_small; 176 struct myri10ge_rx_buf rx_big; 177 struct myri10ge_rx_done rx_done; 178 struct net_device *dev; 179 struct napi_struct napi; 180 struct myri10ge_priv *mgp; 181 struct myri10ge_slice_netstats stats; 182 __be32 __iomem *irq_claim; 183 struct mcp_irq_data *fw_stats; 184 dma_addr_t fw_stats_bus; 185 int watchdog_tx_done; 186 int watchdog_tx_req; 187 int watchdog_rx_done; 188 int stuck; 189 #ifdef CONFIG_MYRI10GE_DCA 190 int cached_dca_tag; 191 int cpu; 192 __be32 __iomem *dca_tag; 193 #endif 194 #ifdef CONFIG_NET_RX_BUSY_POLL 195 unsigned int state; 196 #define SLICE_STATE_IDLE 0 197 #define SLICE_STATE_NAPI 1 /* NAPI owns this slice */ 198 #define SLICE_STATE_POLL 2 /* poll owns this slice */ 199 #define SLICE_LOCKED (SLICE_STATE_NAPI | SLICE_STATE_POLL) 200 #define SLICE_STATE_NAPI_YIELD 4 /* NAPI yielded this slice */ 201 #define SLICE_STATE_POLL_YIELD 8 /* poll yielded this slice */ 202 #define SLICE_USER_PEND (SLICE_STATE_POLL | SLICE_STATE_POLL_YIELD) 203 spinlock_t lock; 204 unsigned long lock_napi_yield; 205 unsigned long lock_poll_yield; 206 unsigned long busy_poll_miss; 207 unsigned long busy_poll_cnt; 208 #endif /* CONFIG_NET_RX_BUSY_POLL */ 209 char irq_desc[32]; 210 }; 211 212 struct myri10ge_priv { 213 struct myri10ge_slice_state *ss; 214 int tx_boundary; /* boundary transmits cannot cross */ 215 int num_slices; 216 int running; /* running? 
*/ 217 int small_bytes; 218 int big_bytes; 219 int max_intr_slots; 220 struct net_device *dev; 221 u8 __iomem *sram; 222 int sram_size; 223 unsigned long board_span; 224 unsigned long iomem_base; 225 __be32 __iomem *irq_deassert; 226 char *mac_addr_string; 227 struct mcp_cmd_response *cmd; 228 dma_addr_t cmd_bus; 229 struct pci_dev *pdev; 230 int msi_enabled; 231 int msix_enabled; 232 struct msix_entry *msix_vectors; 233 #ifdef CONFIG_MYRI10GE_DCA 234 int dca_enabled; 235 int relaxed_order; 236 #endif 237 u32 link_state; 238 unsigned int rdma_tags_available; 239 int intr_coal_delay; 240 __be32 __iomem *intr_coal_delay_ptr; 241 int wc_cookie; 242 int down_cnt; 243 wait_queue_head_t down_wq; 244 struct work_struct watchdog_work; 245 struct timer_list watchdog_timer; 246 int watchdog_resets; 247 int watchdog_pause; 248 int pause; 249 bool fw_name_allocated; 250 char *fw_name; 251 char eeprom_strings[MYRI10GE_EEPROM_STRINGS_SIZE]; 252 char *product_code_string; 253 char fw_version[128]; 254 int fw_ver_major; 255 int fw_ver_minor; 256 int fw_ver_tiny; 257 int adopted_rx_filter_bug; 258 u8 mac_addr[ETH_ALEN]; /* eeprom mac address */ 259 unsigned long serial_number; 260 int vendor_specific_offset; 261 int fw_multicast_support; 262 u32 features; 263 u32 max_tso6; 264 u32 read_dma; 265 u32 write_dma; 266 u32 read_write_dma; 267 u32 link_changes; 268 u32 msg_enable; 269 unsigned int board_number; 270 int rebooted; 271 }; 272 273 static char *myri10ge_fw_unaligned = "myri10ge_ethp_z8e.dat"; 274 static char *myri10ge_fw_aligned = "myri10ge_eth_z8e.dat"; 275 static char *myri10ge_fw_rss_unaligned = "myri10ge_rss_ethp_z8e.dat"; 276 static char *myri10ge_fw_rss_aligned = "myri10ge_rss_eth_z8e.dat"; 277 MODULE_FIRMWARE("myri10ge_ethp_z8e.dat"); 278 MODULE_FIRMWARE("myri10ge_eth_z8e.dat"); 279 MODULE_FIRMWARE("myri10ge_rss_ethp_z8e.dat"); 280 MODULE_FIRMWARE("myri10ge_rss_eth_z8e.dat"); 281 282 /* Careful: must be accessed under kernel_param_lock() */ 283 static char 
*myri10ge_fw_name = NULL; 284 module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR); 285 MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image name"); 286 287 #define MYRI10GE_MAX_BOARDS 8 288 static char *myri10ge_fw_names[MYRI10GE_MAX_BOARDS] = 289 {[0 ... (MYRI10GE_MAX_BOARDS - 1)] = NULL }; 290 module_param_array_named(myri10ge_fw_names, myri10ge_fw_names, charp, NULL, 291 0444); 292 MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image names per board"); 293 294 static int myri10ge_ecrc_enable = 1; 295 module_param(myri10ge_ecrc_enable, int, S_IRUGO); 296 MODULE_PARM_DESC(myri10ge_ecrc_enable, "Enable Extended CRC on PCI-E"); 297 298 static int myri10ge_small_bytes = -1; /* -1 == auto */ 299 module_param(myri10ge_small_bytes, int, S_IRUGO | S_IWUSR); 300 MODULE_PARM_DESC(myri10ge_small_bytes, "Threshold of small packets"); 301 302 static int myri10ge_msi = 1; /* enable msi by default */ 303 module_param(myri10ge_msi, int, S_IRUGO | S_IWUSR); 304 MODULE_PARM_DESC(myri10ge_msi, "Enable Message Signalled Interrupts"); 305 306 static int myri10ge_intr_coal_delay = 75; 307 module_param(myri10ge_intr_coal_delay, int, S_IRUGO); 308 MODULE_PARM_DESC(myri10ge_intr_coal_delay, "Interrupt coalescing delay"); 309 310 static int myri10ge_flow_control = 1; 311 module_param(myri10ge_flow_control, int, S_IRUGO); 312 MODULE_PARM_DESC(myri10ge_flow_control, "Pause parameter"); 313 314 static int myri10ge_deassert_wait = 1; 315 module_param(myri10ge_deassert_wait, int, S_IRUGO | S_IWUSR); 316 MODULE_PARM_DESC(myri10ge_deassert_wait, 317 "Wait when deasserting legacy interrupts"); 318 319 static int myri10ge_force_firmware = 0; 320 module_param(myri10ge_force_firmware, int, S_IRUGO); 321 MODULE_PARM_DESC(myri10ge_force_firmware, 322 "Force firmware to assume aligned completions"); 323 324 static int myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN; 325 module_param(myri10ge_initial_mtu, int, S_IRUGO); 326 MODULE_PARM_DESC(myri10ge_initial_mtu, "Initial MTU"); 327 328 static 
int myri10ge_napi_weight = 64; 329 module_param(myri10ge_napi_weight, int, S_IRUGO); 330 MODULE_PARM_DESC(myri10ge_napi_weight, "Set NAPI weight"); 331 332 static int myri10ge_watchdog_timeout = 1; 333 module_param(myri10ge_watchdog_timeout, int, S_IRUGO); 334 MODULE_PARM_DESC(myri10ge_watchdog_timeout, "Set watchdog timeout"); 335 336 static int myri10ge_max_irq_loops = 1048576; 337 module_param(myri10ge_max_irq_loops, int, S_IRUGO); 338 MODULE_PARM_DESC(myri10ge_max_irq_loops, 339 "Set stuck legacy IRQ detection threshold"); 340 341 #define MYRI10GE_MSG_DEFAULT NETIF_MSG_LINK 342 343 static int myri10ge_debug = -1; /* defaults above */ 344 module_param(myri10ge_debug, int, 0); 345 MODULE_PARM_DESC(myri10ge_debug, "Debug level (0=none,...,16=all)"); 346 347 static int myri10ge_fill_thresh = 256; 348 module_param(myri10ge_fill_thresh, int, S_IRUGO | S_IWUSR); 349 MODULE_PARM_DESC(myri10ge_fill_thresh, "Number of empty rx slots allowed"); 350 351 static int myri10ge_reset_recover = 1; 352 353 static int myri10ge_max_slices = 1; 354 module_param(myri10ge_max_slices, int, S_IRUGO); 355 MODULE_PARM_DESC(myri10ge_max_slices, "Max tx/rx queues"); 356 357 static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 358 module_param(myri10ge_rss_hash, int, S_IRUGO); 359 MODULE_PARM_DESC(myri10ge_rss_hash, "Type of RSS hashing to do"); 360 361 static int myri10ge_dca = 1; 362 module_param(myri10ge_dca, int, S_IRUGO); 363 MODULE_PARM_DESC(myri10ge_dca, "Enable DCA if possible"); 364 365 #define MYRI10GE_FW_OFFSET 1024*1024 366 #define MYRI10GE_HIGHPART_TO_U32(X) \ 367 (sizeof (X) == 8) ? 
((u32)((u64)(X) >> 32)) : (0) 368 #define MYRI10GE_LOWPART_TO_U32(X) ((u32)(X)) 369 370 #define myri10ge_pio_copy(to,from,size) __iowrite64_copy(to,from,size/8) 371 372 static void myri10ge_set_multicast_list(struct net_device *dev); 373 static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, 374 struct net_device *dev); 375 376 static inline void put_be32(__be32 val, __be32 __iomem * p) 377 { 378 __raw_writel((__force __u32) val, (__force void __iomem *)p); 379 } 380 381 static struct rtnl_link_stats64 *myri10ge_get_stats(struct net_device *dev, 382 struct rtnl_link_stats64 *stats); 383 384 static void set_fw_name(struct myri10ge_priv *mgp, char *name, bool allocated) 385 { 386 if (mgp->fw_name_allocated) 387 kfree(mgp->fw_name); 388 mgp->fw_name = name; 389 mgp->fw_name_allocated = allocated; 390 } 391 392 static int 393 myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd, 394 struct myri10ge_cmd *data, int atomic) 395 { 396 struct mcp_cmd *buf; 397 char buf_bytes[sizeof(*buf) + 8]; 398 struct mcp_cmd_response *response = mgp->cmd; 399 char __iomem *cmd_addr = mgp->sram + MXGEFW_ETH_CMD; 400 u32 dma_low, dma_high, result, value; 401 int sleep_total = 0; 402 403 /* ensure buf is aligned to 8 bytes */ 404 buf = (struct mcp_cmd *)ALIGN((unsigned long)buf_bytes, 8); 405 406 buf->data0 = htonl(data->data0); 407 buf->data1 = htonl(data->data1); 408 buf->data2 = htonl(data->data2); 409 buf->cmd = htonl(cmd); 410 dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus); 411 dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus); 412 413 buf->response_addr.low = htonl(dma_low); 414 buf->response_addr.high = htonl(dma_high); 415 response->result = htonl(MYRI10GE_NO_RESPONSE_RESULT); 416 mb(); 417 myri10ge_pio_copy(cmd_addr, buf, sizeof(*buf)); 418 419 /* wait up to 15ms. Longest command is the DMA benchmark, 420 * which is capped at 5ms, but runs from a timeout handler 421 * that runs every 7.8ms. 
So a 15ms timeout leaves us with 422 * a 2.2ms margin 423 */ 424 if (atomic) { 425 /* if atomic is set, do not sleep, 426 * and try to get the completion quickly 427 * (1ms will be enough for those commands) */ 428 for (sleep_total = 0; 429 sleep_total < 1000 && 430 response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT); 431 sleep_total += 10) { 432 udelay(10); 433 mb(); 434 } 435 } else { 436 /* use msleep for most command */ 437 for (sleep_total = 0; 438 sleep_total < 15 && 439 response->result == htonl(MYRI10GE_NO_RESPONSE_RESULT); 440 sleep_total++) 441 msleep(1); 442 } 443 444 result = ntohl(response->result); 445 value = ntohl(response->data); 446 if (result != MYRI10GE_NO_RESPONSE_RESULT) { 447 if (result == 0) { 448 data->data0 = value; 449 return 0; 450 } else if (result == MXGEFW_CMD_UNKNOWN) { 451 return -ENOSYS; 452 } else if (result == MXGEFW_CMD_ERROR_UNALIGNED) { 453 return -E2BIG; 454 } else if (result == MXGEFW_CMD_ERROR_RANGE && 455 cmd == MXGEFW_CMD_ENABLE_RSS_QUEUES && 456 (data-> 457 data1 & MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES) != 458 0) { 459 return -ERANGE; 460 } else { 461 dev_err(&mgp->pdev->dev, 462 "command %d failed, result = %d\n", 463 cmd, result); 464 return -ENXIO; 465 } 466 } 467 468 dev_err(&mgp->pdev->dev, "command %d timed out, result = %d\n", 469 cmd, result); 470 return -EAGAIN; 471 } 472 473 /* 474 * The eeprom strings on the lanaiX have the format 475 * SN=x\0 476 * MAC=x:x:x:x:x:x\0 477 * PT:ddd mmm xx xx:xx:xx xx\0 478 * PV:ddd mmm xx xx:xx:xx xx\0 479 */ 480 static int myri10ge_read_mac_addr(struct myri10ge_priv *mgp) 481 { 482 char *ptr, *limit; 483 int i; 484 485 ptr = mgp->eeprom_strings; 486 limit = mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE; 487 488 while (*ptr != '\0' && ptr < limit) { 489 if (memcmp(ptr, "MAC=", 4) == 0) { 490 ptr += 4; 491 mgp->mac_addr_string = ptr; 492 for (i = 0; i < 6; i++) { 493 if ((ptr + 2) > limit) 494 goto abort; 495 mgp->mac_addr[i] = 496 simple_strtoul(ptr, &ptr, 16); 497 ptr 
+= 1; 498 } 499 } 500 if (memcmp(ptr, "PC=", 3) == 0) { 501 ptr += 3; 502 mgp->product_code_string = ptr; 503 } 504 if (memcmp((const void *)ptr, "SN=", 3) == 0) { 505 ptr += 3; 506 mgp->serial_number = simple_strtoul(ptr, &ptr, 10); 507 } 508 while (ptr < limit && *ptr++) ; 509 } 510 511 return 0; 512 513 abort: 514 dev_err(&mgp->pdev->dev, "failed to parse eeprom_strings\n"); 515 return -ENXIO; 516 } 517 518 /* 519 * Enable or disable periodic RDMAs from the host to make certain 520 * chipsets resend dropped PCIe messages 521 */ 522 523 static void myri10ge_dummy_rdma(struct myri10ge_priv *mgp, int enable) 524 { 525 char __iomem *submit; 526 __be32 buf[16] __attribute__ ((__aligned__(8))); 527 u32 dma_low, dma_high; 528 int i; 529 530 /* clear confirmation addr */ 531 mgp->cmd->data = 0; 532 mb(); 533 534 /* send a rdma command to the PCIe engine, and wait for the 535 * response in the confirmation address. The firmware should 536 * write a -1 there to indicate it is alive and well 537 */ 538 dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus); 539 dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus); 540 541 buf[0] = htonl(dma_high); /* confirm addr MSW */ 542 buf[1] = htonl(dma_low); /* confirm addr LSW */ 543 buf[2] = MYRI10GE_NO_CONFIRM_DATA; /* confirm data */ 544 buf[3] = htonl(dma_high); /* dummy addr MSW */ 545 buf[4] = htonl(dma_low); /* dummy addr LSW */ 546 buf[5] = htonl(enable); /* enable? */ 547 548 submit = mgp->sram + MXGEFW_BOOT_DUMMY_RDMA; 549 550 myri10ge_pio_copy(submit, &buf, sizeof(buf)); 551 for (i = 0; mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA && i < 20; i++) 552 msleep(1); 553 if (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA) 554 dev_err(&mgp->pdev->dev, "dummy rdma %s failed\n", 555 (enable ? 
"enable" : "disable")); 556 } 557 558 static int 559 myri10ge_validate_firmware(struct myri10ge_priv *mgp, 560 struct mcp_gen_header *hdr) 561 { 562 struct device *dev = &mgp->pdev->dev; 563 564 /* check firmware type */ 565 if (ntohl(hdr->mcp_type) != MCP_TYPE_ETH) { 566 dev_err(dev, "Bad firmware type: 0x%x\n", ntohl(hdr->mcp_type)); 567 return -EINVAL; 568 } 569 570 /* save firmware version for ethtool */ 571 strncpy(mgp->fw_version, hdr->version, sizeof(mgp->fw_version)); 572 mgp->fw_version[sizeof(mgp->fw_version) - 1] = '\0'; 573 574 sscanf(mgp->fw_version, "%d.%d.%d", &mgp->fw_ver_major, 575 &mgp->fw_ver_minor, &mgp->fw_ver_tiny); 576 577 if (!(mgp->fw_ver_major == MXGEFW_VERSION_MAJOR && 578 mgp->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 579 dev_err(dev, "Found firmware version %s\n", mgp->fw_version); 580 dev_err(dev, "Driver needs %d.%d\n", MXGEFW_VERSION_MAJOR, 581 MXGEFW_VERSION_MINOR); 582 return -EINVAL; 583 } 584 return 0; 585 } 586 587 static int myri10ge_load_hotplug_firmware(struct myri10ge_priv *mgp, u32 * size) 588 { 589 unsigned crc, reread_crc; 590 const struct firmware *fw; 591 struct device *dev = &mgp->pdev->dev; 592 unsigned char *fw_readback; 593 struct mcp_gen_header *hdr; 594 size_t hdr_offset; 595 int status; 596 unsigned i; 597 598 if ((status = request_firmware(&fw, mgp->fw_name, dev)) < 0) { 599 dev_err(dev, "Unable to load %s firmware image via hotplug\n", 600 mgp->fw_name); 601 status = -EINVAL; 602 goto abort_with_nothing; 603 } 604 605 /* check size */ 606 607 if (fw->size >= mgp->sram_size - MYRI10GE_FW_OFFSET || 608 fw->size < MCP_HEADER_PTR_OFFSET + 4) { 609 dev_err(dev, "Firmware size invalid:%d\n", (int)fw->size); 610 status = -EINVAL; 611 goto abort_with_fw; 612 } 613 614 /* check id */ 615 hdr_offset = ntohl(*(__be32 *) (fw->data + MCP_HEADER_PTR_OFFSET)); 616 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->size) { 617 dev_err(dev, "Bad firmware file\n"); 618 status = -EINVAL; 619 goto abort_with_fw; 620 } 621 hdr 
= (void *)(fw->data + hdr_offset); 622 623 status = myri10ge_validate_firmware(mgp, hdr); 624 if (status != 0) 625 goto abort_with_fw; 626 627 crc = crc32(~0, fw->data, fw->size); 628 for (i = 0; i < fw->size; i += 256) { 629 myri10ge_pio_copy(mgp->sram + MYRI10GE_FW_OFFSET + i, 630 fw->data + i, 631 min(256U, (unsigned)(fw->size - i))); 632 mb(); 633 readb(mgp->sram); 634 } 635 fw_readback = vmalloc(fw->size); 636 if (!fw_readback) { 637 status = -ENOMEM; 638 goto abort_with_fw; 639 } 640 /* corruption checking is good for parity recovery and buggy chipset */ 641 memcpy_fromio(fw_readback, mgp->sram + MYRI10GE_FW_OFFSET, fw->size); 642 reread_crc = crc32(~0, fw_readback, fw->size); 643 vfree(fw_readback); 644 if (crc != reread_crc) { 645 dev_err(dev, "CRC failed(fw-len=%u), got 0x%x (expect 0x%x)\n", 646 (unsigned)fw->size, reread_crc, crc); 647 status = -EIO; 648 goto abort_with_fw; 649 } 650 *size = (u32) fw->size; 651 652 abort_with_fw: 653 release_firmware(fw); 654 655 abort_with_nothing: 656 return status; 657 } 658 659 static int myri10ge_adopt_running_firmware(struct myri10ge_priv *mgp) 660 { 661 struct mcp_gen_header *hdr; 662 struct device *dev = &mgp->pdev->dev; 663 const size_t bytes = sizeof(struct mcp_gen_header); 664 size_t hdr_offset; 665 int status; 666 667 /* find running firmware header */ 668 hdr_offset = swab32(readl(mgp->sram + MCP_HEADER_PTR_OFFSET)); 669 670 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > mgp->sram_size) { 671 dev_err(dev, "Running firmware has bad header offset (%d)\n", 672 (int)hdr_offset); 673 return -EIO; 674 } 675 676 /* copy header of running firmware from SRAM to host memory to 677 * validate firmware */ 678 hdr = kmalloc(bytes, GFP_KERNEL); 679 if (hdr == NULL) 680 return -ENOMEM; 681 682 memcpy_fromio(hdr, mgp->sram + hdr_offset, bytes); 683 status = myri10ge_validate_firmware(mgp, hdr); 684 kfree(hdr); 685 686 /* check to see if adopted firmware has bug where adopting 687 * it will cause broadcasts to be 
filtered unless the NIC 688 * is kept in ALLMULTI mode */ 689 if (mgp->fw_ver_major == 1 && mgp->fw_ver_minor == 4 && 690 mgp->fw_ver_tiny >= 4 && mgp->fw_ver_tiny <= 11) { 691 mgp->adopted_rx_filter_bug = 1; 692 dev_warn(dev, "Adopting fw %d.%d.%d: " 693 "working around rx filter bug\n", 694 mgp->fw_ver_major, mgp->fw_ver_minor, 695 mgp->fw_ver_tiny); 696 } 697 return status; 698 } 699 700 static int myri10ge_get_firmware_capabilities(struct myri10ge_priv *mgp) 701 { 702 struct myri10ge_cmd cmd; 703 int status; 704 705 /* probe for IPv6 TSO support */ 706 mgp->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO; 707 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, 708 &cmd, 0); 709 if (status == 0) { 710 mgp->max_tso6 = cmd.data0; 711 mgp->features |= NETIF_F_TSO6; 712 } 713 714 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0); 715 if (status != 0) { 716 dev_err(&mgp->pdev->dev, 717 "failed MXGEFW_CMD_GET_RX_RING_SIZE\n"); 718 return -ENXIO; 719 } 720 721 mgp->max_intr_slots = 2 * (cmd.data0 / sizeof(struct mcp_dma_addr)); 722 723 return 0; 724 } 725 726 static int myri10ge_load_firmware(struct myri10ge_priv *mgp, int adopt) 727 { 728 char __iomem *submit; 729 __be32 buf[16] __attribute__ ((__aligned__(8))); 730 u32 dma_low, dma_high, size; 731 int status, i; 732 733 size = 0; 734 status = myri10ge_load_hotplug_firmware(mgp, &size); 735 if (status) { 736 if (!adopt) 737 return status; 738 dev_warn(&mgp->pdev->dev, "hotplug firmware loading failed\n"); 739 740 /* Do not attempt to adopt firmware if there 741 * was a bad crc */ 742 if (status == -EIO) 743 return status; 744 745 status = myri10ge_adopt_running_firmware(mgp); 746 if (status != 0) { 747 dev_err(&mgp->pdev->dev, 748 "failed to adopt running firmware\n"); 749 return status; 750 } 751 dev_info(&mgp->pdev->dev, 752 "Successfully adopted running firmware\n"); 753 if (mgp->tx_boundary == 4096) { 754 dev_warn(&mgp->pdev->dev, 755 "Using firmware currently running on 
NIC" 756 ". For optimal\n"); 757 dev_warn(&mgp->pdev->dev, 758 "performance consider loading optimized " 759 "firmware\n"); 760 dev_warn(&mgp->pdev->dev, "via hotplug\n"); 761 } 762 763 set_fw_name(mgp, "adopted", false); 764 mgp->tx_boundary = 2048; 765 myri10ge_dummy_rdma(mgp, 1); 766 status = myri10ge_get_firmware_capabilities(mgp); 767 return status; 768 } 769 770 /* clear confirmation addr */ 771 mgp->cmd->data = 0; 772 mb(); 773 774 /* send a reload command to the bootstrap MCP, and wait for the 775 * response in the confirmation address. The firmware should 776 * write a -1 there to indicate it is alive and well 777 */ 778 dma_low = MYRI10GE_LOWPART_TO_U32(mgp->cmd_bus); 779 dma_high = MYRI10GE_HIGHPART_TO_U32(mgp->cmd_bus); 780 781 buf[0] = htonl(dma_high); /* confirm addr MSW */ 782 buf[1] = htonl(dma_low); /* confirm addr LSW */ 783 buf[2] = MYRI10GE_NO_CONFIRM_DATA; /* confirm data */ 784 785 /* FIX: All newest firmware should un-protect the bottom of 786 * the sram before handoff. However, the very first interfaces 787 * do not. 
Therefore the handoff copy must skip the first 8 bytes 788 */ 789 buf[3] = htonl(MYRI10GE_FW_OFFSET + 8); /* where the code starts */ 790 buf[4] = htonl(size - 8); /* length of code */ 791 buf[5] = htonl(8); /* where to copy to */ 792 buf[6] = htonl(0); /* where to jump to */ 793 794 submit = mgp->sram + MXGEFW_BOOT_HANDOFF; 795 796 myri10ge_pio_copy(submit, &buf, sizeof(buf)); 797 mb(); 798 msleep(1); 799 mb(); 800 i = 0; 801 while (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA && i < 9) { 802 msleep(1 << i); 803 i++; 804 } 805 if (mgp->cmd->data != MYRI10GE_NO_CONFIRM_DATA) { 806 dev_err(&mgp->pdev->dev, "handoff failed\n"); 807 return -ENXIO; 808 } 809 myri10ge_dummy_rdma(mgp, 1); 810 status = myri10ge_get_firmware_capabilities(mgp); 811 812 return status; 813 } 814 815 static int myri10ge_update_mac_address(struct myri10ge_priv *mgp, u8 * addr) 816 { 817 struct myri10ge_cmd cmd; 818 int status; 819 820 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 821 | (addr[2] << 8) | addr[3]); 822 823 cmd.data1 = ((addr[4] << 8) | (addr[5])); 824 825 status = myri10ge_send_cmd(mgp, MXGEFW_SET_MAC_ADDRESS, &cmd, 0); 826 return status; 827 } 828 829 static int myri10ge_change_pause(struct myri10ge_priv *mgp, int pause) 830 { 831 struct myri10ge_cmd cmd; 832 int status, ctl; 833 834 ctl = pause ? MXGEFW_ENABLE_FLOW_CONTROL : MXGEFW_DISABLE_FLOW_CONTROL; 835 status = myri10ge_send_cmd(mgp, ctl, &cmd, 0); 836 837 if (status) { 838 netdev_err(mgp->dev, "Failed to set flow control mode\n"); 839 return status; 840 } 841 mgp->pause = pause; 842 return 0; 843 } 844 845 static void 846 myri10ge_change_promisc(struct myri10ge_priv *mgp, int promisc, int atomic) 847 { 848 struct myri10ge_cmd cmd; 849 int status, ctl; 850 851 ctl = promisc ? 
MXGEFW_ENABLE_PROMISC : MXGEFW_DISABLE_PROMISC; 852 status = myri10ge_send_cmd(mgp, ctl, &cmd, atomic); 853 if (status) 854 netdev_err(mgp->dev, "Failed to set promisc mode\n"); 855 } 856 857 static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type) 858 { 859 struct myri10ge_cmd cmd; 860 int status; 861 u32 len; 862 struct page *dmatest_page; 863 dma_addr_t dmatest_bus; 864 char *test = " "; 865 866 dmatest_page = alloc_page(GFP_KERNEL); 867 if (!dmatest_page) 868 return -ENOMEM; 869 dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE, 870 DMA_BIDIRECTIONAL); 871 if (unlikely(pci_dma_mapping_error(mgp->pdev, dmatest_bus))) { 872 __free_page(dmatest_page); 873 return -ENOMEM; 874 } 875 876 /* Run a small DMA test. 877 * The magic multipliers to the length tell the firmware 878 * to do DMA read, write, or read+write tests. The 879 * results are returned in cmd.data0. The upper 16 880 * bits or the return is the number of transfers completed. 881 * The lower 16 bits is the time in 0.5us ticks that the 882 * transfers took to complete. 
883 */ 884 885 len = mgp->tx_boundary; 886 887 cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus); 888 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus); 889 cmd.data2 = len * 0x10000; 890 status = myri10ge_send_cmd(mgp, test_type, &cmd, 0); 891 if (status != 0) { 892 test = "read"; 893 goto abort; 894 } 895 mgp->read_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff); 896 cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus); 897 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus); 898 cmd.data2 = len * 0x1; 899 status = myri10ge_send_cmd(mgp, test_type, &cmd, 0); 900 if (status != 0) { 901 test = "write"; 902 goto abort; 903 } 904 mgp->write_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff); 905 906 cmd.data0 = MYRI10GE_LOWPART_TO_U32(dmatest_bus); 907 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(dmatest_bus); 908 cmd.data2 = len * 0x10001; 909 status = myri10ge_send_cmd(mgp, test_type, &cmd, 0); 910 if (status != 0) { 911 test = "read/write"; 912 goto abort; 913 } 914 mgp->read_write_dma = ((cmd.data0 >> 16) * len * 2 * 2) / 915 (cmd.data0 & 0xffff); 916 917 abort: 918 pci_unmap_page(mgp->pdev, dmatest_bus, PAGE_SIZE, DMA_BIDIRECTIONAL); 919 put_page(dmatest_page); 920 921 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 922 dev_warn(&mgp->pdev->dev, "DMA %s benchmark failed: %d\n", 923 test, status); 924 925 return status; 926 } 927 928 #ifdef CONFIG_NET_RX_BUSY_POLL 929 static inline void myri10ge_ss_init_lock(struct myri10ge_slice_state *ss) 930 { 931 spin_lock_init(&ss->lock); 932 ss->state = SLICE_STATE_IDLE; 933 } 934 935 static inline bool myri10ge_ss_lock_napi(struct myri10ge_slice_state *ss) 936 { 937 bool rc = true; 938 spin_lock(&ss->lock); 939 if ((ss->state & SLICE_LOCKED)) { 940 WARN_ON((ss->state & SLICE_STATE_NAPI)); 941 ss->state |= SLICE_STATE_NAPI_YIELD; 942 rc = false; 943 ss->lock_napi_yield++; 944 } else 945 ss->state = SLICE_STATE_NAPI; 946 spin_unlock(&ss->lock); 947 return rc; 948 } 949 950 static inline void 
myri10ge_ss_unlock_napi(struct myri10ge_slice_state *ss) 951 { 952 spin_lock(&ss->lock); 953 WARN_ON((ss->state & (SLICE_STATE_POLL | SLICE_STATE_NAPI_YIELD))); 954 ss->state = SLICE_STATE_IDLE; 955 spin_unlock(&ss->lock); 956 } 957 958 static inline bool myri10ge_ss_lock_poll(struct myri10ge_slice_state *ss) 959 { 960 bool rc = true; 961 spin_lock_bh(&ss->lock); 962 if ((ss->state & SLICE_LOCKED)) { 963 ss->state |= SLICE_STATE_POLL_YIELD; 964 rc = false; 965 ss->lock_poll_yield++; 966 } else 967 ss->state |= SLICE_STATE_POLL; 968 spin_unlock_bh(&ss->lock); 969 return rc; 970 } 971 972 static inline void myri10ge_ss_unlock_poll(struct myri10ge_slice_state *ss) 973 { 974 spin_lock_bh(&ss->lock); 975 WARN_ON((ss->state & SLICE_STATE_NAPI)); 976 ss->state = SLICE_STATE_IDLE; 977 spin_unlock_bh(&ss->lock); 978 } 979 980 static inline bool myri10ge_ss_busy_polling(struct myri10ge_slice_state *ss) 981 { 982 WARN_ON(!(ss->state & SLICE_LOCKED)); 983 return (ss->state & SLICE_USER_PEND); 984 } 985 #else /* CONFIG_NET_RX_BUSY_POLL */ 986 static inline void myri10ge_ss_init_lock(struct myri10ge_slice_state *ss) 987 { 988 } 989 990 static inline bool myri10ge_ss_lock_napi(struct myri10ge_slice_state *ss) 991 { 992 return false; 993 } 994 995 static inline void myri10ge_ss_unlock_napi(struct myri10ge_slice_state *ss) 996 { 997 } 998 999 static inline bool myri10ge_ss_lock_poll(struct myri10ge_slice_state *ss) 1000 { 1001 return false; 1002 } 1003 1004 static inline void myri10ge_ss_unlock_poll(struct myri10ge_slice_state *ss) 1005 { 1006 } 1007 1008 static inline bool myri10ge_ss_busy_polling(struct myri10ge_slice_state *ss) 1009 { 1010 return false; 1011 } 1012 #endif 1013 1014 static int myri10ge_reset(struct myri10ge_priv *mgp) 1015 { 1016 struct myri10ge_cmd cmd; 1017 struct myri10ge_slice_state *ss; 1018 int i, status; 1019 size_t bytes; 1020 #ifdef CONFIG_MYRI10GE_DCA 1021 unsigned long dca_tag_off; 1022 #endif 1023 1024 /* try to send a reset command to the card to see 
if it 1025 * is alive */ 1026 memset(&cmd, 0, sizeof(cmd)); 1027 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd, 0); 1028 if (status != 0) { 1029 dev_err(&mgp->pdev->dev, "failed reset\n"); 1030 return -ENXIO; 1031 } 1032 1033 (void)myri10ge_dma_test(mgp, MXGEFW_DMA_TEST); 1034 /* 1035 * Use non-ndis mcp_slot (eg, 4 bytes total, 1036 * no toeplitz hash value returned. Older firmware will 1037 * not understand this command, but will use the correct 1038 * sized mcp_slot, so we ignore error returns 1039 */ 1040 cmd.data0 = MXGEFW_RSS_MCP_SLOT_TYPE_MIN; 1041 (void)myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_MCP_SLOT_TYPE, &cmd, 0); 1042 1043 /* Now exchange information about interrupts */ 1044 1045 bytes = mgp->max_intr_slots * sizeof(*mgp->ss[0].rx_done.entry); 1046 cmd.data0 = (u32) bytes; 1047 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0); 1048 1049 /* 1050 * Even though we already know how many slices are supported 1051 * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES 1052 * has magic side effects, and must be called after a reset. 1053 * It must be called prior to calling any RSS related cmds, 1054 * including assigning an interrupt queue for anything but 1055 * slice 0. It must also be called *after* 1056 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1057 * the firmware to compute offsets. 
1058 */ 1059 1060 if (mgp->num_slices > 1) { 1061 1062 /* ask the maximum number of slices it supports */ 1063 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1064 &cmd, 0); 1065 if (status != 0) { 1066 dev_err(&mgp->pdev->dev, 1067 "failed to get number of slices\n"); 1068 } 1069 1070 /* 1071 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1072 * to setting up the interrupt queue DMA 1073 */ 1074 1075 cmd.data0 = mgp->num_slices; 1076 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1077 if (mgp->dev->real_num_tx_queues > 1) 1078 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1079 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1080 &cmd, 0); 1081 1082 /* Firmware older than 1.4.32 only supports multiple 1083 * RX queues, so if we get an error, first retry using a 1084 * single TX queue before giving up */ 1085 if (status != 0 && mgp->dev->real_num_tx_queues > 1) { 1086 netif_set_real_num_tx_queues(mgp->dev, 1); 1087 cmd.data0 = mgp->num_slices; 1088 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1089 status = myri10ge_send_cmd(mgp, 1090 MXGEFW_CMD_ENABLE_RSS_QUEUES, 1091 &cmd, 0); 1092 } 1093 1094 if (status != 0) { 1095 dev_err(&mgp->pdev->dev, 1096 "failed to set number of slices\n"); 1097 1098 return status; 1099 } 1100 } 1101 for (i = 0; i < mgp->num_slices; i++) { 1102 ss = &mgp->ss[i]; 1103 cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->rx_done.bus); 1104 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->rx_done.bus); 1105 cmd.data2 = i; 1106 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA, 1107 &cmd, 0); 1108 } 1109 1110 status |= 1111 myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd, 0); 1112 for (i = 0; i < mgp->num_slices; i++) { 1113 ss = &mgp->ss[i]; 1114 ss->irq_claim = 1115 (__iomem __be32 *) (mgp->sram + cmd.data0 + 8 * i); 1116 } 1117 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1118 &cmd, 0); 1119 mgp->irq_deassert = (__iomem __be32 *) (mgp->sram + cmd.data0); 1120 1121 status 
|= myri10ge_send_cmd 1122 (mgp, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd, 0); 1123 mgp->intr_coal_delay_ptr = (__iomem __be32 *) (mgp->sram + cmd.data0); 1124 if (status != 0) { 1125 dev_err(&mgp->pdev->dev, "failed set interrupt parameters\n"); 1126 return status; 1127 } 1128 put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr); 1129 1130 #ifdef CONFIG_MYRI10GE_DCA 1131 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_DCA_OFFSET, &cmd, 0); 1132 dca_tag_off = cmd.data0; 1133 for (i = 0; i < mgp->num_slices; i++) { 1134 ss = &mgp->ss[i]; 1135 if (status == 0) { 1136 ss->dca_tag = (__iomem __be32 *) 1137 (mgp->sram + dca_tag_off + 4 * i); 1138 } else { 1139 ss->dca_tag = NULL; 1140 } 1141 } 1142 #endif /* CONFIG_MYRI10GE_DCA */ 1143 1144 /* reset mcp/driver shared state back to 0 */ 1145 1146 mgp->link_changes = 0; 1147 for (i = 0; i < mgp->num_slices; i++) { 1148 ss = &mgp->ss[i]; 1149 1150 memset(ss->rx_done.entry, 0, bytes); 1151 ss->tx.req = 0; 1152 ss->tx.done = 0; 1153 ss->tx.pkt_start = 0; 1154 ss->tx.pkt_done = 0; 1155 ss->rx_big.cnt = 0; 1156 ss->rx_small.cnt = 0; 1157 ss->rx_done.idx = 0; 1158 ss->rx_done.cnt = 0; 1159 ss->tx.wake_queue = 0; 1160 ss->tx.stop_queue = 0; 1161 } 1162 1163 status = myri10ge_update_mac_address(mgp, mgp->dev->dev_addr); 1164 myri10ge_change_pause(mgp, mgp->pause); 1165 myri10ge_set_multicast_list(mgp->dev); 1166 return status; 1167 } 1168 1169 #ifdef CONFIG_MYRI10GE_DCA 1170 static int myri10ge_toggle_relaxed(struct pci_dev *pdev, int on) 1171 { 1172 int ret; 1173 u16 ctl; 1174 1175 pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &ctl); 1176 1177 ret = (ctl & PCI_EXP_DEVCTL_RELAX_EN) >> 4; 1178 if (ret != on) { 1179 ctl &= ~PCI_EXP_DEVCTL_RELAX_EN; 1180 ctl |= (on << 4); 1181 pcie_capability_write_word(pdev, PCI_EXP_DEVCTL, ctl); 1182 } 1183 return ret; 1184 } 1185 1186 static void 1187 myri10ge_write_dca(struct myri10ge_slice_state *ss, int cpu, int tag) 1188 { 1189 ss->cached_dca_tag = tag; 1190 put_be32(htonl(tag), 
ss->dca_tag); 1191 } 1192 1193 static inline void myri10ge_update_dca(struct myri10ge_slice_state *ss) 1194 { 1195 int cpu = get_cpu(); 1196 int tag; 1197 1198 if (cpu != ss->cpu) { 1199 tag = dca3_get_tag(&ss->mgp->pdev->dev, cpu); 1200 if (ss->cached_dca_tag != tag) 1201 myri10ge_write_dca(ss, cpu, tag); 1202 ss->cpu = cpu; 1203 } 1204 put_cpu(); 1205 } 1206 1207 static void myri10ge_setup_dca(struct myri10ge_priv *mgp) 1208 { 1209 int err, i; 1210 struct pci_dev *pdev = mgp->pdev; 1211 1212 if (mgp->ss[0].dca_tag == NULL || mgp->dca_enabled) 1213 return; 1214 if (!myri10ge_dca) { 1215 dev_err(&pdev->dev, "dca disabled by administrator\n"); 1216 return; 1217 } 1218 err = dca_add_requester(&pdev->dev); 1219 if (err) { 1220 if (err != -ENODEV) 1221 dev_err(&pdev->dev, 1222 "dca_add_requester() failed, err=%d\n", err); 1223 return; 1224 } 1225 mgp->relaxed_order = myri10ge_toggle_relaxed(pdev, 0); 1226 mgp->dca_enabled = 1; 1227 for (i = 0; i < mgp->num_slices; i++) { 1228 mgp->ss[i].cpu = -1; 1229 mgp->ss[i].cached_dca_tag = -1; 1230 myri10ge_update_dca(&mgp->ss[i]); 1231 } 1232 } 1233 1234 static void myri10ge_teardown_dca(struct myri10ge_priv *mgp) 1235 { 1236 struct pci_dev *pdev = mgp->pdev; 1237 1238 if (!mgp->dca_enabled) 1239 return; 1240 mgp->dca_enabled = 0; 1241 if (mgp->relaxed_order) 1242 myri10ge_toggle_relaxed(pdev, 1); 1243 dca_remove_requester(&pdev->dev); 1244 } 1245 1246 static int myri10ge_notify_dca_device(struct device *dev, void *data) 1247 { 1248 struct myri10ge_priv *mgp; 1249 unsigned long event; 1250 1251 mgp = dev_get_drvdata(dev); 1252 event = *(unsigned long *)data; 1253 1254 if (event == DCA_PROVIDER_ADD) 1255 myri10ge_setup_dca(mgp); 1256 else if (event == DCA_PROVIDER_REMOVE) 1257 myri10ge_teardown_dca(mgp); 1258 return 0; 1259 } 1260 #endif /* CONFIG_MYRI10GE_DCA */ 1261 1262 static inline void 1263 myri10ge_submit_8rx(struct mcp_kreq_ether_recv __iomem * dst, 1264 struct mcp_kreq_ether_recv *src) 1265 { 1266 __be32 low; 1267 1268 
low = src->addr_low; 1269 src->addr_low = htonl(DMA_BIT_MASK(32)); 1270 myri10ge_pio_copy(dst, src, 4 * sizeof(*src)); 1271 mb(); 1272 myri10ge_pio_copy(dst + 4, src + 4, 4 * sizeof(*src)); 1273 mb(); 1274 src->addr_low = low; 1275 put_be32(low, &dst->addr_low); 1276 mb(); 1277 } 1278 1279 static inline void myri10ge_vlan_ip_csum(struct sk_buff *skb, __wsum hw_csum) 1280 { 1281 struct vlan_hdr *vh = (struct vlan_hdr *)(skb->data); 1282 1283 if ((skb->protocol == htons(ETH_P_8021Q)) && 1284 (vh->h_vlan_encapsulated_proto == htons(ETH_P_IP) || 1285 vh->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))) { 1286 skb->csum = hw_csum; 1287 skb->ip_summed = CHECKSUM_COMPLETE; 1288 } 1289 } 1290 1291 static void 1292 myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, 1293 int bytes, int watchdog) 1294 { 1295 struct page *page; 1296 dma_addr_t bus; 1297 int idx; 1298 #if MYRI10GE_ALLOC_SIZE > 4096 1299 int end_offset; 1300 #endif 1301 1302 if (unlikely(rx->watchdog_needed && !watchdog)) 1303 return; 1304 1305 /* try to refill entire ring */ 1306 while (rx->fill_cnt != (rx->cnt + rx->mask + 1)) { 1307 idx = rx->fill_cnt & rx->mask; 1308 if (rx->page_offset + bytes <= MYRI10GE_ALLOC_SIZE) { 1309 /* we can use part of previous page */ 1310 get_page(rx->page); 1311 } else { 1312 /* we need a new page */ 1313 page = 1314 alloc_pages(GFP_ATOMIC | __GFP_COMP, 1315 MYRI10GE_ALLOC_ORDER); 1316 if (unlikely(page == NULL)) { 1317 if (rx->fill_cnt - rx->cnt < 16) 1318 rx->watchdog_needed = 1; 1319 return; 1320 } 1321 1322 bus = pci_map_page(mgp->pdev, page, 0, 1323 MYRI10GE_ALLOC_SIZE, 1324 PCI_DMA_FROMDEVICE); 1325 if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) { 1326 __free_pages(page, MYRI10GE_ALLOC_ORDER); 1327 if (rx->fill_cnt - rx->cnt < 16) 1328 rx->watchdog_needed = 1; 1329 return; 1330 } 1331 1332 rx->page = page; 1333 rx->page_offset = 0; 1334 rx->bus = bus; 1335 1336 } 1337 rx->info[idx].page = rx->page; 1338 rx->info[idx].page_offset = 
rx->page_offset; 1339 /* note that this is the address of the start of the 1340 * page */ 1341 dma_unmap_addr_set(&rx->info[idx], bus, rx->bus); 1342 rx->shadow[idx].addr_low = 1343 htonl(MYRI10GE_LOWPART_TO_U32(rx->bus) + rx->page_offset); 1344 rx->shadow[idx].addr_high = 1345 htonl(MYRI10GE_HIGHPART_TO_U32(rx->bus)); 1346 1347 /* start next packet on a cacheline boundary */ 1348 rx->page_offset += SKB_DATA_ALIGN(bytes); 1349 1350 #if MYRI10GE_ALLOC_SIZE > 4096 1351 /* don't cross a 4KB boundary */ 1352 end_offset = rx->page_offset + bytes - 1; 1353 if ((unsigned)(rx->page_offset ^ end_offset) > 4095) 1354 rx->page_offset = end_offset & ~4095; 1355 #endif 1356 rx->fill_cnt++; 1357 1358 /* copy 8 descriptors to the firmware at a time */ 1359 if ((idx & 7) == 7) { 1360 myri10ge_submit_8rx(&rx->lanai[idx - 7], 1361 &rx->shadow[idx - 7]); 1362 } 1363 } 1364 } 1365 1366 static inline void 1367 myri10ge_unmap_rx_page(struct pci_dev *pdev, 1368 struct myri10ge_rx_buffer_state *info, int bytes) 1369 { 1370 /* unmap the recvd page if we're the only or last user of it */ 1371 if (bytes >= MYRI10GE_ALLOC_SIZE / 2 || 1372 (info->page_offset + 2 * bytes) > MYRI10GE_ALLOC_SIZE) { 1373 pci_unmap_page(pdev, (dma_unmap_addr(info, bus) 1374 & ~(MYRI10GE_ALLOC_SIZE - 1)), 1375 MYRI10GE_ALLOC_SIZE, PCI_DMA_FROMDEVICE); 1376 } 1377 } 1378 1379 /* 1380 * GRO does not support acceleration of tagged vlan frames, and 1381 * this NIC does not support vlan tag offload, so we must pop 1382 * the tag ourselves to be able to achieve GRO performance that 1383 * is comparable to LRO. 
1384 */ 1385 1386 static inline void 1387 myri10ge_vlan_rx(struct net_device *dev, void *addr, struct sk_buff *skb) 1388 { 1389 u8 *va; 1390 struct vlan_ethhdr *veh; 1391 struct skb_frag_struct *frag; 1392 __wsum vsum; 1393 1394 va = addr; 1395 va += MXGEFW_PAD; 1396 veh = (struct vlan_ethhdr *)va; 1397 if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) == 1398 NETIF_F_HW_VLAN_CTAG_RX && 1399 veh->h_vlan_proto == htons(ETH_P_8021Q)) { 1400 /* fixup csum if needed */ 1401 if (skb->ip_summed == CHECKSUM_COMPLETE) { 1402 vsum = csum_partial(va + ETH_HLEN, VLAN_HLEN, 0); 1403 skb->csum = csum_sub(skb->csum, vsum); 1404 } 1405 /* pop tag */ 1406 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(veh->h_vlan_TCI)); 1407 memmove(va + VLAN_HLEN, va, 2 * ETH_ALEN); 1408 skb->len -= VLAN_HLEN; 1409 skb->data_len -= VLAN_HLEN; 1410 frag = skb_shinfo(skb)->frags; 1411 frag->page_offset += VLAN_HLEN; 1412 skb_frag_size_set(frag, skb_frag_size(frag) - VLAN_HLEN); 1413 } 1414 } 1415 1416 #define MYRI10GE_HLEN 64 /* Bytes to copy from page to skb linear memory */ 1417 1418 static inline int 1419 myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum) 1420 { 1421 struct myri10ge_priv *mgp = ss->mgp; 1422 struct sk_buff *skb; 1423 struct skb_frag_struct *rx_frags; 1424 struct myri10ge_rx_buf *rx; 1425 int i, idx, remainder, bytes; 1426 struct pci_dev *pdev = mgp->pdev; 1427 struct net_device *dev = mgp->dev; 1428 u8 *va; 1429 bool polling; 1430 1431 if (len <= mgp->small_bytes) { 1432 rx = &ss->rx_small; 1433 bytes = mgp->small_bytes; 1434 } else { 1435 rx = &ss->rx_big; 1436 bytes = mgp->big_bytes; 1437 } 1438 1439 len += MXGEFW_PAD; 1440 idx = rx->cnt & rx->mask; 1441 va = page_address(rx->info[idx].page) + rx->info[idx].page_offset; 1442 prefetch(va); 1443 1444 /* When busy polling in user context, allocate skb and copy headers to 1445 * skb's linear memory ourselves. When not busy polling, use the napi 1446 * gro api. 
1447 */ 1448 polling = myri10ge_ss_busy_polling(ss); 1449 if (polling) 1450 skb = netdev_alloc_skb(dev, MYRI10GE_HLEN + 16); 1451 else 1452 skb = napi_get_frags(&ss->napi); 1453 if (unlikely(skb == NULL)) { 1454 ss->stats.rx_dropped++; 1455 for (i = 0, remainder = len; remainder > 0; i++) { 1456 myri10ge_unmap_rx_page(pdev, &rx->info[idx], bytes); 1457 put_page(rx->info[idx].page); 1458 rx->cnt++; 1459 idx = rx->cnt & rx->mask; 1460 remainder -= MYRI10GE_ALLOC_SIZE; 1461 } 1462 return 0; 1463 } 1464 rx_frags = skb_shinfo(skb)->frags; 1465 /* Fill skb_frag_struct(s) with data from our receive */ 1466 for (i = 0, remainder = len; remainder > 0; i++) { 1467 myri10ge_unmap_rx_page(pdev, &rx->info[idx], bytes); 1468 skb_fill_page_desc(skb, i, rx->info[idx].page, 1469 rx->info[idx].page_offset, 1470 remainder < MYRI10GE_ALLOC_SIZE ? 1471 remainder : MYRI10GE_ALLOC_SIZE); 1472 rx->cnt++; 1473 idx = rx->cnt & rx->mask; 1474 remainder -= MYRI10GE_ALLOC_SIZE; 1475 } 1476 1477 /* remove padding */ 1478 rx_frags[0].page_offset += MXGEFW_PAD; 1479 rx_frags[0].size -= MXGEFW_PAD; 1480 len -= MXGEFW_PAD; 1481 1482 skb->len = len; 1483 skb->data_len = len; 1484 skb->truesize += len; 1485 if (dev->features & NETIF_F_RXCSUM) { 1486 skb->ip_summed = CHECKSUM_COMPLETE; 1487 skb->csum = csum; 1488 } 1489 myri10ge_vlan_rx(mgp->dev, va, skb); 1490 skb_record_rx_queue(skb, ss - &mgp->ss[0]); 1491 1492 if (polling) { 1493 int hlen; 1494 1495 /* myri10ge_vlan_rx might have moved the header, so compute 1496 * length and address again. 1497 */ 1498 hlen = MYRI10GE_HLEN > skb->len ? 
skb->len : MYRI10GE_HLEN; 1499 va = page_address(skb_frag_page(&rx_frags[0])) + 1500 rx_frags[0].page_offset; 1501 /* Copy header into the skb linear memory */ 1502 skb_copy_to_linear_data(skb, va, hlen); 1503 rx_frags[0].page_offset += hlen; 1504 rx_frags[0].size -= hlen; 1505 skb->data_len -= hlen; 1506 skb->tail += hlen; 1507 skb->protocol = eth_type_trans(skb, dev); 1508 skb_mark_napi_id(skb, &ss->napi); 1509 netif_receive_skb(skb); 1510 } 1511 else 1512 napi_gro_frags(&ss->napi); 1513 1514 return 1; 1515 } 1516 1517 static inline void 1518 myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index) 1519 { 1520 struct pci_dev *pdev = ss->mgp->pdev; 1521 struct myri10ge_tx_buf *tx = &ss->tx; 1522 struct netdev_queue *dev_queue; 1523 struct sk_buff *skb; 1524 int idx, len; 1525 1526 while (tx->pkt_done != mcp_index) { 1527 idx = tx->done & tx->mask; 1528 skb = tx->info[idx].skb; 1529 1530 /* Mark as free */ 1531 tx->info[idx].skb = NULL; 1532 if (tx->info[idx].last) { 1533 tx->pkt_done++; 1534 tx->info[idx].last = 0; 1535 } 1536 tx->done++; 1537 len = dma_unmap_len(&tx->info[idx], len); 1538 dma_unmap_len_set(&tx->info[idx], len, 0); 1539 if (skb) { 1540 ss->stats.tx_bytes += skb->len; 1541 ss->stats.tx_packets++; 1542 dev_kfree_skb_irq(skb); 1543 if (len) 1544 pci_unmap_single(pdev, 1545 dma_unmap_addr(&tx->info[idx], 1546 bus), len, 1547 PCI_DMA_TODEVICE); 1548 } else { 1549 if (len) 1550 pci_unmap_page(pdev, 1551 dma_unmap_addr(&tx->info[idx], 1552 bus), len, 1553 PCI_DMA_TODEVICE); 1554 } 1555 } 1556 1557 dev_queue = netdev_get_tx_queue(ss->dev, ss - ss->mgp->ss); 1558 /* 1559 * Make a minimal effort to prevent the NIC from polling an 1560 * idle tx queue. If we can't get the lock we leave the queue 1561 * active. In this case, either a thread was about to start 1562 * using the queue anyway, or we lost a race and the NIC will 1563 * waste some of its resources polling an inactive queue for a 1564 * while. 
1565 */ 1566 1567 if ((ss->mgp->dev->real_num_tx_queues > 1) && 1568 __netif_tx_trylock(dev_queue)) { 1569 if (tx->req == tx->done) { 1570 tx->queue_active = 0; 1571 put_be32(htonl(1), tx->send_stop); 1572 mb(); 1573 mmiowb(); 1574 } 1575 __netif_tx_unlock(dev_queue); 1576 } 1577 1578 /* start the queue if we've stopped it */ 1579 if (netif_tx_queue_stopped(dev_queue) && 1580 tx->req - tx->done < (tx->mask >> 1) && 1581 ss->mgp->running == MYRI10GE_ETH_RUNNING) { 1582 tx->wake_queue++; 1583 netif_tx_wake_queue(dev_queue); 1584 } 1585 } 1586 1587 static inline int 1588 myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget) 1589 { 1590 struct myri10ge_rx_done *rx_done = &ss->rx_done; 1591 struct myri10ge_priv *mgp = ss->mgp; 1592 unsigned long rx_bytes = 0; 1593 unsigned long rx_packets = 0; 1594 unsigned long rx_ok; 1595 int idx = rx_done->idx; 1596 int cnt = rx_done->cnt; 1597 int work_done = 0; 1598 u16 length; 1599 __wsum checksum; 1600 1601 while (rx_done->entry[idx].length != 0 && work_done < budget) { 1602 length = ntohs(rx_done->entry[idx].length); 1603 rx_done->entry[idx].length = 0; 1604 checksum = csum_unfold(rx_done->entry[idx].checksum); 1605 rx_ok = myri10ge_rx_done(ss, length, checksum); 1606 rx_packets += rx_ok; 1607 rx_bytes += rx_ok * (unsigned long)length; 1608 cnt++; 1609 idx = cnt & (mgp->max_intr_slots - 1); 1610 work_done++; 1611 } 1612 rx_done->idx = idx; 1613 rx_done->cnt = cnt; 1614 ss->stats.rx_packets += rx_packets; 1615 ss->stats.rx_bytes += rx_bytes; 1616 1617 /* restock receive rings if needed */ 1618 if (ss->rx_small.fill_cnt - ss->rx_small.cnt < myri10ge_fill_thresh) 1619 myri10ge_alloc_rx_pages(mgp, &ss->rx_small, 1620 mgp->small_bytes + MXGEFW_PAD, 0); 1621 if (ss->rx_big.fill_cnt - ss->rx_big.cnt < myri10ge_fill_thresh) 1622 myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0); 1623 1624 return work_done; 1625 } 1626 1627 static inline void myri10ge_check_statblock(struct myri10ge_priv *mgp) 1628 { 1629 struct 
mcp_irq_data *stats = mgp->ss[0].fw_stats; 1630 1631 if (unlikely(stats->stats_updated)) { 1632 unsigned link_up = ntohl(stats->link_up); 1633 if (mgp->link_state != link_up) { 1634 mgp->link_state = link_up; 1635 1636 if (mgp->link_state == MXGEFW_LINK_UP) { 1637 netif_info(mgp, link, mgp->dev, "link up\n"); 1638 netif_carrier_on(mgp->dev); 1639 mgp->link_changes++; 1640 } else { 1641 netif_info(mgp, link, mgp->dev, "link %s\n", 1642 (link_up == MXGEFW_LINK_MYRINET ? 1643 "mismatch (Myrinet detected)" : 1644 "down")); 1645 netif_carrier_off(mgp->dev); 1646 mgp->link_changes++; 1647 } 1648 } 1649 if (mgp->rdma_tags_available != 1650 ntohl(stats->rdma_tags_available)) { 1651 mgp->rdma_tags_available = 1652 ntohl(stats->rdma_tags_available); 1653 netdev_warn(mgp->dev, "RDMA timed out! %d tags left\n", 1654 mgp->rdma_tags_available); 1655 } 1656 mgp->down_cnt += stats->link_down; 1657 if (stats->link_down) 1658 wake_up(&mgp->down_wq); 1659 } 1660 } 1661 1662 static int myri10ge_poll(struct napi_struct *napi, int budget) 1663 { 1664 struct myri10ge_slice_state *ss = 1665 container_of(napi, struct myri10ge_slice_state, napi); 1666 int work_done; 1667 1668 #ifdef CONFIG_MYRI10GE_DCA 1669 if (ss->mgp->dca_enabled) 1670 myri10ge_update_dca(ss); 1671 #endif 1672 /* Try later if the busy_poll handler is running. 
*/ 1673 if (!myri10ge_ss_lock_napi(ss)) 1674 return budget; 1675 1676 /* process as many rx events as NAPI will allow */ 1677 work_done = myri10ge_clean_rx_done(ss, budget); 1678 1679 myri10ge_ss_unlock_napi(ss); 1680 if (work_done < budget) { 1681 napi_complete(napi); 1682 put_be32(htonl(3), ss->irq_claim); 1683 } 1684 return work_done; 1685 } 1686 1687 #ifdef CONFIG_NET_RX_BUSY_POLL 1688 static int myri10ge_busy_poll(struct napi_struct *napi) 1689 { 1690 struct myri10ge_slice_state *ss = 1691 container_of(napi, struct myri10ge_slice_state, napi); 1692 struct myri10ge_priv *mgp = ss->mgp; 1693 int work_done; 1694 1695 /* Poll only when the link is up */ 1696 if (mgp->link_state != MXGEFW_LINK_UP) 1697 return LL_FLUSH_FAILED; 1698 1699 if (!myri10ge_ss_lock_poll(ss)) 1700 return LL_FLUSH_BUSY; 1701 1702 /* Process a small number of packets */ 1703 work_done = myri10ge_clean_rx_done(ss, 4); 1704 if (work_done) 1705 ss->busy_poll_cnt += work_done; 1706 else 1707 ss->busy_poll_miss++; 1708 1709 myri10ge_ss_unlock_poll(ss); 1710 1711 return work_done; 1712 } 1713 #endif /* CONFIG_NET_RX_BUSY_POLL */ 1714 1715 static irqreturn_t myri10ge_intr(int irq, void *arg) 1716 { 1717 struct myri10ge_slice_state *ss = arg; 1718 struct myri10ge_priv *mgp = ss->mgp; 1719 struct mcp_irq_data *stats = ss->fw_stats; 1720 struct myri10ge_tx_buf *tx = &ss->tx; 1721 u32 send_done_count; 1722 int i; 1723 1724 /* an interrupt on a non-zero receive-only slice is implicitly 1725 * valid since MSI-X irqs are not shared */ 1726 if ((mgp->dev->real_num_tx_queues == 1) && (ss != mgp->ss)) { 1727 napi_schedule(&ss->napi); 1728 return IRQ_HANDLED; 1729 } 1730 1731 /* make sure it is our IRQ, and that the DMA has finished */ 1732 if (unlikely(!stats->valid)) 1733 return IRQ_NONE; 1734 1735 /* low bit indicates receives are present, so schedule 1736 * napi poll handler */ 1737 if (stats->valid & 1) 1738 napi_schedule(&ss->napi); 1739 1740 if (!mgp->msi_enabled && !mgp->msix_enabled) { 1741 
put_be32(0, mgp->irq_deassert); 1742 if (!myri10ge_deassert_wait) 1743 stats->valid = 0; 1744 mb(); 1745 } else 1746 stats->valid = 0; 1747 1748 /* Wait for IRQ line to go low, if using INTx */ 1749 i = 0; 1750 while (1) { 1751 i++; 1752 /* check for transmit completes and receives */ 1753 send_done_count = ntohl(stats->send_done_count); 1754 if (send_done_count != tx->pkt_done) 1755 myri10ge_tx_done(ss, (int)send_done_count); 1756 if (unlikely(i > myri10ge_max_irq_loops)) { 1757 netdev_warn(mgp->dev, "irq stuck?\n"); 1758 stats->valid = 0; 1759 schedule_work(&mgp->watchdog_work); 1760 } 1761 if (likely(stats->valid == 0)) 1762 break; 1763 cpu_relax(); 1764 barrier(); 1765 } 1766 1767 /* Only slice 0 updates stats */ 1768 if (ss == mgp->ss) 1769 myri10ge_check_statblock(mgp); 1770 1771 put_be32(htonl(3), ss->irq_claim + 1); 1772 return IRQ_HANDLED; 1773 } 1774 1775 static int 1776 myri10ge_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd) 1777 { 1778 struct myri10ge_priv *mgp = netdev_priv(netdev); 1779 char *ptr; 1780 int i; 1781 1782 cmd->autoneg = AUTONEG_DISABLE; 1783 ethtool_cmd_speed_set(cmd, SPEED_10000); 1784 cmd->duplex = DUPLEX_FULL; 1785 1786 /* 1787 * parse the product code to deterimine the interface type 1788 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 1789 * after the 3rd dash in the driver's cached copy of the 1790 * EEPROM's product code string. 
1791 */ 1792 ptr = mgp->product_code_string; 1793 if (ptr == NULL) { 1794 netdev_err(netdev, "Missing product code\n"); 1795 return 0; 1796 } 1797 for (i = 0; i < 3; i++, ptr++) { 1798 ptr = strchr(ptr, '-'); 1799 if (ptr == NULL) { 1800 netdev_err(netdev, "Invalid product code %s\n", 1801 mgp->product_code_string); 1802 return 0; 1803 } 1804 } 1805 if (*ptr == '2') 1806 ptr++; 1807 if (*ptr == 'R' || *ptr == 'Q' || *ptr == 'S') { 1808 /* We've found either an XFP, quad ribbon fiber, or SFP+ */ 1809 cmd->port = PORT_FIBRE; 1810 cmd->supported |= SUPPORTED_FIBRE; 1811 cmd->advertising |= ADVERTISED_FIBRE; 1812 } else { 1813 cmd->port = PORT_OTHER; 1814 } 1815 if (*ptr == 'R' || *ptr == 'S') 1816 cmd->transceiver = XCVR_EXTERNAL; 1817 else 1818 cmd->transceiver = XCVR_INTERNAL; 1819 1820 return 0; 1821 } 1822 1823 static void 1824 myri10ge_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) 1825 { 1826 struct myri10ge_priv *mgp = netdev_priv(netdev); 1827 1828 strlcpy(info->driver, "myri10ge", sizeof(info->driver)); 1829 strlcpy(info->version, MYRI10GE_VERSION_STR, sizeof(info->version)); 1830 strlcpy(info->fw_version, mgp->fw_version, sizeof(info->fw_version)); 1831 strlcpy(info->bus_info, pci_name(mgp->pdev), sizeof(info->bus_info)); 1832 } 1833 1834 static int 1835 myri10ge_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal) 1836 { 1837 struct myri10ge_priv *mgp = netdev_priv(netdev); 1838 1839 coal->rx_coalesce_usecs = mgp->intr_coal_delay; 1840 return 0; 1841 } 1842 1843 static int 1844 myri10ge_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal) 1845 { 1846 struct myri10ge_priv *mgp = netdev_priv(netdev); 1847 1848 mgp->intr_coal_delay = coal->rx_coalesce_usecs; 1849 put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr); 1850 return 0; 1851 } 1852 1853 static void 1854 myri10ge_get_pauseparam(struct net_device *netdev, 1855 struct ethtool_pauseparam *pause) 1856 { 1857 struct myri10ge_priv *mgp = 
netdev_priv(netdev); 1858 1859 pause->autoneg = 0; 1860 pause->rx_pause = mgp->pause; 1861 pause->tx_pause = mgp->pause; 1862 } 1863 1864 static int 1865 myri10ge_set_pauseparam(struct net_device *netdev, 1866 struct ethtool_pauseparam *pause) 1867 { 1868 struct myri10ge_priv *mgp = netdev_priv(netdev); 1869 1870 if (pause->tx_pause != mgp->pause) 1871 return myri10ge_change_pause(mgp, pause->tx_pause); 1872 if (pause->rx_pause != mgp->pause) 1873 return myri10ge_change_pause(mgp, pause->rx_pause); 1874 if (pause->autoneg != 0) 1875 return -EINVAL; 1876 return 0; 1877 } 1878 1879 static void 1880 myri10ge_get_ringparam(struct net_device *netdev, 1881 struct ethtool_ringparam *ring) 1882 { 1883 struct myri10ge_priv *mgp = netdev_priv(netdev); 1884 1885 ring->rx_mini_max_pending = mgp->ss[0].rx_small.mask + 1; 1886 ring->rx_max_pending = mgp->ss[0].rx_big.mask + 1; 1887 ring->rx_jumbo_max_pending = 0; 1888 ring->tx_max_pending = mgp->ss[0].tx.mask + 1; 1889 ring->rx_mini_pending = ring->rx_mini_max_pending; 1890 ring->rx_pending = ring->rx_max_pending; 1891 ring->rx_jumbo_pending = ring->rx_jumbo_max_pending; 1892 ring->tx_pending = ring->tx_max_pending; 1893 } 1894 1895 static const char myri10ge_gstrings_main_stats[][ETH_GSTRING_LEN] = { 1896 "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors", 1897 "tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions", 1898 "rx_length_errors", "rx_over_errors", "rx_crc_errors", 1899 "rx_frame_errors", "rx_fifo_errors", "rx_missed_errors", 1900 "tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors", 1901 "tx_heartbeat_errors", "tx_window_errors", 1902 /* device-specific stats */ 1903 "tx_boundary", "irq", "MSI", "MSIX", 1904 "read_dma_bw_MBs", "write_dma_bw_MBs", "read_write_dma_bw_MBs", 1905 "serial_number", "watchdog_resets", 1906 #ifdef CONFIG_MYRI10GE_DCA 1907 "dca_capable_firmware", "dca_device_present", 1908 #endif 1909 "link_changes", "link_up", "dropped_link_overflow", 1910 
"dropped_link_error_or_filtered", 1911 "dropped_pause", "dropped_bad_phy", "dropped_bad_crc32", 1912 "dropped_unicast_filtered", "dropped_multicast_filtered", 1913 "dropped_runt", "dropped_overrun", "dropped_no_small_buffer", 1914 "dropped_no_big_buffer" 1915 }; 1916 1917 static const char myri10ge_gstrings_slice_stats[][ETH_GSTRING_LEN] = { 1918 "----------- slice ---------", 1919 "tx_pkt_start", "tx_pkt_done", "tx_req", "tx_done", 1920 "rx_small_cnt", "rx_big_cnt", 1921 "wake_queue", "stop_queue", "tx_linearized", 1922 #ifdef CONFIG_NET_RX_BUSY_POLL 1923 "rx_lock_napi_yield", "rx_lock_poll_yield", "rx_busy_poll_miss", 1924 "rx_busy_poll_cnt", 1925 #endif 1926 }; 1927 1928 #define MYRI10GE_NET_STATS_LEN 21 1929 #define MYRI10GE_MAIN_STATS_LEN ARRAY_SIZE(myri10ge_gstrings_main_stats) 1930 #define MYRI10GE_SLICE_STATS_LEN ARRAY_SIZE(myri10ge_gstrings_slice_stats) 1931 1932 static void 1933 myri10ge_get_strings(struct net_device *netdev, u32 stringset, u8 * data) 1934 { 1935 struct myri10ge_priv *mgp = netdev_priv(netdev); 1936 int i; 1937 1938 switch (stringset) { 1939 case ETH_SS_STATS: 1940 memcpy(data, *myri10ge_gstrings_main_stats, 1941 sizeof(myri10ge_gstrings_main_stats)); 1942 data += sizeof(myri10ge_gstrings_main_stats); 1943 for (i = 0; i < mgp->num_slices; i++) { 1944 memcpy(data, *myri10ge_gstrings_slice_stats, 1945 sizeof(myri10ge_gstrings_slice_stats)); 1946 data += sizeof(myri10ge_gstrings_slice_stats); 1947 } 1948 break; 1949 } 1950 } 1951 1952 static int myri10ge_get_sset_count(struct net_device *netdev, int sset) 1953 { 1954 struct myri10ge_priv *mgp = netdev_priv(netdev); 1955 1956 switch (sset) { 1957 case ETH_SS_STATS: 1958 return MYRI10GE_MAIN_STATS_LEN + 1959 mgp->num_slices * MYRI10GE_SLICE_STATS_LEN; 1960 default: 1961 return -EOPNOTSUPP; 1962 } 1963 } 1964 1965 static void 1966 myri10ge_get_ethtool_stats(struct net_device *netdev, 1967 struct ethtool_stats *stats, u64 * data) 1968 { 1969 struct myri10ge_priv *mgp = netdev_priv(netdev); 1970 
struct myri10ge_slice_state *ss; 1971 struct rtnl_link_stats64 link_stats; 1972 int slice; 1973 int i; 1974 1975 /* force stats update */ 1976 memset(&link_stats, 0, sizeof(link_stats)); 1977 (void)myri10ge_get_stats(netdev, &link_stats); 1978 for (i = 0; i < MYRI10GE_NET_STATS_LEN; i++) 1979 data[i] = ((u64 *)&link_stats)[i]; 1980 1981 data[i++] = (unsigned int)mgp->tx_boundary; 1982 data[i++] = (unsigned int)mgp->pdev->irq; 1983 data[i++] = (unsigned int)mgp->msi_enabled; 1984 data[i++] = (unsigned int)mgp->msix_enabled; 1985 data[i++] = (unsigned int)mgp->read_dma; 1986 data[i++] = (unsigned int)mgp->write_dma; 1987 data[i++] = (unsigned int)mgp->read_write_dma; 1988 data[i++] = (unsigned int)mgp->serial_number; 1989 data[i++] = (unsigned int)mgp->watchdog_resets; 1990 #ifdef CONFIG_MYRI10GE_DCA 1991 data[i++] = (unsigned int)(mgp->ss[0].dca_tag != NULL); 1992 data[i++] = (unsigned int)(mgp->dca_enabled); 1993 #endif 1994 data[i++] = (unsigned int)mgp->link_changes; 1995 1996 /* firmware stats are useful only in the first slice */ 1997 ss = &mgp->ss[0]; 1998 data[i++] = (unsigned int)ntohl(ss->fw_stats->link_up); 1999 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_link_overflow); 2000 data[i++] = 2001 (unsigned int)ntohl(ss->fw_stats->dropped_link_error_or_filtered); 2002 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_pause); 2003 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_bad_phy); 2004 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_bad_crc32); 2005 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_unicast_filtered); 2006 data[i++] = 2007 (unsigned int)ntohl(ss->fw_stats->dropped_multicast_filtered); 2008 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_runt); 2009 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_overrun); 2010 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_small_buffer); 2011 data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_big_buffer); 2012 2013 for (slice = 0; slice < 
mgp->num_slices; slice++) { 2014 ss = &mgp->ss[slice]; 2015 data[i++] = slice; 2016 data[i++] = (unsigned int)ss->tx.pkt_start; 2017 data[i++] = (unsigned int)ss->tx.pkt_done; 2018 data[i++] = (unsigned int)ss->tx.req; 2019 data[i++] = (unsigned int)ss->tx.done; 2020 data[i++] = (unsigned int)ss->rx_small.cnt; 2021 data[i++] = (unsigned int)ss->rx_big.cnt; 2022 data[i++] = (unsigned int)ss->tx.wake_queue; 2023 data[i++] = (unsigned int)ss->tx.stop_queue; 2024 data[i++] = (unsigned int)ss->tx.linearized; 2025 #ifdef CONFIG_NET_RX_BUSY_POLL 2026 data[i++] = ss->lock_napi_yield; 2027 data[i++] = ss->lock_poll_yield; 2028 data[i++] = ss->busy_poll_miss; 2029 data[i++] = ss->busy_poll_cnt; 2030 #endif 2031 } 2032 } 2033 2034 static void myri10ge_set_msglevel(struct net_device *netdev, u32 value) 2035 { 2036 struct myri10ge_priv *mgp = netdev_priv(netdev); 2037 mgp->msg_enable = value; 2038 } 2039 2040 static u32 myri10ge_get_msglevel(struct net_device *netdev) 2041 { 2042 struct myri10ge_priv *mgp = netdev_priv(netdev); 2043 return mgp->msg_enable; 2044 } 2045 2046 /* 2047 * Use a low-level command to change the LED behavior. Rather than 2048 * blinking (which is the normal case), when identify is used, the 2049 * yellow LED turns solid. 
2050 */ 2051 static int myri10ge_led(struct myri10ge_priv *mgp, int on) 2052 { 2053 struct mcp_gen_header *hdr; 2054 struct device *dev = &mgp->pdev->dev; 2055 size_t hdr_off, pattern_off, hdr_len; 2056 u32 pattern = 0xfffffffe; 2057 2058 /* find running firmware header */ 2059 hdr_off = swab32(readl(mgp->sram + MCP_HEADER_PTR_OFFSET)); 2060 if ((hdr_off & 3) || hdr_off + sizeof(*hdr) > mgp->sram_size) { 2061 dev_err(dev, "Running firmware has bad header offset (%d)\n", 2062 (int)hdr_off); 2063 return -EIO; 2064 } 2065 hdr_len = swab32(readl(mgp->sram + hdr_off + 2066 offsetof(struct mcp_gen_header, header_length))); 2067 pattern_off = hdr_off + offsetof(struct mcp_gen_header, led_pattern); 2068 if (pattern_off >= (hdr_len + hdr_off)) { 2069 dev_info(dev, "Firmware does not support LED identification\n"); 2070 return -EINVAL; 2071 } 2072 if (!on) 2073 pattern = swab32(readl(mgp->sram + pattern_off + 4)); 2074 writel(swab32(pattern), mgp->sram + pattern_off); 2075 return 0; 2076 } 2077 2078 static int 2079 myri10ge_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) 2080 { 2081 struct myri10ge_priv *mgp = netdev_priv(netdev); 2082 int rc; 2083 2084 switch (state) { 2085 case ETHTOOL_ID_ACTIVE: 2086 rc = myri10ge_led(mgp, 1); 2087 break; 2088 2089 case ETHTOOL_ID_INACTIVE: 2090 rc = myri10ge_led(mgp, 0); 2091 break; 2092 2093 default: 2094 rc = -EINVAL; 2095 } 2096 2097 return rc; 2098 } 2099 2100 static const struct ethtool_ops myri10ge_ethtool_ops = { 2101 .get_settings = myri10ge_get_settings, 2102 .get_drvinfo = myri10ge_get_drvinfo, 2103 .get_coalesce = myri10ge_get_coalesce, 2104 .set_coalesce = myri10ge_set_coalesce, 2105 .get_pauseparam = myri10ge_get_pauseparam, 2106 .set_pauseparam = myri10ge_set_pauseparam, 2107 .get_ringparam = myri10ge_get_ringparam, 2108 .get_link = ethtool_op_get_link, 2109 .get_strings = myri10ge_get_strings, 2110 .get_sset_count = myri10ge_get_sset_count, 2111 .get_ethtool_stats = myri10ge_get_ethtool_stats, 2112 
.set_msglevel = myri10ge_set_msglevel,
	.get_msglevel = myri10ge_get_msglevel,
	.set_phys_id = myri10ge_phys_id,
};

/*
 * myri10ge_allocate_rings - size and allocate the host-side rings of a slice.
 * @ss: slice to set up
 *
 * Queries the firmware for the send and receive ring sizes, allocates the
 * request list, shadow rings and info rings, and fills both receive rings
 * with pages.
 *
 * Returns 0 on success.  If either ring-size query fails, the OR of the
 * two command statuses is returned before anything is allocated.  Any
 * later failure releases what was allocated here and returns -ENOMEM.
 */
static int myri10ge_allocate_rings(struct myri10ge_slice_state *ss)
{
	struct myri10ge_priv *mgp = ss->mgp;
	struct myri10ge_cmd cmd;
	struct net_device *dev = mgp->dev;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int i, slice, status;
	size_t bytes;

	/* get ring sizes */
	slice = ss - mgp->ss;	/* index of this slice within mgp->ss[] */
	cmd.data0 = slice;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd, 0);
	tx_ring_size = cmd.data0;
	cmd.data0 = slice;
	status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0);
	if (status != 0)
		return status;
	rx_ring_size = cmd.data0;

	/* the firmware reports sizes in bytes; masks assume entry counts
	 * that are powers of two - TODO confirm against firmware spec */
	tx_ring_entries = tx_ring_size / sizeof(struct mcp_kreq_ether_send);
	rx_ring_entries = rx_ring_size / sizeof(struct mcp_dma_addr);
	ss->tx.mask = tx_ring_entries - 1;
	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;

	/* every failure from here on is an allocation failure */
	status = -ENOMEM;

	/* allocate the host shadow rings */

	/* 8 spare bytes leave room for the alignment of req_list below */
	bytes = 8 + (MYRI10GE_MAX_SEND_DESC_TSO + 4)
	    * sizeof(*ss->tx.req_list);
	ss->tx.req_bytes = kzalloc(bytes, GFP_KERNEL);
	if (ss->tx.req_bytes == NULL)
		goto abort_with_nothing;

	/* ensure req_list entries are aligned to 8 bytes */
	ss->tx.req_list = (struct mcp_kreq_ether_send *)
	    ALIGN((unsigned long)ss->tx.req_bytes, 8);
	ss->tx.queue_active = 0;

	bytes = rx_ring_entries * sizeof(*ss->rx_small.shadow);
	ss->rx_small.shadow = kzalloc(bytes, GFP_KERNEL);
	if (ss->rx_small.shadow == NULL)
		goto abort_with_tx_req_bytes;

	bytes = rx_ring_entries * sizeof(*ss->rx_big.shadow);
	ss->rx_big.shadow = kzalloc(bytes, GFP_KERNEL);
	if (ss->rx_big.shadow == NULL)
		goto abort_with_rx_small_shadow;

	/* allocate the host info rings */

	bytes = tx_ring_entries * sizeof(*ss->tx.info);
	ss->tx.info = kzalloc(bytes, GFP_KERNEL);
	if (ss->tx.info == NULL)
		goto abort_with_rx_big_shadow;

	bytes = rx_ring_entries * sizeof(*ss->rx_small.info);
	ss->rx_small.info = kzalloc(bytes, GFP_KERNEL);
	if (ss->rx_small.info == NULL)
		goto abort_with_tx_info;

	bytes = rx_ring_entries * sizeof(*ss->rx_big.info);
	ss->rx_big.info = kzalloc(bytes, GFP_KERNEL);
	if (ss->rx_big.info == NULL)
		goto abort_with_rx_small_info;

	/* Fill the receive rings */
	ss->rx_big.cnt = 0;
	ss->rx_small.cnt = 0;
	ss->rx_big.fill_cnt = 0;
	ss->rx_small.fill_cnt = 0;
	ss->rx_small.page_offset = MYRI10GE_ALLOC_SIZE;
	ss->rx_big.page_offset = MYRI10GE_ALLOC_SIZE;
	ss->rx_small.watchdog_needed = 0;
	ss->rx_big.watchdog_needed = 0;
	if (mgp->small_bytes == 0) {
		/* small buffers disabled: pretend the ring is already full */
		ss->rx_small.fill_cnt = ss->rx_small.mask + 1;
	} else {
		myri10ge_alloc_rx_pages(mgp, &ss->rx_small,
					mgp->small_bytes + MXGEFW_PAD, 0);
	}

	if (ss->rx_small.fill_cnt < ss->rx_small.mask + 1) {
		netdev_err(dev, "slice-%d: alloced only %d small bufs\n",
			   slice, ss->rx_small.fill_cnt);
		goto abort_with_rx_small_ring;
	}

	myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0);
	if (ss->rx_big.fill_cnt < ss->rx_big.mask + 1) {
		netdev_err(dev, "slice-%d: alloced only %d big bufs\n",
			   slice, ss->rx_big.fill_cnt);
		goto abort_with_rx_big_ring;
	}

	return 0;

	/* unwind in reverse order of acquisition; labels fall through */
abort_with_rx_big_ring:
	for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) {
		int idx = i & ss->rx_big.mask;
		myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx],
				       mgp->big_bytes);
		put_page(ss->rx_big.info[idx].page);
	}

abort_with_rx_small_ring:
	/* with small buffers disabled no pages were allocated above */
	if (mgp->small_bytes == 0)
		ss->rx_small.fill_cnt = ss->rx_small.cnt;
	for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) {
		int idx = i & ss->rx_small.mask;
		myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx],
				       mgp->small_bytes + MXGEFW_PAD);
		put_page(ss->rx_small.info[idx].page);
	}

	kfree(ss->rx_big.info);

abort_with_rx_small_info:
	kfree(ss->rx_small.info);

abort_with_tx_info:
	kfree(ss->tx.info);

abort_with_rx_big_shadow:
	kfree(ss->rx_big.shadow);

abort_with_rx_small_shadow:
	kfree(ss->rx_small.shadow);

abort_with_tx_req_bytes:
	kfree(ss->tx.req_bytes);
	/* a NULL req_list is what myri10ge_free_rings uses as "not allocated" */
	ss->tx.req_bytes = NULL;
	ss->tx.req_list = NULL;

abort_with_nothing:
	return status;
}

/*
 * myri10ge_free_rings - release everything myri10ge_allocate_rings set up.
 * @ss: slice to tear down
 *
 * Does nothing when the slice has no request list.  Otherwise unmaps and
 * releases the outstanding receive pages, unmaps and frees every transmit
 * still pending between tx.done and tx.req (counting each freed skb as
 * tx_dropped), and frees the info rings, shadow rings and request list.
 */
static void myri10ge_free_rings(struct myri10ge_slice_state *ss)
{
	struct myri10ge_priv *mgp = ss->mgp;
	struct sk_buff *skb;
	struct myri10ge_tx_buf *tx;
	int i, len, idx;

	/* If not allocated, skip it */
	if (ss->tx.req_list == NULL)
		return;

	for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) {
		idx = i & ss->rx_big.mask;
		/* NOTE(review): presumably forces myri10ge_unmap_rx_page to
		 * unmap the final, partially used page - confirm in helper */
		if (i == ss->rx_big.fill_cnt - 1)
			ss->rx_big.info[idx].page_offset = MYRI10GE_ALLOC_SIZE;
		myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_big.info[idx],
				       mgp->big_bytes);
		put_page(ss->rx_big.info[idx].page);
	}

	/* with small buffers disabled the ring never held any pages */
	if (mgp->small_bytes == 0)
		ss->rx_small.fill_cnt = ss->rx_small.cnt;
	for (i = ss->rx_small.cnt; i < ss->rx_small.fill_cnt; i++) {
		idx = i & ss->rx_small.mask;
		if (i == ss->rx_small.fill_cnt - 1)
			ss->rx_small.info[idx].page_offset =
			    MYRI10GE_ALLOC_SIZE;
		myri10ge_unmap_rx_page(mgp->pdev, &ss->rx_small.info[idx],
				       mgp->small_bytes + MXGEFW_PAD);
		put_page(ss->rx_small.info[idx].page);
	}
	/* drain the transmit ring: everything submitted but not completed */
	tx = &ss->tx;
	while (tx->done != tx->req) {
		idx = tx->done & tx->mask;
		skb = tx->info[idx].skb;

		/* Mark as free */
		tx->info[idx].skb = NULL;
		tx->done++;
		len = dma_unmap_len(&tx->info[idx], len);
		dma_unmap_len_set(&tx->info[idx], len, 0);
		if (skb) {
			/* slot carrying the skb: its mapping came from
			 * pci_map_single */
			ss->stats.tx_dropped++;
			dev_kfree_skb_any(skb);
			if (len)
				pci_unmap_single(mgp->pdev,
						 dma_unmap_addr(&tx->info[idx],
								bus), len,
						 PCI_DMA_TODEVICE);
		} else {
			/* slot without skb: a page fragment mapping */
			if (len)
				pci_unmap_page(mgp->pdev,
					       dma_unmap_addr(&tx->info[idx],
							      bus), len,
					       PCI_DMA_TODEVICE);
		}
	}
	kfree(ss->rx_big.info);

	kfree(ss->rx_small.info);

	kfree(ss->tx.info);

	kfree(ss->rx_big.shadow);

	kfree(ss->rx_small.shadow);

	kfree(ss->tx.req_bytes);
	ss->tx.req_bytes = NULL;
	ss->tx.req_list = NULL;
}

/*
 * myri10ge_request_irq - pick an interrupt mode and install the handlers.
 * @mgp: adapter
 *
 * When the myri10ge_msi module parameter is set, MSI-X is used for
 * multi-slice configurations (one vector per slice) and MSI otherwise;
 * if MSI cannot be enabled the legacy interrupt is used.  Returns 0 on
 * success or a negative errno; on failure the handlers and the interrupt
 * mode enabled here are released again.
 */
static int myri10ge_request_irq(struct myri10ge_priv *mgp)
{
	struct pci_dev *pdev = mgp->pdev;
	struct myri10ge_slice_state *ss;
	struct net_device *netdev = mgp->dev;
	int i;
	int status;

	mgp->msi_enabled = 0;
	mgp->msix_enabled = 0;
	status = 0;
	if (myri10ge_msi) {
		if (mgp->num_slices > 1) {
			/* min == max: all-or-nothing, one vector per slice */
			status = pci_enable_msix_range(pdev, mgp->msix_vectors,
					mgp->num_slices, mgp->num_slices);
			if (status < 0) {
				dev_err(&pdev->dev,
					"Error %d setting up MSI-X\n", status);
				return status;
			}
			mgp->msix_enabled = 1;
		}
		if (mgp->msix_enabled == 0) {
			status = pci_enable_msi(pdev);
			if (status != 0) {
				dev_err(&pdev->dev,
					"Error %d setting up MSI; falling back to xPIC\n",
					status);
			} else {
				mgp->msi_enabled = 1;
			}
		}
	}
	if (mgp->msix_enabled) {
		for (i = 0; i < mgp->num_slices; i++) {
			ss = &mgp->ss[i];
			snprintf(ss->irq_desc, sizeof(ss->irq_desc),
				 "%s:slice-%d", netdev->name, i);
			status = request_irq(mgp->msix_vectors[i].vector,
					     myri10ge_intr, 0, ss->irq_desc,
					     ss);
			if (status != 0) {
				dev_err(&pdev->dev,
					"slice %d failed to allocate IRQ\n", i);
				/* release the vectors already requested */
				i--;
				while (i >= 0) {
					free_irq(mgp->msix_vectors[i].vector,
						 &mgp->ss[i]);
					i--;
				}
pci_disable_msix(pdev); 2380 return status; 2381 } 2382 } 2383 } else { 2384 status = request_irq(pdev->irq, myri10ge_intr, IRQF_SHARED, 2385 mgp->dev->name, &mgp->ss[0]); 2386 if (status != 0) { 2387 dev_err(&pdev->dev, "failed to allocate IRQ\n"); 2388 if (mgp->msi_enabled) 2389 pci_disable_msi(pdev); 2390 } 2391 } 2392 return status; 2393 } 2394 2395 static void myri10ge_free_irq(struct myri10ge_priv *mgp) 2396 { 2397 struct pci_dev *pdev = mgp->pdev; 2398 int i; 2399 2400 if (mgp->msix_enabled) { 2401 for (i = 0; i < mgp->num_slices; i++) 2402 free_irq(mgp->msix_vectors[i].vector, &mgp->ss[i]); 2403 } else { 2404 free_irq(pdev->irq, &mgp->ss[0]); 2405 } 2406 if (mgp->msi_enabled) 2407 pci_disable_msi(pdev); 2408 if (mgp->msix_enabled) 2409 pci_disable_msix(pdev); 2410 } 2411 2412 static int myri10ge_get_txrx(struct myri10ge_priv *mgp, int slice) 2413 { 2414 struct myri10ge_cmd cmd; 2415 struct myri10ge_slice_state *ss; 2416 int status; 2417 2418 ss = &mgp->ss[slice]; 2419 status = 0; 2420 if (slice == 0 || (mgp->dev->real_num_tx_queues > 1)) { 2421 cmd.data0 = slice; 2422 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, 2423 &cmd, 0); 2424 ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *) 2425 (mgp->sram + cmd.data0); 2426 } 2427 cmd.data0 = slice; 2428 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, 2429 &cmd, 0); 2430 ss->rx_small.lanai = (struct mcp_kreq_ether_recv __iomem *) 2431 (mgp->sram + cmd.data0); 2432 2433 cmd.data0 = slice; 2434 status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd, 0); 2435 ss->rx_big.lanai = (struct mcp_kreq_ether_recv __iomem *) 2436 (mgp->sram + cmd.data0); 2437 2438 ss->tx.send_go = (__iomem __be32 *) 2439 (mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 2440 ss->tx.send_stop = (__iomem __be32 *) 2441 (mgp->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 2442 return status; 2443 2444 } 2445 2446 static int myri10ge_set_stats(struct myri10ge_priv *mgp, int slice) 2447 { 2448 struct 
myri10ge_cmd cmd; 2449 struct myri10ge_slice_state *ss; 2450 int status; 2451 2452 ss = &mgp->ss[slice]; 2453 cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->fw_stats_bus); 2454 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->fw_stats_bus); 2455 cmd.data2 = sizeof(struct mcp_irq_data) | (slice << 16); 2456 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0); 2457 if (status == -ENOSYS) { 2458 dma_addr_t bus = ss->fw_stats_bus; 2459 if (slice != 0) 2460 return -EINVAL; 2461 bus += offsetof(struct mcp_irq_data, send_done_count); 2462 cmd.data0 = MYRI10GE_LOWPART_TO_U32(bus); 2463 cmd.data1 = MYRI10GE_HIGHPART_TO_U32(bus); 2464 status = myri10ge_send_cmd(mgp, 2465 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 2466 &cmd, 0); 2467 /* Firmware cannot support multicast without STATS_DMA_V2 */ 2468 mgp->fw_multicast_support = 0; 2469 } else { 2470 mgp->fw_multicast_support = 1; 2471 } 2472 return 0; 2473 } 2474 2475 static int myri10ge_open(struct net_device *dev) 2476 { 2477 struct myri10ge_slice_state *ss; 2478 struct myri10ge_priv *mgp = netdev_priv(dev); 2479 struct myri10ge_cmd cmd; 2480 int i, status, big_pow2, slice; 2481 u8 __iomem *itable; 2482 2483 if (mgp->running != MYRI10GE_ETH_STOPPED) 2484 return -EBUSY; 2485 2486 mgp->running = MYRI10GE_ETH_STARTING; 2487 status = myri10ge_reset(mgp); 2488 if (status != 0) { 2489 netdev_err(dev, "failed reset\n"); 2490 goto abort_with_nothing; 2491 } 2492 2493 if (mgp->num_slices > 1) { 2494 cmd.data0 = mgp->num_slices; 2495 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 2496 if (mgp->dev->real_num_tx_queues > 1) 2497 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 2498 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, 2499 &cmd, 0); 2500 if (status != 0) { 2501 netdev_err(dev, "failed to set number of slices\n"); 2502 goto abort_with_nothing; 2503 } 2504 /* setup the indirection table */ 2505 cmd.data0 = mgp->num_slices; 2506 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 2507 &cmd, 0); 2508 
status |= myri10ge_send_cmd(mgp,
					    MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
					    &cmd, 0);
		if (status != 0) {
			netdev_err(dev, "failed to setup rss tables\n");
			goto abort_with_nothing;
		}

		/* just enable an identity mapping */
		itable = mgp->sram + cmd.data0;
		for (i = 0; i < mgp->num_slices; i++)
			__raw_writeb(i, &itable[i]);

		cmd.data0 = 1;
		cmd.data1 = myri10ge_rss_hash;
		status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE,
					   &cmd, 0);
		if (status != 0) {
			netdev_err(dev, "failed to enable slices\n");
			goto abort_with_nothing;
		}
	}

	/* from here on failures must also release the interrupt(s) */
	status = myri10ge_request_irq(mgp);
	if (status != 0)
		goto abort_with_nothing;

	/* decide what small buffer size to use.  For good TCP rx
	 * performance, it is important to not receive 1514 byte
	 * frames into jumbo buffers, as it confuses the socket buffer
	 * accounting code, leading to drops and erratic performance.
	 */

	if (dev->mtu <= ETH_DATA_LEN)
		/* enough for a TCP header */
		mgp->small_bytes = (128 > SMP_CACHE_BYTES)
		    ? (128 - MXGEFW_PAD)
		    : (SMP_CACHE_BYTES - MXGEFW_PAD);
	else
		/* enough for a vlan encapsulated ETH_DATA_LEN frame */
		mgp->small_bytes = VLAN_ETH_FRAME_LEN;

	/* Override the small buffer size? */
	if (myri10ge_small_bytes >= 0)
		mgp->small_bytes = myri10ge_small_bytes;

	/* Firmware needs the big buff size as a power of 2.  Lie and
	 * tell him the buffer is larger, because we only use 1
	 * buffer/pkt, and the mtu will prevent overruns.
	 */
	big_pow2 = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD;
	if (big_pow2 < MYRI10GE_ALLOC_SIZE / 2) {
		/* round up to the next power of two for the firmware only;
		 * big_bytes keeps the exact frame size */
		while (!is_power_of_2(big_pow2))
			big_pow2++;
		mgp->big_bytes = dev->mtu + ETH_HLEN + VLAN_HLEN + MXGEFW_PAD;
	} else {
		big_pow2 = MYRI10GE_ALLOC_SIZE;
		mgp->big_bytes = big_pow2;
	}

	/* setup the per-slice data structures */
	for (slice = 0; slice < mgp->num_slices; slice++) {
		ss = &mgp->ss[slice];

		status = myri10ge_get_txrx(mgp, slice);
		if (status != 0) {
			netdev_err(dev, "failed to get ring sizes or locations\n");
			goto abort_with_rings;
		}
		status = myri10ge_allocate_rings(ss);
		if (status != 0)
			goto abort_with_rings;

		/* only firmware which supports multiple TX queues
		 * supports setting up the tx stats on non-zero
		 * slices */
		if (slice == 0 || mgp->dev->real_num_tx_queues > 1)
			status = myri10ge_set_stats(mgp, slice);
		if (status) {
			netdev_err(dev, "Couldn't set stats DMA\n");
			goto abort_with_rings;
		}

		/* Initialize the slice spinlock and state used for polling */
		myri10ge_ss_init_lock(ss);

		/* must happen prior to any irq */
		napi_enable(&(ss)->napi);
	}

	/* now give firmware buffers sizes, and MTU */
	cmd.data0 = dev->mtu + ETH_HLEN + VLAN_HLEN;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_MTU, &cmd, 0);
	cmd.data0 = mgp->small_bytes;
	status |=
	    myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd, 0);
	cmd.data0 = big_pow2;
	status |=
	    myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd, 0);
	if (status) {
		netdev_err(dev, "Couldn't set buffer sizes\n");
		goto abort_with_rings;
	}

	/*
	 * Set Linux style TSO mode; this is needed only on newer
	 *  firmware versions.  Older versions default to Linux
	 *  style TSO
	 */
	cmd.data0 = 0;
	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd, 0);
	/* -ENOSYS just means the firmware predates this command */
	if (status && status != -ENOSYS) {
		netdev_err(dev, "Couldn't set TSO mode\n");
		goto abort_with_rings;
	}

	mgp->link_state = ~0U;
	mgp->rdma_tags_available = 15;

	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd, 0);
	if (status) {
		netdev_err(dev, "Couldn't bring up link\n");
		goto abort_with_rings;
	}

	mgp->running = MYRI10GE_ETH_RUNNING;
	mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ;
	add_timer(&mgp->watchdog_timer);
	netif_tx_wake_all_queues(dev);

	return 0;

abort_with_rings:
	/* "slice" counts the slices whose napi was enabled: disable exactly
	 * those, newest first */
	while (slice) {
		slice--;
		napi_disable(&mgp->ss[slice].napi);
	}
	/* free_rings skips slices that were never allocated */
	for (i = 0; i < mgp->num_slices; i++)
		myri10ge_free_rings(&mgp->ss[i]);

	myri10ge_free_irq(mgp);

abort_with_nothing:
	mgp->running = MYRI10GE_ETH_STOPPED;
	/* NOTE(review): the specific error held in "status" is not
	 * propagated; every failure is reported as -ENOMEM */
	return -ENOMEM;
}

/*
 * myri10ge_close - bring the interface down (net_device stop handler).
 * @dev: network device
 *
 * Does nothing unless the device is running with allocated rings.
 * Otherwise stops the watchdog, disables and locks out NAPI on every
 * slice, asks the firmware to take the link down (unless the NIC has
 * rebooted) and waits up to one second for the "down" interrupt, then
 * releases the interrupts and the rings.  Always returns 0.
 */
static int myri10ge_close(struct net_device *dev)
{
	struct myri10ge_priv *mgp = netdev_priv(dev);
	struct myri10ge_cmd cmd;
	int status, old_down_cnt;
	int i;

	if (mgp->running != MYRI10GE_ETH_RUNNING)
		return 0;

	if (mgp->ss[0].tx.req_bytes == NULL)
		return 0;

	del_timer_sync(&mgp->watchdog_timer);
	mgp->running = MYRI10GE_ETH_STOPPING;
	for (i = 0; i < mgp->num_slices; i++) {
		napi_disable(&mgp->ss[i].napi);
		local_bh_disable(); /* myri10ge_ss_lock_napi needs this */
		/* Lock the slice to prevent the busy_poll handler from
		 * accessing it.  Later when we bring the NIC up, myri10ge_open
		 * resets the slice including this lock.
		 */
		while (!myri10ge_ss_lock_napi(&mgp->ss[i])) {
			pr_info("Slice %d locked\n", i);
			mdelay(1);
		}
		local_bh_enable();
	}
	netif_carrier_off(dev);

	netif_tx_stop_all_queues(dev);
	if (mgp->rebooted == 0) {
		/* sample the counter before issuing the command so that the
		 * change made when the link goes down cannot be missed */
		old_down_cnt = mgp->down_cnt;
		mb();
		status =
		    myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd, 0);
		if (status)
			netdev_err(dev, "Couldn't bring down link\n");

		wait_event_timeout(mgp->down_wq, old_down_cnt != mgp->down_cnt,
				   HZ);
		if (old_down_cnt == mgp->down_cnt)
			netdev_err(dev, "never got down irq\n");
	}
	netif_tx_disable(dev);
	myri10ge_free_irq(mgp);
	for (i = 0; i < mgp->num_slices; i++)
		myri10ge_free_rings(&mgp->ss[i]);

	mgp->running = MYRI10GE_ETH_STOPPED;
	return 0;
}

/* copy an array of struct mcp_kreq_ether_send's to the mcp.  Copy
 * backwards one at a time and handle ring wraps.
 *
 * Entries src[cnt - 1] down to src[1] are written, each to ring slot
 * (tx->req + index) & tx->mask; src[0] is deliberately left for the
 * caller to write last. */

static inline void
myri10ge_submit_req_backwards(struct myri10ge_tx_buf *tx,
			      struct mcp_kreq_ether_send *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		myri10ge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src));
		mb();	/* order each descriptor write before the next */
	}
}

/*
 * copy an array of struct mcp_kreq_ether_send's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.   We re-write the first segment's flags
 * to mark them valid only after writing the entire chain.
*/

/*
 * @tx:  transmit ring state; tx->req is advanced by @cnt
 * @src: host copy of the descriptors; src[0].flags is cleared while the
 *       chain is copied and restored before the final 32-bit write
 * @cnt: number of descriptors in @src
 */
static inline void
myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src,
		    int cnt)
{
	int idx, i;
	struct mcp_kreq_ether_send __iomem *dstp, *dst;
	struct mcp_kreq_ether_send *srcp;
	u8 last_flags;

	idx = tx->req & tx->mask;

	/* hide the first descriptor's flags until the whole chain is in
	 * place; they are written back as the very last step */
	last_flags = src->flags;
	src->flags = 0;
	mb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		/* no ring wrap: copy forwards, two descriptors at a time */
		for (i = 0; i < (cnt - 1); i += 2) {
			myri10ge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			mb();	/* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		 * that it is submitted below */
		myri10ge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request (wrap case), or the one left
		 * over after the pairwise copy when cnt is odd */
		myri10ge_pio_copy(dstp, srcp, sizeof(*src));
		mb();		/* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	put_be32(*((__be32 *) src + 3), (__be32 __iomem *) dst + 3);
	tx->req += cnt;
	mb();
}

/*
 * myri10ge_unmap_tx_dma - undo the DMA mappings of a partially built packet.
 * @mgp: adapter
 * @tx:  transmit ring state
 * @idx: ring slot of the last mapping recorded for the packet
 *
 * Walks the info slots from tx->req & tx->mask through @idx inclusive,
 * unmapping every slot with a non-zero length and clearing its length
 * and skb pointer.
 */
static void myri10ge_unmap_tx_dma(struct myri10ge_priv *mgp,
				  struct myri10ge_tx_buf *tx, int idx)
{
	unsigned int len;
	int last_idx;

	/* Free any DMA resources we've alloced and clear out the skb slot */
	last_idx = (idx + 1) & tx->mask;
	idx = tx->req & tx->mask;
	do {
		len = dma_unmap_len(&tx->info[idx], len);
		if (len) {
			/* the slot holding the skb was mapped with
			 * pci_map_single, all others are page fragments */
			if (tx->info[idx].skb != NULL)
				pci_unmap_single(mgp->pdev,
						 dma_unmap_addr(&tx->info[idx],
								bus), len,
						 PCI_DMA_TODEVICE);
			else
				pci_unmap_page(mgp->pdev,
					       dma_unmap_addr(&tx->info[idx],
							      bus), len,
					       PCI_DMA_TODEVICE);
			dma_unmap_len_set(&tx->info[idx], len, 0);
			tx->info[idx].skb = NULL;
		}
		idx = (idx + 1) & tx->mask;
	} while (idx != last_idx);
}

/*
2806 * Transmit a packet. We need to split the packet so that a single 2807 * segment does not cross myri10ge->tx_boundary, so this makes segment 2808 * counting tricky. So rather than try to count segments up front, we 2809 * just give up if there are too few segments to hold a reasonably 2810 * fragmented packet currently available. If we run 2811 * out of segments while preparing a packet for DMA, we just linearize 2812 * it and try again. 2813 */ 2814 2815 static netdev_tx_t myri10ge_xmit(struct sk_buff *skb, 2816 struct net_device *dev) 2817 { 2818 struct myri10ge_priv *mgp = netdev_priv(dev); 2819 struct myri10ge_slice_state *ss; 2820 struct mcp_kreq_ether_send *req; 2821 struct myri10ge_tx_buf *tx; 2822 struct skb_frag_struct *frag; 2823 struct netdev_queue *netdev_queue; 2824 dma_addr_t bus; 2825 u32 low; 2826 __be32 high_swapped; 2827 unsigned int len; 2828 int idx, avail, frag_cnt, frag_idx, count, mss, max_segments; 2829 u16 pseudo_hdr_offset, cksum_offset, queue; 2830 int cum_len, seglen, boundary, rdma_count; 2831 u8 flags, odd_flag; 2832 2833 queue = skb_get_queue_mapping(skb); 2834 ss = &mgp->ss[queue]; 2835 netdev_queue = netdev_get_tx_queue(mgp->dev, queue); 2836 tx = &ss->tx; 2837 2838 again: 2839 req = tx->req_list; 2840 avail = tx->mask - 1 - (tx->req - tx->done); 2841 2842 mss = 0; 2843 max_segments = MXGEFW_MAX_SEND_DESC; 2844 2845 if (skb_is_gso(skb)) { 2846 mss = skb_shinfo(skb)->gso_size; 2847 max_segments = MYRI10GE_MAX_SEND_DESC_TSO; 2848 } 2849 2850 if ((unlikely(avail < max_segments))) { 2851 /* we are out of transmit resources */ 2852 tx->stop_queue++; 2853 netif_tx_stop_queue(netdev_queue); 2854 return NETDEV_TX_BUSY; 2855 } 2856 2857 /* Setup checksum offloading, if needed */ 2858 cksum_offset = 0; 2859 pseudo_hdr_offset = 0; 2860 odd_flag = 0; 2861 flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST); 2862 if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { 2863 cksum_offset = skb_checksum_start_offset(skb); 2864 pseudo_hdr_offset = 
cksum_offset + skb->csum_offset; 2865 /* If the headers are excessively large, then we must 2866 * fall back to a software checksum */ 2867 if (unlikely(!mss && (cksum_offset > 255 || 2868 pseudo_hdr_offset > 127))) { 2869 if (skb_checksum_help(skb)) 2870 goto drop; 2871 cksum_offset = 0; 2872 pseudo_hdr_offset = 0; 2873 } else { 2874 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2875 flags |= MXGEFW_FLAGS_CKSUM; 2876 } 2877 } 2878 2879 cum_len = 0; 2880 2881 if (mss) { /* TSO */ 2882 /* this removes any CKSUM flag from before */ 2883 flags = (MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST); 2884 2885 /* negative cum_len signifies to the 2886 * send loop that we are still in the 2887 * header portion of the TSO packet. 2888 * TSO header can be at most 1KB long */ 2889 cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb)); 2890 2891 /* for IPv6 TSO, the checksum offset stores the 2892 * TCP header length, to save the firmware from 2893 * the need to parse the headers */ 2894 if (skb_is_gso_v6(skb)) { 2895 cksum_offset = tcp_hdrlen(skb); 2896 /* Can only handle headers <= max_tso6 long */ 2897 if (unlikely(-cum_len > mgp->max_tso6)) 2898 return myri10ge_sw_tso(skb, dev); 2899 } 2900 /* for TSO, pseudo_hdr_offset holds mss. 2901 * The firmware figures out where to put 2902 * the checksum by parsing the header. 
*/ 2903 pseudo_hdr_offset = mss; 2904 } else 2905 /* Mark small packets, and pad out tiny packets */ 2906 if (skb->len <= MXGEFW_SEND_SMALL_SIZE) { 2907 flags |= MXGEFW_FLAGS_SMALL; 2908 2909 /* pad frames to at least ETH_ZLEN bytes */ 2910 if (eth_skb_pad(skb)) { 2911 /* The packet is gone, so we must 2912 * return 0 */ 2913 ss->stats.tx_dropped += 1; 2914 return NETDEV_TX_OK; 2915 } 2916 } 2917 2918 /* map the skb for DMA */ 2919 len = skb_headlen(skb); 2920 bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE); 2921 if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) 2922 goto drop; 2923 2924 idx = tx->req & tx->mask; 2925 tx->info[idx].skb = skb; 2926 dma_unmap_addr_set(&tx->info[idx], bus, bus); 2927 dma_unmap_len_set(&tx->info[idx], len, len); 2928 2929 frag_cnt = skb_shinfo(skb)->nr_frags; 2930 frag_idx = 0; 2931 count = 0; 2932 rdma_count = 0; 2933 2934 /* "rdma_count" is the number of RDMAs belonging to the 2935 * current packet BEFORE the current send request. For 2936 * non-TSO packets, this is equal to "count". 2937 * For TSO packets, rdma_count needs to be reset 2938 * to 0 after a segment cut. 2939 * 2940 * The rdma_count field of the send request is 2941 * the number of RDMAs of the packet starting at 2942 * that request. For TSO send requests with one ore more cuts 2943 * in the middle, this is the number of RDMAs starting 2944 * after the last cut in the request. All previous 2945 * segments before the last cut implicitly have 1 RDMA. 2946 * 2947 * Since the number of RDMAs is not known beforehand, 2948 * it must be filled-in retroactively - after each 2949 * segmentation cut or at the end of the entire packet. 
2950 */ 2951 2952 while (1) { 2953 /* Break the SKB or Fragment up into pieces which 2954 * do not cross mgp->tx_boundary */ 2955 low = MYRI10GE_LOWPART_TO_U32(bus); 2956 high_swapped = htonl(MYRI10GE_HIGHPART_TO_U32(bus)); 2957 while (len) { 2958 u8 flags_next; 2959 int cum_len_next; 2960 2961 if (unlikely(count == max_segments)) 2962 goto abort_linearize; 2963 2964 boundary = 2965 (low + mgp->tx_boundary) & ~(mgp->tx_boundary - 1); 2966 seglen = boundary - low; 2967 if (seglen > len) 2968 seglen = len; 2969 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 2970 cum_len_next = cum_len + seglen; 2971 if (mss) { /* TSO */ 2972 (req - rdma_count)->rdma_count = rdma_count + 1; 2973 2974 if (likely(cum_len >= 0)) { /* payload */ 2975 int next_is_first, chop; 2976 2977 chop = (cum_len_next > mss); 2978 cum_len_next = cum_len_next % mss; 2979 next_is_first = (cum_len_next == 0); 2980 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 2981 flags_next |= next_is_first * 2982 MXGEFW_FLAGS_FIRST; 2983 rdma_count |= -(chop | next_is_first); 2984 rdma_count += chop & ~next_is_first; 2985 } else if (likely(cum_len_next >= 0)) { /* header ends */ 2986 int small; 2987 2988 rdma_count = -1; 2989 cum_len_next = 0; 2990 seglen = -cum_len; 2991 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 2992 flags_next = MXGEFW_FLAGS_TSO_PLD | 2993 MXGEFW_FLAGS_FIRST | 2994 (small * MXGEFW_FLAGS_SMALL); 2995 } 2996 } 2997 req->addr_high = high_swapped; 2998 req->addr_low = htonl(low); 2999 req->pseudo_hdr_offset = htons(pseudo_hdr_offset); 3000 req->pad = 0; /* complete solid 16-byte block; does this matter? 
*/ 3001 req->rdma_count = 1; 3002 req->length = htons(seglen); 3003 req->cksum_offset = cksum_offset; 3004 req->flags = flags | ((cum_len & 1) * odd_flag); 3005 3006 low += seglen; 3007 len -= seglen; 3008 cum_len = cum_len_next; 3009 flags = flags_next; 3010 req++; 3011 count++; 3012 rdma_count++; 3013 if (cksum_offset != 0 && !(mss && skb_is_gso_v6(skb))) { 3014 if (unlikely(cksum_offset > seglen)) 3015 cksum_offset -= seglen; 3016 else 3017 cksum_offset = 0; 3018 } 3019 } 3020 if (frag_idx == frag_cnt) 3021 break; 3022 3023 /* map next fragment for DMA */ 3024 frag = &skb_shinfo(skb)->frags[frag_idx]; 3025 frag_idx++; 3026 len = skb_frag_size(frag); 3027 bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len, 3028 DMA_TO_DEVICE); 3029 if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) { 3030 myri10ge_unmap_tx_dma(mgp, tx, idx); 3031 goto drop; 3032 } 3033 idx = (count + tx->req) & tx->mask; 3034 dma_unmap_addr_set(&tx->info[idx], bus, bus); 3035 dma_unmap_len_set(&tx->info[idx], len, len); 3036 } 3037 3038 (req - rdma_count)->rdma_count = rdma_count; 3039 if (mss) 3040 do { 3041 req--; 3042 req->flags |= MXGEFW_FLAGS_TSO_LAST; 3043 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | 3044 MXGEFW_FLAGS_FIRST))); 3045 idx = ((count - 1) + tx->req) & tx->mask; 3046 tx->info[idx].last = 1; 3047 myri10ge_submit_req(tx, tx->req_list, count); 3048 /* if using multiple tx queues, make sure NIC polls the 3049 * current slice */ 3050 if ((mgp->dev->real_num_tx_queues > 1) && tx->queue_active == 0) { 3051 tx->queue_active = 1; 3052 put_be32(htonl(1), tx->send_go); 3053 mb(); 3054 mmiowb(); 3055 } 3056 tx->pkt_start++; 3057 if ((avail - count) < MXGEFW_MAX_SEND_DESC) { 3058 tx->stop_queue++; 3059 netif_tx_stop_queue(netdev_queue); 3060 } 3061 return NETDEV_TX_OK; 3062 3063 abort_linearize: 3064 myri10ge_unmap_tx_dma(mgp, tx, idx); 3065 3066 if (skb_is_gso(skb)) { 3067 netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n"); 3068 goto drop; 3069 } 3070 3071 if 
(skb_linearize(skb)) 3072 goto drop; 3073 3074 tx->linearized++; 3075 goto again; 3076 3077 drop: 3078 dev_kfree_skb_any(skb); 3079 ss->stats.tx_dropped += 1; 3080 return NETDEV_TX_OK; 3081 3082 } 3083 3084 static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, 3085 struct net_device *dev) 3086 { 3087 struct sk_buff *segs, *curr; 3088 struct myri10ge_priv *mgp = netdev_priv(dev); 3089 struct myri10ge_slice_state *ss; 3090 netdev_tx_t status; 3091 3092 segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO6); 3093 if (IS_ERR(segs)) 3094 goto drop; 3095 3096 while (segs) { 3097 curr = segs; 3098 segs = segs->next; 3099 curr->next = NULL; 3100 status = myri10ge_xmit(curr, dev); 3101 if (status != 0) { 3102 dev_kfree_skb_any(curr); 3103 if (segs != NULL) { 3104 curr = segs; 3105 segs = segs->next; 3106 curr->next = NULL; 3107 dev_kfree_skb_any(segs); 3108 } 3109 goto drop; 3110 } 3111 } 3112 dev_kfree_skb_any(skb); 3113 return NETDEV_TX_OK; 3114 3115 drop: 3116 ss = &mgp->ss[skb_get_queue_mapping(skb)]; 3117 dev_kfree_skb_any(skb); 3118 ss->stats.tx_dropped += 1; 3119 return NETDEV_TX_OK; 3120 } 3121 3122 static struct rtnl_link_stats64 *myri10ge_get_stats(struct net_device *dev, 3123 struct rtnl_link_stats64 *stats) 3124 { 3125 const struct myri10ge_priv *mgp = netdev_priv(dev); 3126 const struct myri10ge_slice_netstats *slice_stats; 3127 int i; 3128 3129 for (i = 0; i < mgp->num_slices; i++) { 3130 slice_stats = &mgp->ss[i].stats; 3131 stats->rx_packets += slice_stats->rx_packets; 3132 stats->tx_packets += slice_stats->tx_packets; 3133 stats->rx_bytes += slice_stats->rx_bytes; 3134 stats->tx_bytes += slice_stats->tx_bytes; 3135 stats->rx_dropped += slice_stats->rx_dropped; 3136 stats->tx_dropped += slice_stats->tx_dropped; 3137 } 3138 return stats; 3139 } 3140 3141 static void myri10ge_set_multicast_list(struct net_device *dev) 3142 { 3143 struct myri10ge_priv *mgp = netdev_priv(dev); 3144 struct myri10ge_cmd cmd; 3145 struct netdev_hw_addr *ha; 3146 __be32 
data[2] = { 0, 0 }; 3147 int err; 3148 3149 /* can be called from atomic contexts, 3150 * pass 1 to force atomicity in myri10ge_send_cmd() */ 3151 myri10ge_change_promisc(mgp, dev->flags & IFF_PROMISC, 1); 3152 3153 /* This firmware is known to not support multicast */ 3154 if (!mgp->fw_multicast_support) 3155 return; 3156 3157 /* Disable multicast filtering */ 3158 3159 err = myri10ge_send_cmd(mgp, MXGEFW_ENABLE_ALLMULTI, &cmd, 1); 3160 if (err != 0) { 3161 netdev_err(dev, "Failed MXGEFW_ENABLE_ALLMULTI, error status: %d\n", 3162 err); 3163 goto abort; 3164 } 3165 3166 if ((dev->flags & IFF_ALLMULTI) || mgp->adopted_rx_filter_bug) { 3167 /* request to disable multicast filtering, so quit here */ 3168 return; 3169 } 3170 3171 /* Flush the filters */ 3172 3173 err = myri10ge_send_cmd(mgp, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, 3174 &cmd, 1); 3175 if (err != 0) { 3176 netdev_err(dev, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, error status: %d\n", 3177 err); 3178 goto abort; 3179 } 3180 3181 /* Walk the multicast list, and add each address */ 3182 netdev_for_each_mc_addr(ha, dev) { 3183 memcpy(data, &ha->addr, ETH_ALEN); 3184 cmd.data0 = ntohl(data[0]); 3185 cmd.data1 = ntohl(data[1]); 3186 err = myri10ge_send_cmd(mgp, MXGEFW_JOIN_MULTICAST_GROUP, 3187 &cmd, 1); 3188 3189 if (err != 0) { 3190 netdev_err(dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, error status:%d %pM\n", 3191 err, ha->addr); 3192 goto abort; 3193 } 3194 } 3195 /* Enable multicast filtering */ 3196 err = myri10ge_send_cmd(mgp, MXGEFW_DISABLE_ALLMULTI, &cmd, 1); 3197 if (err != 0) { 3198 netdev_err(dev, "Failed MXGEFW_DISABLE_ALLMULTI, error status: %d\n", 3199 err); 3200 goto abort; 3201 } 3202 3203 return; 3204 3205 abort: 3206 return; 3207 } 3208 3209 static int myri10ge_set_mac_address(struct net_device *dev, void *addr) 3210 { 3211 struct sockaddr *sa = addr; 3212 struct myri10ge_priv *mgp = netdev_priv(dev); 3213 int status; 3214 3215 if (!is_valid_ether_addr(sa->sa_data)) 3216 return -EADDRNOTAVAIL; 
3217 3218 status = myri10ge_update_mac_address(mgp, sa->sa_data); 3219 if (status != 0) { 3220 netdev_err(dev, "changing mac address failed with %d\n", 3221 status); 3222 return status; 3223 } 3224 3225 /* change the dev structure */ 3226 memcpy(dev->dev_addr, sa->sa_data, ETH_ALEN); 3227 return 0; 3228 } 3229 3230 static int myri10ge_change_mtu(struct net_device *dev, int new_mtu) 3231 { 3232 struct myri10ge_priv *mgp = netdev_priv(dev); 3233 int error = 0; 3234 3235 if ((new_mtu < 68) || (ETH_HLEN + new_mtu > MYRI10GE_MAX_ETHER_MTU)) { 3236 netdev_err(dev, "new mtu (%d) is not valid\n", new_mtu); 3237 return -EINVAL; 3238 } 3239 netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu); 3240 if (mgp->running) { 3241 /* if we change the mtu on an active device, we must 3242 * reset the device so the firmware sees the change */ 3243 myri10ge_close(dev); 3244 dev->mtu = new_mtu; 3245 myri10ge_open(dev); 3246 } else 3247 dev->mtu = new_mtu; 3248 3249 return error; 3250 } 3251 3252 /* 3253 * Enable ECRC to align PCI-E Completion packets on an 8-byte boundary. 3254 * Only do it if the bridge is a root port since we don't want to disturb 3255 * any other device, except if forced with myri10ge_ecrc_enable > 1. 
3256 */ 3257 3258 static void myri10ge_enable_ecrc(struct myri10ge_priv *mgp) 3259 { 3260 struct pci_dev *bridge = mgp->pdev->bus->self; 3261 struct device *dev = &mgp->pdev->dev; 3262 int cap; 3263 unsigned err_cap; 3264 int ret; 3265 3266 if (!myri10ge_ecrc_enable || !bridge) 3267 return; 3268 3269 /* check that the bridge is a root port */ 3270 if (pci_pcie_type(bridge) != PCI_EXP_TYPE_ROOT_PORT) { 3271 if (myri10ge_ecrc_enable > 1) { 3272 struct pci_dev *prev_bridge, *old_bridge = bridge; 3273 3274 /* Walk the hierarchy up to the root port 3275 * where ECRC has to be enabled */ 3276 do { 3277 prev_bridge = bridge; 3278 bridge = bridge->bus->self; 3279 if (!bridge || prev_bridge == bridge) { 3280 dev_err(dev, 3281 "Failed to find root port" 3282 " to force ECRC\n"); 3283 return; 3284 } 3285 } while (pci_pcie_type(bridge) != 3286 PCI_EXP_TYPE_ROOT_PORT); 3287 3288 dev_info(dev, 3289 "Forcing ECRC on non-root port %s" 3290 " (enabling on root port %s)\n", 3291 pci_name(old_bridge), pci_name(bridge)); 3292 } else { 3293 dev_err(dev, 3294 "Not enabling ECRC on non-root port %s\n", 3295 pci_name(bridge)); 3296 return; 3297 } 3298 } 3299 3300 cap = pci_find_ext_capability(bridge, PCI_EXT_CAP_ID_ERR); 3301 if (!cap) 3302 return; 3303 3304 ret = pci_read_config_dword(bridge, cap + PCI_ERR_CAP, &err_cap); 3305 if (ret) { 3306 dev_err(dev, "failed reading ext-conf-space of %s\n", 3307 pci_name(bridge)); 3308 dev_err(dev, "\t pci=nommconf in use? " 3309 "or buggy/incomplete/absent ACPI MCFG attr?\n"); 3310 return; 3311 } 3312 if (!(err_cap & PCI_ERR_CAP_ECRC_GENC)) 3313 return; 3314 3315 err_cap |= PCI_ERR_CAP_ECRC_GENE; 3316 pci_write_config_dword(bridge, cap + PCI_ERR_CAP, err_cap); 3317 dev_info(dev, "Enabled ECRC on upstream bridge %s\n", pci_name(bridge)); 3318 } 3319 3320 /* 3321 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 3322 * when the PCI-E Completion packets are aligned on an 8-byte 3323 * boundary. 
Some PCI-E chip sets always align Completion packets; on 3324 * the ones that do not, the alignment can be enforced by enabling 3325 * ECRC generation (if supported). 3326 * 3327 * When PCI-E Completion packets are not aligned, it is actually more 3328 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 3329 * 3330 * If the driver can neither enable ECRC nor verify that it has 3331 * already been enabled, then it must use a firmware image which works 3332 * around unaligned completion packets (myri10ge_rss_ethp_z8e.dat), and it 3333 * should also ensure that it never gives the device a Read-DMA which is 3334 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 3335 * enabled, then the driver should use the aligned (myri10ge_rss_eth_z8e.dat) 3336 * firmware image, and set tx_boundary to 4KB. 3337 */ 3338 3339 static void myri10ge_firmware_probe(struct myri10ge_priv *mgp) 3340 { 3341 struct pci_dev *pdev = mgp->pdev; 3342 struct device *dev = &pdev->dev; 3343 int status; 3344 3345 mgp->tx_boundary = 4096; 3346 /* 3347 * Verify the max read request size was set to 4KB 3348 * before trying the test with 4KB. 3349 */ 3350 status = pcie_get_readrq(pdev); 3351 if (status < 0) { 3352 dev_err(dev, "Couldn't read max read req size: %d\n", status); 3353 goto abort; 3354 } 3355 if (status != 4096) { 3356 dev_warn(dev, "Max Read Request size != 4096 (%d)\n", status); 3357 mgp->tx_boundary = 2048; 3358 } 3359 /* 3360 * load the optimized firmware (which assumes aligned PCIe 3361 * completions) in order to see if it works on this host. 3362 */ 3363 set_fw_name(mgp, myri10ge_fw_aligned, false); 3364 status = myri10ge_load_firmware(mgp, 1); 3365 if (status != 0) { 3366 goto abort; 3367 } 3368 3369 /* 3370 * Enable ECRC if possible 3371 */ 3372 myri10ge_enable_ecrc(mgp); 3373 3374 /* 3375 * Run a DMA test which watches for unaligned completions and 3376 * aborts on the first one seen. 
3377 */ 3378 3379 status = myri10ge_dma_test(mgp, MXGEFW_CMD_UNALIGNED_TEST); 3380 if (status == 0) 3381 return; /* keep the aligned firmware */ 3382 3383 if (status != -E2BIG) 3384 dev_warn(dev, "DMA test failed: %d\n", status); 3385 if (status == -ENOSYS) 3386 dev_warn(dev, "Falling back to ethp! " 3387 "Please install up to date fw\n"); 3388 abort: 3389 /* fall back to using the unaligned firmware */ 3390 mgp->tx_boundary = 2048; 3391 set_fw_name(mgp, myri10ge_fw_unaligned, false); 3392 } 3393 3394 static void myri10ge_select_firmware(struct myri10ge_priv *mgp) 3395 { 3396 int overridden = 0; 3397 3398 if (myri10ge_force_firmware == 0) { 3399 int link_width; 3400 u16 lnk; 3401 3402 pcie_capability_read_word(mgp->pdev, PCI_EXP_LNKSTA, &lnk); 3403 link_width = (lnk >> 4) & 0x3f; 3404 3405 /* Check to see if Link is less than 8 or if the 3406 * upstream bridge is known to provide aligned 3407 * completions */ 3408 if (link_width < 8) { 3409 dev_info(&mgp->pdev->dev, "PCIE x%d Link\n", 3410 link_width); 3411 mgp->tx_boundary = 4096; 3412 set_fw_name(mgp, myri10ge_fw_aligned, false); 3413 } else { 3414 myri10ge_firmware_probe(mgp); 3415 } 3416 } else { 3417 if (myri10ge_force_firmware == 1) { 3418 dev_info(&mgp->pdev->dev, 3419 "Assuming aligned completions (forced)\n"); 3420 mgp->tx_boundary = 4096; 3421 set_fw_name(mgp, myri10ge_fw_aligned, false); 3422 } else { 3423 dev_info(&mgp->pdev->dev, 3424 "Assuming unaligned completions (forced)\n"); 3425 mgp->tx_boundary = 2048; 3426 set_fw_name(mgp, myri10ge_fw_unaligned, false); 3427 } 3428 } 3429 3430 kernel_param_lock(THIS_MODULE); 3431 if (myri10ge_fw_name != NULL) { 3432 char *fw_name = kstrdup(myri10ge_fw_name, GFP_KERNEL); 3433 if (fw_name) { 3434 overridden = 1; 3435 set_fw_name(mgp, fw_name, true); 3436 } 3437 } 3438 kernel_param_unlock(THIS_MODULE); 3439 3440 if (mgp->board_number < MYRI10GE_MAX_BOARDS && 3441 myri10ge_fw_names[mgp->board_number] != NULL && 3442 strlen(myri10ge_fw_names[mgp->board_number])) { 
3443 set_fw_name(mgp, myri10ge_fw_names[mgp->board_number], false); 3444 overridden = 1; 3445 } 3446 if (overridden) 3447 dev_info(&mgp->pdev->dev, "overriding firmware to %s\n", 3448 mgp->fw_name); 3449 } 3450 3451 static void myri10ge_mask_surprise_down(struct pci_dev *pdev) 3452 { 3453 struct pci_dev *bridge = pdev->bus->self; 3454 int cap; 3455 u32 mask; 3456 3457 if (bridge == NULL) 3458 return; 3459 3460 cap = pci_find_ext_capability(bridge, PCI_EXT_CAP_ID_ERR); 3461 if (cap) { 3462 /* a sram parity error can cause a surprise link 3463 * down; since we expect and can recover from sram 3464 * parity errors, mask surprise link down events */ 3465 pci_read_config_dword(bridge, cap + PCI_ERR_UNCOR_MASK, &mask); 3466 mask |= 0x20; 3467 pci_write_config_dword(bridge, cap + PCI_ERR_UNCOR_MASK, mask); 3468 } 3469 } 3470 3471 #ifdef CONFIG_PM 3472 static int myri10ge_suspend(struct pci_dev *pdev, pm_message_t state) 3473 { 3474 struct myri10ge_priv *mgp; 3475 struct net_device *netdev; 3476 3477 mgp = pci_get_drvdata(pdev); 3478 if (mgp == NULL) 3479 return -EINVAL; 3480 netdev = mgp->dev; 3481 3482 netif_device_detach(netdev); 3483 if (netif_running(netdev)) { 3484 netdev_info(netdev, "closing\n"); 3485 rtnl_lock(); 3486 myri10ge_close(netdev); 3487 rtnl_unlock(); 3488 } 3489 myri10ge_dummy_rdma(mgp, 0); 3490 pci_save_state(pdev); 3491 pci_disable_device(pdev); 3492 3493 return pci_set_power_state(pdev, pci_choose_state(pdev, state)); 3494 } 3495 3496 static int myri10ge_resume(struct pci_dev *pdev) 3497 { 3498 struct myri10ge_priv *mgp; 3499 struct net_device *netdev; 3500 int status; 3501 u16 vendor; 3502 3503 mgp = pci_get_drvdata(pdev); 3504 if (mgp == NULL) 3505 return -EINVAL; 3506 netdev = mgp->dev; 3507 pci_set_power_state(pdev, PCI_D0); /* zeros conf space as a side effect */ 3508 msleep(5); /* give card time to respond */ 3509 pci_read_config_word(mgp->pdev, PCI_VENDOR_ID, &vendor); 3510 if (vendor == 0xffff) { 3511 netdev_err(mgp->dev, "device 
disappeared!\n"); 3512 return -EIO; 3513 } 3514 3515 pci_restore_state(pdev); 3516 3517 status = pci_enable_device(pdev); 3518 if (status) { 3519 dev_err(&pdev->dev, "failed to enable device\n"); 3520 return status; 3521 } 3522 3523 pci_set_master(pdev); 3524 3525 myri10ge_reset(mgp); 3526 myri10ge_dummy_rdma(mgp, 1); 3527 3528 /* Save configuration space to be restored if the 3529 * nic resets due to a parity error */ 3530 pci_save_state(pdev); 3531 3532 if (netif_running(netdev)) { 3533 rtnl_lock(); 3534 status = myri10ge_open(netdev); 3535 rtnl_unlock(); 3536 if (status != 0) 3537 goto abort_with_enabled; 3538 3539 } 3540 netif_device_attach(netdev); 3541 3542 return 0; 3543 3544 abort_with_enabled: 3545 pci_disable_device(pdev); 3546 return -EIO; 3547 3548 } 3549 #endif /* CONFIG_PM */ 3550 3551 static u32 myri10ge_read_reboot(struct myri10ge_priv *mgp) 3552 { 3553 struct pci_dev *pdev = mgp->pdev; 3554 int vs = mgp->vendor_specific_offset; 3555 u32 reboot; 3556 3557 /*enter read32 mode */ 3558 pci_write_config_byte(pdev, vs + 0x10, 0x3); 3559 3560 /*read REBOOT_STATUS (0xfffffff0) */ 3561 pci_write_config_dword(pdev, vs + 0x18, 0xfffffff0); 3562 pci_read_config_dword(pdev, vs + 0x14, &reboot); 3563 return reboot; 3564 } 3565 3566 static void 3567 myri10ge_check_slice(struct myri10ge_slice_state *ss, int *reset_needed, 3568 int *busy_slice_cnt, u32 rx_pause_cnt) 3569 { 3570 struct myri10ge_priv *mgp = ss->mgp; 3571 int slice = ss - mgp->ss; 3572 3573 if (ss->tx.req != ss->tx.done && 3574 ss->tx.done == ss->watchdog_tx_done && 3575 ss->watchdog_tx_req != ss->watchdog_tx_done) { 3576 /* nic seems like it might be stuck.. 
*/ 3577 if (rx_pause_cnt != mgp->watchdog_pause) { 3578 if (net_ratelimit()) 3579 netdev_warn(mgp->dev, "slice %d: TX paused, " 3580 "check link partner\n", slice); 3581 } else { 3582 netdev_warn(mgp->dev, 3583 "slice %d: TX stuck %d %d %d %d %d %d\n", 3584 slice, ss->tx.queue_active, ss->tx.req, 3585 ss->tx.done, ss->tx.pkt_start, 3586 ss->tx.pkt_done, 3587 (int)ntohl(mgp->ss[slice].fw_stats-> 3588 send_done_count)); 3589 *reset_needed = 1; 3590 ss->stuck = 1; 3591 } 3592 } 3593 if (ss->watchdog_tx_done != ss->tx.done || 3594 ss->watchdog_rx_done != ss->rx_done.cnt) { 3595 *busy_slice_cnt += 1; 3596 } 3597 ss->watchdog_tx_done = ss->tx.done; 3598 ss->watchdog_tx_req = ss->tx.req; 3599 ss->watchdog_rx_done = ss->rx_done.cnt; 3600 } 3601 3602 /* 3603 * This watchdog is used to check whether the board has suffered 3604 * from a parity error and needs to be recovered. 3605 */ 3606 static void myri10ge_watchdog(struct work_struct *work) 3607 { 3608 struct myri10ge_priv *mgp = 3609 container_of(work, struct myri10ge_priv, watchdog_work); 3610 struct myri10ge_slice_state *ss; 3611 u32 reboot, rx_pause_cnt; 3612 int status, rebooted; 3613 int i; 3614 int reset_needed = 0; 3615 int busy_slice_cnt = 0; 3616 u16 cmd, vendor; 3617 3618 mgp->watchdog_resets++; 3619 pci_read_config_word(mgp->pdev, PCI_COMMAND, &cmd); 3620 rebooted = 0; 3621 if ((cmd & PCI_COMMAND_MASTER) == 0) { 3622 /* Bus master DMA disabled? Check to see 3623 * if the card rebooted due to a parity error 3624 * For now, just report it */ 3625 reboot = myri10ge_read_reboot(mgp); 3626 netdev_err(mgp->dev, "NIC rebooted (0x%x),%s resetting\n", 3627 reboot, myri10ge_reset_recover ? "" : " not"); 3628 if (myri10ge_reset_recover == 0) 3629 return; 3630 rtnl_lock(); 3631 mgp->rebooted = 1; 3632 rebooted = 1; 3633 myri10ge_close(mgp->dev); 3634 myri10ge_reset_recover--; 3635 mgp->rebooted = 0; 3636 /* 3637 * A rebooted nic will come back with config space as 3638 * it was after power was applied to PCIe bus. 
3639 * Attempt to restore config space which was saved 3640 * when the driver was loaded, or the last time the 3641 * nic was resumed from power saving mode. 3642 */ 3643 pci_restore_state(mgp->pdev); 3644 3645 /* save state again for accounting reasons */ 3646 pci_save_state(mgp->pdev); 3647 3648 } else { 3649 /* if we get back -1's from our slot, perhaps somebody 3650 * powered off our card. Don't try to reset it in 3651 * this case */ 3652 if (cmd == 0xffff) { 3653 pci_read_config_word(mgp->pdev, PCI_VENDOR_ID, &vendor); 3654 if (vendor == 0xffff) { 3655 netdev_err(mgp->dev, "device disappeared!\n"); 3656 return; 3657 } 3658 } 3659 /* Perhaps it is a software error. See if stuck slice 3660 * has recovered, reset if not */ 3661 rx_pause_cnt = ntohl(mgp->ss[0].fw_stats->dropped_pause); 3662 for (i = 0; i < mgp->num_slices; i++) { 3663 ss = mgp->ss; 3664 if (ss->stuck) { 3665 myri10ge_check_slice(ss, &reset_needed, 3666 &busy_slice_cnt, 3667 rx_pause_cnt); 3668 ss->stuck = 0; 3669 } 3670 } 3671 if (!reset_needed) { 3672 netdev_dbg(mgp->dev, "not resetting\n"); 3673 return; 3674 } 3675 3676 netdev_err(mgp->dev, "device timeout, resetting\n"); 3677 } 3678 3679 if (!rebooted) { 3680 rtnl_lock(); 3681 myri10ge_close(mgp->dev); 3682 } 3683 status = myri10ge_load_firmware(mgp, 1); 3684 if (status != 0) 3685 netdev_err(mgp->dev, "failed to load firmware\n"); 3686 else 3687 myri10ge_open(mgp->dev); 3688 rtnl_unlock(); 3689 } 3690 3691 /* 3692 * We use our own timer routine rather than relying upon 3693 * netdev->tx_timeout because we have a very large hardware transmit 3694 * queue. Due to the large queue, the netdev->tx_timeout function 3695 * cannot detect a NIC with a parity error in a timely fashion if the 3696 * NIC is lightly loaded. 
3697 */ 3698 static void myri10ge_watchdog_timer(unsigned long arg) 3699 { 3700 struct myri10ge_priv *mgp; 3701 struct myri10ge_slice_state *ss; 3702 int i, reset_needed, busy_slice_cnt; 3703 u32 rx_pause_cnt; 3704 u16 cmd; 3705 3706 mgp = (struct myri10ge_priv *)arg; 3707 3708 rx_pause_cnt = ntohl(mgp->ss[0].fw_stats->dropped_pause); 3709 busy_slice_cnt = 0; 3710 for (i = 0, reset_needed = 0; 3711 i < mgp->num_slices && reset_needed == 0; ++i) { 3712 3713 ss = &mgp->ss[i]; 3714 if (ss->rx_small.watchdog_needed) { 3715 myri10ge_alloc_rx_pages(mgp, &ss->rx_small, 3716 mgp->small_bytes + MXGEFW_PAD, 3717 1); 3718 if (ss->rx_small.fill_cnt - ss->rx_small.cnt >= 3719 myri10ge_fill_thresh) 3720 ss->rx_small.watchdog_needed = 0; 3721 } 3722 if (ss->rx_big.watchdog_needed) { 3723 myri10ge_alloc_rx_pages(mgp, &ss->rx_big, 3724 mgp->big_bytes, 1); 3725 if (ss->rx_big.fill_cnt - ss->rx_big.cnt >= 3726 myri10ge_fill_thresh) 3727 ss->rx_big.watchdog_needed = 0; 3728 } 3729 myri10ge_check_slice(ss, &reset_needed, &busy_slice_cnt, 3730 rx_pause_cnt); 3731 } 3732 /* if we've sent or received no traffic, poll the NIC to 3733 * ensure it is still there. 
Otherwise, we risk not noticing 3734 * an error in a timely fashion */ 3735 if (busy_slice_cnt == 0) { 3736 pci_read_config_word(mgp->pdev, PCI_COMMAND, &cmd); 3737 if ((cmd & PCI_COMMAND_MASTER) == 0) { 3738 reset_needed = 1; 3739 } 3740 } 3741 mgp->watchdog_pause = rx_pause_cnt; 3742 3743 if (reset_needed) { 3744 schedule_work(&mgp->watchdog_work); 3745 } else { 3746 /* rearm timer */ 3747 mod_timer(&mgp->watchdog_timer, 3748 jiffies + myri10ge_watchdog_timeout * HZ); 3749 } 3750 } 3751 3752 static void myri10ge_free_slices(struct myri10ge_priv *mgp) 3753 { 3754 struct myri10ge_slice_state *ss; 3755 struct pci_dev *pdev = mgp->pdev; 3756 size_t bytes; 3757 int i; 3758 3759 if (mgp->ss == NULL) 3760 return; 3761 3762 for (i = 0; i < mgp->num_slices; i++) { 3763 ss = &mgp->ss[i]; 3764 if (ss->rx_done.entry != NULL) { 3765 bytes = mgp->max_intr_slots * 3766 sizeof(*ss->rx_done.entry); 3767 dma_free_coherent(&pdev->dev, bytes, 3768 ss->rx_done.entry, ss->rx_done.bus); 3769 ss->rx_done.entry = NULL; 3770 } 3771 if (ss->fw_stats != NULL) { 3772 bytes = sizeof(*ss->fw_stats); 3773 dma_free_coherent(&pdev->dev, bytes, 3774 ss->fw_stats, ss->fw_stats_bus); 3775 ss->fw_stats = NULL; 3776 } 3777 napi_hash_del(&ss->napi); 3778 netif_napi_del(&ss->napi); 3779 } 3780 /* Wait till napi structs are no longer used, and then free ss. 
*/ 3781 synchronize_rcu(); 3782 kfree(mgp->ss); 3783 mgp->ss = NULL; 3784 } 3785 3786 static int myri10ge_alloc_slices(struct myri10ge_priv *mgp) 3787 { 3788 struct myri10ge_slice_state *ss; 3789 struct pci_dev *pdev = mgp->pdev; 3790 size_t bytes; 3791 int i; 3792 3793 bytes = sizeof(*mgp->ss) * mgp->num_slices; 3794 mgp->ss = kzalloc(bytes, GFP_KERNEL); 3795 if (mgp->ss == NULL) { 3796 return -ENOMEM; 3797 } 3798 3799 for (i = 0; i < mgp->num_slices; i++) { 3800 ss = &mgp->ss[i]; 3801 bytes = mgp->max_intr_slots * sizeof(*ss->rx_done.entry); 3802 ss->rx_done.entry = dma_zalloc_coherent(&pdev->dev, bytes, 3803 &ss->rx_done.bus, 3804 GFP_KERNEL); 3805 if (ss->rx_done.entry == NULL) 3806 goto abort; 3807 bytes = sizeof(*ss->fw_stats); 3808 ss->fw_stats = dma_alloc_coherent(&pdev->dev, bytes, 3809 &ss->fw_stats_bus, 3810 GFP_KERNEL); 3811 if (ss->fw_stats == NULL) 3812 goto abort; 3813 ss->mgp = mgp; 3814 ss->dev = mgp->dev; 3815 netif_napi_add(ss->dev, &ss->napi, myri10ge_poll, 3816 myri10ge_napi_weight); 3817 } 3818 return 0; 3819 abort: 3820 myri10ge_free_slices(mgp); 3821 return -ENOMEM; 3822 } 3823 3824 /* 3825 * This function determines the number of slices supported. 3826 * The number slices is the minimum of the number of CPUS, 3827 * the number of MSI-X irqs supported, the number of slices 3828 * supported by the firmware 3829 */ 3830 static void myri10ge_probe_slices(struct myri10ge_priv *mgp) 3831 { 3832 struct myri10ge_cmd cmd; 3833 struct pci_dev *pdev = mgp->pdev; 3834 char *old_fw; 3835 bool old_allocated; 3836 int i, status, ncpus; 3837 3838 mgp->num_slices = 1; 3839 ncpus = netif_get_num_default_rss_queues(); 3840 3841 if (myri10ge_max_slices == 1 || !pdev->msix_cap || 3842 (myri10ge_max_slices == -1 && ncpus < 2)) 3843 return; 3844 3845 /* try to load the slice aware rss firmware */ 3846 old_fw = mgp->fw_name; 3847 old_allocated = mgp->fw_name_allocated; 3848 /* don't free old_fw if we override it. 
*/ 3849 mgp->fw_name_allocated = false; 3850 3851 if (myri10ge_fw_name != NULL) { 3852 dev_info(&mgp->pdev->dev, "overriding rss firmware to %s\n", 3853 myri10ge_fw_name); 3854 set_fw_name(mgp, myri10ge_fw_name, false); 3855 } else if (old_fw == myri10ge_fw_aligned) 3856 set_fw_name(mgp, myri10ge_fw_rss_aligned, false); 3857 else 3858 set_fw_name(mgp, myri10ge_fw_rss_unaligned, false); 3859 status = myri10ge_load_firmware(mgp, 0); 3860 if (status != 0) { 3861 dev_info(&pdev->dev, "Rss firmware not found\n"); 3862 if (old_allocated) 3863 kfree(old_fw); 3864 return; 3865 } 3866 3867 /* hit the board with a reset to ensure it is alive */ 3868 memset(&cmd, 0, sizeof(cmd)); 3869 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd, 0); 3870 if (status != 0) { 3871 dev_err(&mgp->pdev->dev, "failed reset\n"); 3872 goto abort_with_fw; 3873 } 3874 3875 mgp->max_intr_slots = cmd.data0 / sizeof(struct mcp_slot); 3876 3877 /* tell it the size of the interrupt queues */ 3878 cmd.data0 = mgp->max_intr_slots * sizeof(struct mcp_slot); 3879 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0); 3880 if (status != 0) { 3881 dev_err(&mgp->pdev->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 3882 goto abort_with_fw; 3883 } 3884 3885 /* ask the maximum number of slices it supports */ 3886 status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd, 0); 3887 if (status != 0) 3888 goto abort_with_fw; 3889 else 3890 mgp->num_slices = cmd.data0; 3891 3892 /* Only allow multiple slices if MSI-X is usable */ 3893 if (!myri10ge_msi) { 3894 goto abort_with_fw; 3895 } 3896 3897 /* if the admin did not specify a limit to how many 3898 * slices we should use, cap it automatically to the 3899 * number of CPUs currently online */ 3900 if (myri10ge_max_slices == -1) 3901 myri10ge_max_slices = ncpus; 3902 3903 if (mgp->num_slices > myri10ge_max_slices) 3904 mgp->num_slices = myri10ge_max_slices; 3905 3906 /* Now try to allocate as many MSI-X vectors as we have 3907 * slices. 
We give up on MSI-X if we can only get a single 3908 * vector. */ 3909 3910 mgp->msix_vectors = kcalloc(mgp->num_slices, sizeof(*mgp->msix_vectors), 3911 GFP_KERNEL); 3912 if (mgp->msix_vectors == NULL) 3913 goto no_msix; 3914 for (i = 0; i < mgp->num_slices; i++) { 3915 mgp->msix_vectors[i].entry = i; 3916 } 3917 3918 while (mgp->num_slices > 1) { 3919 mgp->num_slices = rounddown_pow_of_two(mgp->num_slices); 3920 if (mgp->num_slices == 1) 3921 goto no_msix; 3922 status = pci_enable_msix_range(pdev, 3923 mgp->msix_vectors, 3924 mgp->num_slices, 3925 mgp->num_slices); 3926 if (status < 0) 3927 goto no_msix; 3928 3929 pci_disable_msix(pdev); 3930 3931 if (status == mgp->num_slices) { 3932 if (old_allocated) 3933 kfree(old_fw); 3934 return; 3935 } else { 3936 mgp->num_slices = status; 3937 } 3938 } 3939 3940 no_msix: 3941 if (mgp->msix_vectors != NULL) { 3942 kfree(mgp->msix_vectors); 3943 mgp->msix_vectors = NULL; 3944 } 3945 3946 abort_with_fw: 3947 mgp->num_slices = 1; 3948 set_fw_name(mgp, old_fw, old_allocated); 3949 myri10ge_load_firmware(mgp, 0); 3950 } 3951 3952 static const struct net_device_ops myri10ge_netdev_ops = { 3953 .ndo_open = myri10ge_open, 3954 .ndo_stop = myri10ge_close, 3955 .ndo_start_xmit = myri10ge_xmit, 3956 .ndo_get_stats64 = myri10ge_get_stats, 3957 .ndo_validate_addr = eth_validate_addr, 3958 .ndo_change_mtu = myri10ge_change_mtu, 3959 .ndo_set_rx_mode = myri10ge_set_multicast_list, 3960 .ndo_set_mac_address = myri10ge_set_mac_address, 3961 #ifdef CONFIG_NET_RX_BUSY_POLL 3962 .ndo_busy_poll = myri10ge_busy_poll, 3963 #endif 3964 }; 3965 3966 static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 3967 { 3968 struct net_device *netdev; 3969 struct myri10ge_priv *mgp; 3970 struct device *dev = &pdev->dev; 3971 int i; 3972 int status = -ENXIO; 3973 int dac_enabled; 3974 unsigned hdr_offset, ss_offset; 3975 static int board_number; 3976 3977 netdev = alloc_etherdev_mq(sizeof(*mgp), MYRI10GE_MAX_SLICES); 3978 if (netdev 
== NULL) 3979 return -ENOMEM; 3980 3981 SET_NETDEV_DEV(netdev, &pdev->dev); 3982 3983 mgp = netdev_priv(netdev); 3984 mgp->dev = netdev; 3985 mgp->pdev = pdev; 3986 mgp->pause = myri10ge_flow_control; 3987 mgp->intr_coal_delay = myri10ge_intr_coal_delay; 3988 mgp->msg_enable = netif_msg_init(myri10ge_debug, MYRI10GE_MSG_DEFAULT); 3989 mgp->board_number = board_number; 3990 init_waitqueue_head(&mgp->down_wq); 3991 3992 if (pci_enable_device(pdev)) { 3993 dev_err(&pdev->dev, "pci_enable_device call failed\n"); 3994 status = -ENODEV; 3995 goto abort_with_netdev; 3996 } 3997 3998 /* Find the vendor-specific cap so we can check 3999 * the reboot register later on */ 4000 mgp->vendor_specific_offset 4001 = pci_find_capability(pdev, PCI_CAP_ID_VNDR); 4002 4003 /* Set our max read request to 4KB */ 4004 status = pcie_set_readrq(pdev, 4096); 4005 if (status != 0) { 4006 dev_err(&pdev->dev, "Error %d writing PCI_EXP_DEVCTL\n", 4007 status); 4008 goto abort_with_enabled; 4009 } 4010 4011 myri10ge_mask_surprise_down(pdev); 4012 pci_set_master(pdev); 4013 dac_enabled = 1; 4014 status = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 4015 if (status != 0) { 4016 dac_enabled = 0; 4017 dev_err(&pdev->dev, 4018 "64-bit pci address mask was refused, " 4019 "trying 32-bit\n"); 4020 status = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 4021 } 4022 if (status != 0) { 4023 dev_err(&pdev->dev, "Error %d setting DMA mask\n", status); 4024 goto abort_with_enabled; 4025 } 4026 (void)pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 4027 mgp->cmd = dma_alloc_coherent(&pdev->dev, sizeof(*mgp->cmd), 4028 &mgp->cmd_bus, GFP_KERNEL); 4029 if (!mgp->cmd) { 4030 status = -ENOMEM; 4031 goto abort_with_enabled; 4032 } 4033 4034 mgp->board_span = pci_resource_len(pdev, 0); 4035 mgp->iomem_base = pci_resource_start(pdev, 0); 4036 mgp->wc_cookie = arch_phys_wc_add(mgp->iomem_base, mgp->board_span); 4037 mgp->sram = ioremap_wc(mgp->iomem_base, mgp->board_span); 4038 if (mgp->sram == NULL) { 4039 
dev_err(&pdev->dev, "ioremap failed for %ld bytes at 0x%lx\n", 4040 mgp->board_span, mgp->iomem_base); 4041 status = -ENXIO; 4042 goto abort_with_mtrr; 4043 } 4044 hdr_offset = 4045 swab32(readl(mgp->sram + MCP_HEADER_PTR_OFFSET)) & 0xffffc; 4046 ss_offset = hdr_offset + offsetof(struct mcp_gen_header, string_specs); 4047 mgp->sram_size = swab32(readl(mgp->sram + ss_offset)); 4048 if (mgp->sram_size > mgp->board_span || 4049 mgp->sram_size <= MYRI10GE_FW_OFFSET) { 4050 dev_err(&pdev->dev, 4051 "invalid sram_size %dB or board span %ldB\n", 4052 mgp->sram_size, mgp->board_span); 4053 goto abort_with_ioremap; 4054 } 4055 memcpy_fromio(mgp->eeprom_strings, 4056 mgp->sram + mgp->sram_size, MYRI10GE_EEPROM_STRINGS_SIZE); 4057 memset(mgp->eeprom_strings + MYRI10GE_EEPROM_STRINGS_SIZE - 2, 0, 2); 4058 status = myri10ge_read_mac_addr(mgp); 4059 if (status) 4060 goto abort_with_ioremap; 4061 4062 for (i = 0; i < ETH_ALEN; i++) 4063 netdev->dev_addr[i] = mgp->mac_addr[i]; 4064 4065 myri10ge_select_firmware(mgp); 4066 4067 status = myri10ge_load_firmware(mgp, 1); 4068 if (status != 0) { 4069 dev_err(&pdev->dev, "failed to load firmware\n"); 4070 goto abort_with_ioremap; 4071 } 4072 myri10ge_probe_slices(mgp); 4073 status = myri10ge_alloc_slices(mgp); 4074 if (status != 0) { 4075 dev_err(&pdev->dev, "failed to alloc slice state\n"); 4076 goto abort_with_firmware; 4077 } 4078 netif_set_real_num_tx_queues(netdev, mgp->num_slices); 4079 netif_set_real_num_rx_queues(netdev, mgp->num_slices); 4080 status = myri10ge_reset(mgp); 4081 if (status != 0) { 4082 dev_err(&pdev->dev, "failed reset\n"); 4083 goto abort_with_slices; 4084 } 4085 #ifdef CONFIG_MYRI10GE_DCA 4086 myri10ge_setup_dca(mgp); 4087 #endif 4088 pci_set_drvdata(pdev, mgp); 4089 if ((myri10ge_initial_mtu + ETH_HLEN) > MYRI10GE_MAX_ETHER_MTU) 4090 myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN; 4091 if ((myri10ge_initial_mtu + ETH_HLEN) < 68) 4092 myri10ge_initial_mtu = 68; 4093 4094 netdev->netdev_ops = 
&myri10ge_netdev_ops; 4095 netdev->mtu = myri10ge_initial_mtu; 4096 netdev->hw_features = mgp->features | NETIF_F_RXCSUM; 4097 4098 /* fake NETIF_F_HW_VLAN_CTAG_RX for good GRO performance */ 4099 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; 4100 4101 netdev->features = netdev->hw_features; 4102 4103 if (dac_enabled) 4104 netdev->features |= NETIF_F_HIGHDMA; 4105 4106 netdev->vlan_features |= mgp->features; 4107 if (mgp->fw_ver_tiny < 37) 4108 netdev->vlan_features &= ~NETIF_F_TSO6; 4109 if (mgp->fw_ver_tiny < 32) 4110 netdev->vlan_features &= ~NETIF_F_TSO; 4111 4112 /* make sure we can get an irq, and that MSI can be 4113 * setup (if available). */ 4114 status = myri10ge_request_irq(mgp); 4115 if (status != 0) 4116 goto abort_with_firmware; 4117 myri10ge_free_irq(mgp); 4118 4119 /* Save configuration space to be restored if the 4120 * nic resets due to a parity error */ 4121 pci_save_state(pdev); 4122 4123 /* Setup the watchdog timer */ 4124 setup_timer(&mgp->watchdog_timer, myri10ge_watchdog_timer, 4125 (unsigned long)mgp); 4126 4127 netdev->ethtool_ops = &myri10ge_ethtool_ops; 4128 INIT_WORK(&mgp->watchdog_work, myri10ge_watchdog); 4129 status = register_netdev(netdev); 4130 if (status != 0) { 4131 dev_err(&pdev->dev, "register_netdev failed: %d\n", status); 4132 goto abort_with_state; 4133 } 4134 if (mgp->msix_enabled) 4135 dev_info(dev, "%d MSI-X IRQs, tx bndry %d, fw %s, MTRR %s, WC Enabled\n", 4136 mgp->num_slices, mgp->tx_boundary, mgp->fw_name, 4137 (mgp->wc_cookie > 0 ? "Enabled" : "Disabled")); 4138 else 4139 dev_info(dev, "%s IRQ %d, tx bndry %d, fw %s, MTRR %s, WC Enabled\n", 4140 mgp->msi_enabled ? "MSI" : "xPIC", 4141 pdev->irq, mgp->tx_boundary, mgp->fw_name, 4142 (mgp->wc_cookie > 0 ? 
"Enabled" : "Disabled")); 4143 4144 board_number++; 4145 return 0; 4146 4147 abort_with_state: 4148 pci_restore_state(pdev); 4149 4150 abort_with_slices: 4151 myri10ge_free_slices(mgp); 4152 4153 abort_with_firmware: 4154 myri10ge_dummy_rdma(mgp, 0); 4155 4156 abort_with_ioremap: 4157 if (mgp->mac_addr_string != NULL) 4158 dev_err(&pdev->dev, 4159 "myri10ge_probe() failed: MAC=%s, SN=%ld\n", 4160 mgp->mac_addr_string, mgp->serial_number); 4161 iounmap(mgp->sram); 4162 4163 abort_with_mtrr: 4164 arch_phys_wc_del(mgp->wc_cookie); 4165 dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd), 4166 mgp->cmd, mgp->cmd_bus); 4167 4168 abort_with_enabled: 4169 pci_disable_device(pdev); 4170 4171 abort_with_netdev: 4172 set_fw_name(mgp, NULL, false); 4173 free_netdev(netdev); 4174 return status; 4175 } 4176 4177 /* 4178 * myri10ge_remove 4179 * 4180 * Does what is necessary to shutdown one Myrinet device. Called 4181 * once for each Myrinet card by the kernel when a module is 4182 * unloaded. 4183 */ 4184 static void myri10ge_remove(struct pci_dev *pdev) 4185 { 4186 struct myri10ge_priv *mgp; 4187 struct net_device *netdev; 4188 4189 mgp = pci_get_drvdata(pdev); 4190 if (mgp == NULL) 4191 return; 4192 4193 cancel_work_sync(&mgp->watchdog_work); 4194 netdev = mgp->dev; 4195 unregister_netdev(netdev); 4196 4197 #ifdef CONFIG_MYRI10GE_DCA 4198 myri10ge_teardown_dca(mgp); 4199 #endif 4200 myri10ge_dummy_rdma(mgp, 0); 4201 4202 /* avoid a memory leak */ 4203 pci_restore_state(pdev); 4204 4205 iounmap(mgp->sram); 4206 arch_phys_wc_del(mgp->wc_cookie); 4207 myri10ge_free_slices(mgp); 4208 kfree(mgp->msix_vectors); 4209 dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd), 4210 mgp->cmd, mgp->cmd_bus); 4211 4212 set_fw_name(mgp, NULL, false); 4213 free_netdev(netdev); 4214 pci_disable_device(pdev); 4215 } 4216 4217 #define PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E 0x0008 4218 #define PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E_9 0x0009 4219 4220 static const struct pci_device_id myri10ge_pci_tbl[] = { 4221 
{PCI_DEVICE(PCI_VENDOR_ID_MYRICOM, PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E)}, 4222 {PCI_DEVICE 4223 (PCI_VENDOR_ID_MYRICOM, PCI_DEVICE_ID_MYRICOM_MYRI10GE_Z8E_9)}, 4224 {0}, 4225 }; 4226 4227 MODULE_DEVICE_TABLE(pci, myri10ge_pci_tbl); 4228 4229 static struct pci_driver myri10ge_driver = { 4230 .name = "myri10ge", 4231 .probe = myri10ge_probe, 4232 .remove = myri10ge_remove, 4233 .id_table = myri10ge_pci_tbl, 4234 #ifdef CONFIG_PM 4235 .suspend = myri10ge_suspend, 4236 .resume = myri10ge_resume, 4237 #endif 4238 }; 4239 4240 #ifdef CONFIG_MYRI10GE_DCA 4241 static int 4242 myri10ge_notify_dca(struct notifier_block *nb, unsigned long event, void *p) 4243 { 4244 int err = driver_for_each_device(&myri10ge_driver.driver, 4245 NULL, &event, 4246 myri10ge_notify_dca_device); 4247 4248 if (err) 4249 return NOTIFY_BAD; 4250 return NOTIFY_DONE; 4251 } 4252 4253 static struct notifier_block myri10ge_dca_notifier = { 4254 .notifier_call = myri10ge_notify_dca, 4255 .next = NULL, 4256 .priority = 0, 4257 }; 4258 #endif /* CONFIG_MYRI10GE_DCA */ 4259 4260 static __init int myri10ge_init_module(void) 4261 { 4262 pr_info("Version %s\n", MYRI10GE_VERSION_STR); 4263 4264 if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX) { 4265 pr_err("Illegal rssh hash type %d, defaulting to source port\n", 4266 myri10ge_rss_hash); 4267 myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 4268 } 4269 #ifdef CONFIG_MYRI10GE_DCA 4270 dca_register_notify(&myri10ge_dca_notifier); 4271 #endif 4272 if (myri10ge_max_slices > MYRI10GE_MAX_SLICES) 4273 myri10ge_max_slices = MYRI10GE_MAX_SLICES; 4274 4275 return pci_register_driver(&myri10ge_driver); 4276 } 4277 4278 module_init(myri10ge_init_module); 4279 4280 static __exit void myri10ge_cleanup_module(void) 4281 { 4282 #ifdef CONFIG_MYRI10GE_DCA 4283 dca_unregister_notify(&myri10ge_dca_notifier); 4284 #endif 4285 pci_unregister_driver(&myri10ge_driver); 4286 } 4287 4288 module_exit(myri10ge_cleanup_module); 4289