/****************************************************************************
 * Driver for Solarflare Solarstorm network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2011 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/in.h>
#include <linux/crc32.h>
#include <linux/ethtool.h>
#include <linux/topology.h>
#include <linux/gfp.h>
#include <linux/cpu_rmap.h>
#include "net_driver.h"
#include "efx.h"
#include "nic.h"

#include "mcdi.h"
#include "workarounds.h"

/**************************************************************************
 *
 * Type name strings
 *
 **************************************************************************
 */

/* Loopback mode names (see LOOPBACK_MODE()) */
const unsigned int efx_loopback_mode_max = LOOPBACK_MAX;
const char *const efx_loopback_mode_names[] = {
	[LOOPBACK_NONE]		= "NONE",
	[LOOPBACK_DATA]		= "DATAPATH",
	[LOOPBACK_GMAC]		= "GMAC",
	[LOOPBACK_XGMII]	= "XGMII",
	[LOOPBACK_XGXS]		= "XGXS",
	[LOOPBACK_XAUI]		= "XAUI",
	[LOOPBACK_GMII]		= "GMII",
	[LOOPBACK_SGMII]	= "SGMII",
	[LOOPBACK_XGBR]		= "XGBR",
	[LOOPBACK_XFI]		= "XFI",
	[LOOPBACK_XAUI_FAR]	= "XAUI_FAR",
	[LOOPBACK_GMII_FAR]	= "GMII_FAR",
	[LOOPBACK_SGMII_FAR]	= "SGMII_FAR",
	[LOOPBACK_XFI_FAR]	= "XFI_FAR",
	[LOOPBACK_GPHY]		= "GPHY",
	[LOOPBACK_PHYXS]	= "PHYXS",
	[LOOPBACK_PCS]		= "PCS",
	[LOOPBACK_PMAPMD]	= "PMA/PMD",
	[LOOPBACK_XPORT]	= "XPORT",
	[LOOPBACK_XGMII_WS]	= "XGMII_WS",
	[LOOPBACK_XAUI_WS]	= "XAUI_WS",
	[LOOPBACK_XAUI_WS_FAR]	= "XAUI_WS_FAR",
	[LOOPBACK_XAUI_WS_NEAR]	= "XAUI_WS_NEAR",
	[LOOPBACK_GMII_WS]	= "GMII_WS",
	[LOOPBACK_XFI_WS]	= "XFI_WS",
	[LOOPBACK_XFI_WS_FAR]	= "XFI_WS_FAR",
	[LOOPBACK_PHYXS_WS]	= "PHYXS_WS",
};

const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
const char *const efx_reset_type_names[] = {
	[RESET_TYPE_INVISIBLE]	= "INVISIBLE",
	[RESET_TYPE_ALL]	= "ALL",
	[RESET_TYPE_WORLD]	= "WORLD",
	[RESET_TYPE_DISABLE]	= "DISABLE",
	[RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG",
	[RESET_TYPE_INT_ERROR]	= "INT_ERROR",
	[RESET_TYPE_RX_RECOVERY] = "RX_RECOVERY",
	[RESET_TYPE_RX_DESC_FETCH] = "RX_DESC_FETCH",
	[RESET_TYPE_TX_DESC_FETCH] = "TX_DESC_FETCH",
	[RESET_TYPE_TX_SKIP]	= "TX_SKIP",
	[RESET_TYPE_MC_FAILURE]	= "MC_FAILURE",
};

#define EFX_MAX_MTU (9 * 1024)

/* Reset workqueue. If any NIC has a hardware failure then a reset will be
 * queued onto this work queue. This is not a per-nic work queue, because
 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
 */
static struct workqueue_struct *reset_workqueue;

/**************************************************************************
 *
 * Configurable values
 *
 *************************************************************************/

/*
 * Use separate channels for TX and RX events
 *
 * Set this to 1 to use separate channels for TX and RX.
 * It allows us to control interrupt affinity separately for TX and RX.
 *
 * This is only used in MSI-X interrupt mode
 */
static unsigned int separate_tx_channels;
module_param(separate_tx_channels, uint, 0444);
MODULE_PARM_DESC(separate_tx_channels,
		 "Use separate channels for TX and RX");

/* This is the weight assigned to each of the (per-channel) virtual
 * NAPI devices.
 */
static int napi_weight = 64;

/* This is the time (in jiffies) between invocations of the hardware
 * monitor.  On Falcon-based NICs, this will:
 * - Check the on-board hardware monitor;
 * - Poll the link state and reconfigure the hardware as necessary.
 */
static unsigned int efx_monitor_interval = 1 * HZ;

/* Initial interrupt moderation settings.  They can be modified after
 * module load with ethtool.
 *
 * The default for RX should strike a balance between increasing the
 * round-trip latency and reducing overhead.
 */
static unsigned int rx_irq_mod_usec = 60;

/* Initial interrupt moderation settings.  They can be modified after
 * module load with ethtool.
 *
 * This default is chosen to ensure that a 10G link does not go idle
 * while a TX queue is stopped after it has become full.  A queue is
 * restarted when it drops below half full.  The time this takes (assuming
 * worst case 3 descriptors per packet and 1024 descriptors) is
 * 512 / 3 * 1.2 = 205 usec.
 */
static unsigned int tx_irq_mod_usec = 150;

/* This is the first interrupt mode to try out of:
 * 0 => MSI-X
 * 1 => MSI
 * 2 => legacy
 */
static unsigned int interrupt_mode;

/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
 * i.e. the number of CPUs among which we may distribute simultaneous
 * interrupt handling.
 *
 * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
 * The default (0) means to assign an interrupt to each core.
 */
static unsigned int rss_cpus;
module_param(rss_cpus, uint, 0444);
MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
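/* Example usage (illustrative): rss_cpus and separate_tx_channels are
 * declared 0444 (read-only at runtime), so they must be given at module
 * load time, e.g.
 *
 *   modprobe sfc rss_cpus=4 separate_tx_channels=1
 *
 * while 0644 parameters such as phy_flash_cfg below can also be written
 * through /sys/module/sfc/parameters/ after loading.
 */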
static int phy_flash_cfg;
module_param(phy_flash_cfg, int, 0644);
MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");

static unsigned irq_adapt_low_thresh = 10000;
module_param(irq_adapt_low_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_low_thresh,
		 "Threshold score for reducing IRQ moderation");

static unsigned irq_adapt_high_thresh = 20000;
module_param(irq_adapt_high_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_high_thresh,
		 "Threshold score for increasing IRQ moderation");

static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
			 NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
			 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
			 NETIF_MSG_TX_ERR | NETIF_MSG_HW);
module_param(debug, uint, 0);
MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");

/**************************************************************************
 *
 * Utility functions and prototypes
 *
 *************************************************************************/

static void efx_remove_channels(struct efx_nic *efx);
static void efx_remove_port(struct efx_nic *efx);
static void efx_init_napi(struct efx_nic *efx);
static void efx_fini_napi(struct efx_nic *efx);
static void efx_fini_napi_channel(struct efx_channel *channel);
static void efx_fini_struct(struct efx_nic *efx);
static void efx_start_all(struct efx_nic *efx);
static void efx_stop_all(struct efx_nic *efx);

#define EFX_ASSERT_RESET_SERIALISED(efx)		\
	do {						\
		if ((efx->state == STATE_RUNNING) ||	\
		    (efx->state == STATE_DISABLED))	\
			ASSERT_RTNL();			\
	} while (0)

/**************************************************************************
 *
 * Event queue processing
 *
 *************************************************************************/

/* Process channel's event queue
 *
 * This function is responsible for processing the event queue of a
 * single channel.  The caller must guarantee that this function will
 * never be concurrently called more than once on the same channel,
 * though different channels may be being processed concurrently.
 */
static int efx_process_channel(struct efx_channel *channel, int budget)
{
	struct efx_nic *efx = channel->efx;
	int spent;

	if (unlikely(efx->reset_pending || !channel->enabled))
		return 0;

	spent = efx_nic_process_eventq(channel, budget);
	if (spent == 0)
		return 0;

	/* Deliver last RX packet. */
	if (channel->rx_pkt) {
		__efx_rx_packet(channel, channel->rx_pkt);
		channel->rx_pkt = NULL;
	}

	efx_rx_strategy(channel);

	efx_fast_push_rx_descriptors(efx_channel_get_rx_queue(channel));

	return spent;
}

/* Mark channel as finished processing
 *
 * Note that since we will not receive further interrupts for this
 * channel before we finish processing and call the eventq_read_ack()
 * method, there is no need to use the interrupt hold-off timers.
 */
static inline void efx_channel_processed(struct efx_channel *channel)
{
	/* The interrupt handler for this channel may set work_pending
	 * as soon as we acknowledge the events we've seen.  Make sure
	 * it's cleared before then.
	 */
	channel->work_pending = false;
	smp_wmb();

	efx_nic_eventq_read_ack(channel);
}

/* NAPI poll handler
 *
 * NAPI guarantees serialisation of polls of the same device, which
 * provides the guarantee required by efx_process_channel().
 */
static int efx_poll(struct napi_struct *napi, int budget)
{
	struct efx_channel *channel =
		container_of(napi, struct efx_channel, napi_str);
	struct efx_nic *efx = channel->efx;
	int spent;

	netif_vdbg(efx, intr, efx->net_dev,
		   "channel %d NAPI poll executing on CPU %d\n",
		   channel->channel, raw_smp_processor_id());

	spent = efx_process_channel(channel, budget);

	if (spent < budget) {
		if (channel->channel < efx->n_rx_channels &&
		    efx->irq_rx_adaptive &&
		    unlikely(++channel->irq_count == 1000)) {
			if (unlikely(channel->irq_mod_score <
				     irq_adapt_low_thresh)) {
				if (channel->irq_moderation > 1) {
					channel->irq_moderation -= 1;
					efx->type->push_irq_moderation(channel);
				}
			} else if (unlikely(channel->irq_mod_score >
					    irq_adapt_high_thresh)) {
				if (channel->irq_moderation <
				    efx->irq_rx_moderation) {
					channel->irq_moderation += 1;
					efx->type->push_irq_moderation(channel);
				}
			}
			channel->irq_count = 0;
			channel->irq_mod_score = 0;
		}

		efx_filter_rfs_expire(channel);

		/* There is no race here; although napi_disable() will
		 * only wait for napi_complete(), this isn't a problem
		 * since efx_channel_processed() will have no effect if
		 * interrupts have already been disabled.
		 */
		napi_complete(napi);
		efx_channel_processed(channel);
	}

	return spent;
}

/* Process the eventq of the specified channel immediately on this CPU
 *
 * Disable hardware-generated interrupts, wait for any existing
 * processing to finish, then directly poll (and ack) the eventq.
 * Finally reenable NAPI and interrupts.
 *
 * This is for use only during a loopback self-test.  It must not
 * deliver any packets up the stack as this can result in deadlock.
 */
void efx_process_channel_now(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;

	BUG_ON(channel->channel >= efx->n_channels);
	BUG_ON(!channel->enabled);
	BUG_ON(!efx->loopback_selftest);

	/* Disable interrupts and wait for ISRs to complete */
	efx_nic_disable_interrupts(efx);
	if (efx->legacy_irq) {
		synchronize_irq(efx->legacy_irq);
		efx->legacy_irq_enabled = false;
	}
	if (channel->irq)
		synchronize_irq(channel->irq);

	/* Wait for any NAPI processing to complete */
	napi_disable(&channel->napi_str);

	/* Poll the channel */
	efx_process_channel(channel, channel->eventq_mask + 1);

	/* Ack the eventq.  This may cause an interrupt to be generated
	 * when they are reenabled */
	efx_channel_processed(channel);

	napi_enable(&channel->napi_str);
	if (efx->legacy_irq)
		efx->legacy_irq_enabled = true;
	efx_nic_enable_interrupts(efx);
}
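/* Illustrative sizing calculation for efx_probe_eventq() below: assuming
 * the default rxq_entries = txq_entries = 512 (the value of
 * EFX_DEFAULT_DMAQ_SIZE is assumed here), 512 + 512 + 128 = 1152 events
 * of headroom are required, which roundup_pow_of_two() takes to 2048,
 * giving eventq_mask = 2047.
 */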
/* Create event queue
 * Event queue memory allocations are done only once.  If the channel
 * is reset, the memory buffer will be reused; this guards against
 * errors during channel reset and also simplifies interrupt handling.
 */
static int efx_probe_eventq(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;
	unsigned long entries;

	netif_dbg(efx, probe, efx->net_dev,
		  "chan %d create event queue\n", channel->channel);

	/* Build an event queue with room for one event per tx and rx buffer,
	 * plus some extra for link state events and MCDI completions. */
	entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
	EFX_BUG_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
	channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;

	return efx_nic_probe_eventq(channel);
}

/* Prepare channel's event queue */
static void efx_init_eventq(struct efx_channel *channel)
{
	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "chan %d init event queue\n", channel->channel);

	channel->eventq_read_ptr = 0;

	efx_nic_init_eventq(channel);
}

static void efx_fini_eventq(struct efx_channel *channel)
{
	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "chan %d fini event queue\n", channel->channel);

	efx_nic_fini_eventq(channel);
}

static void efx_remove_eventq(struct efx_channel *channel)
{
	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "chan %d remove event queue\n", channel->channel);

	efx_nic_remove_eventq(channel);
}

/**************************************************************************
 *
 * Channel handling
 *
 *************************************************************************/

/* Allocate and initialise a channel structure, optionally copying
 * parameters (but not resources) from an old channel structure. */
static struct efx_channel *
efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
{
	struct efx_channel *channel;
	struct efx_rx_queue *rx_queue;
	struct efx_tx_queue *tx_queue;
	int j;

	if (old_channel) {
		channel = kmalloc(sizeof(*channel), GFP_KERNEL);
		if (!channel)
			return NULL;

		*channel = *old_channel;

		channel->napi_dev = NULL;
		memset(&channel->eventq, 0, sizeof(channel->eventq));

		rx_queue = &channel->rx_queue;
		rx_queue->buffer = NULL;
		memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));

		for (j = 0; j < EFX_TXQ_TYPES; j++) {
			tx_queue = &channel->tx_queue[j];
			if (tx_queue->channel)
				tx_queue->channel = channel;
			tx_queue->buffer = NULL;
			memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
		}
	} else {
		channel = kzalloc(sizeof(*channel), GFP_KERNEL);
		if (!channel)
			return NULL;

		channel->efx = efx;
		channel->channel = i;

		for (j = 0; j < EFX_TXQ_TYPES; j++) {
			tx_queue = &channel->tx_queue[j];
			tx_queue->efx = efx;
			tx_queue->queue = i * EFX_TXQ_TYPES + j;
			tx_queue->channel = channel;
		}
	}

	rx_queue = &channel->rx_queue;
	rx_queue->efx = efx;
	setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
		    (unsigned long)rx_queue);

	return channel;
}

static int efx_probe_channel(struct efx_channel *channel)
{
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
	int rc;

	netif_dbg(channel->efx, probe, channel->efx->net_dev,
		  "creating channel %d\n", channel->channel);

	rc = efx_probe_eventq(channel);
	if (rc)
		goto fail1;

	efx_for_each_channel_tx_queue(tx_queue, channel) {
		rc = efx_probe_tx_queue(tx_queue);
		if (rc)
			goto fail2;
	}

	efx_for_each_channel_rx_queue(rx_queue, channel) {
		rc = efx_probe_rx_queue(rx_queue);
		if (rc)
			goto fail3;
	}

	channel->n_rx_frm_trunc = 0;

	return 0;

fail3:
	efx_for_each_channel_rx_queue(rx_queue, channel)
		efx_remove_rx_queue(rx_queue);
fail2:
	efx_for_each_channel_tx_queue(tx_queue, channel)
		efx_remove_tx_queue(tx_queue);
fail1:
	return rc;
}
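/* Naming example for efx_set_channel_names() below: with separate TX/RX
 * channels on an interface named "eth0", the generated names are
 * "eth0-rx-0" ... "eth0-rx-N" and "eth0-tx-0" ...; with combined
 * channels they are simply "eth0-0", "eth0-1", and so on.
 */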
static void efx_set_channel_names(struct efx_nic *efx)
{
	struct efx_channel *channel;
	const char *type = "";
	int number;

	efx_for_each_channel(channel, efx) {
		number = channel->channel;
		if (efx->n_channels > efx->n_rx_channels) {
			if (channel->channel < efx->n_rx_channels) {
				type = "-rx";
			} else {
				type = "-tx";
				number -= efx->n_rx_channels;
			}
		}
		snprintf(efx->channel_name[channel->channel],
			 sizeof(efx->channel_name[0]),
			 "%s%s-%d", efx->name, type, number);
	}
}

static int efx_probe_channels(struct efx_nic *efx)
{
	struct efx_channel *channel;
	int rc;

	/* Restart special buffer allocation */
	efx->next_buffer_table = 0;

	efx_for_each_channel(channel, efx) {
		rc = efx_probe_channel(channel);
		if (rc) {
			netif_err(efx, probe, efx->net_dev,
				  "failed to create channel %d\n",
				  channel->channel);
			goto fail;
		}
	}
	efx_set_channel_names(efx);

	return 0;

fail:
	efx_remove_channels(efx);
	return rc;
}

/* Channels are shut down and reinitialised whilst the NIC is running
 * to propagate configuration changes (mtu, checksum offload), or
 * to clear hardware error conditions
 */
static void efx_init_channels(struct efx_nic *efx)
{
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
	struct efx_channel *channel;

	/* Calculate the rx buffer allocation parameters required to
	 * support the current MTU, including padding for header
	 * alignment and overruns.
	 */
	efx->rx_buffer_len = (max(EFX_PAGE_IP_ALIGN, NET_IP_ALIGN) +
			      EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
			      efx->type->rx_buffer_hash_size +
			      efx->type->rx_buffer_padding);
	efx->rx_buffer_order = get_order(efx->rx_buffer_len +
					 sizeof(struct efx_rx_page_state));

	/* Initialise the channels */
	efx_for_each_channel(channel, efx) {
		netif_dbg(channel->efx, drv, channel->efx->net_dev,
			  "init chan %d\n", channel->channel);

		efx_init_eventq(channel);

		efx_for_each_channel_tx_queue(tx_queue, channel)
			efx_init_tx_queue(tx_queue);

		/* The rx buffer allocation strategy is MTU dependent */
		efx_rx_strategy(channel);

		efx_for_each_channel_rx_queue(rx_queue, channel)
			efx_init_rx_queue(rx_queue);

		WARN_ON(channel->rx_pkt != NULL);
		efx_rx_strategy(channel);
	}
}

/* This enables event queue processing and packet transmission.
 *
 * Note that this function is not allowed to fail, since that would
 * introduce too much complexity into the suspend/resume path.
 */
static void efx_start_channel(struct efx_channel *channel)
{
	struct efx_rx_queue *rx_queue;

	netif_dbg(channel->efx, ifup, channel->efx->net_dev,
		  "starting chan %d\n", channel->channel);

	/* The interrupt handler for this channel may set work_pending
	 * as soon as we enable it.  Make sure it's cleared before
	 * then.  Similarly, make sure it sees the enabled flag set.
	 */
	channel->work_pending = false;
	channel->enabled = true;
	smp_wmb();

	/* Fill the queues before enabling NAPI */
	efx_for_each_channel_rx_queue(rx_queue, channel)
		efx_fast_push_rx_descriptors(rx_queue);

	napi_enable(&channel->napi_str);
}

/* This disables event queue processing and packet transmission.
 * This function does not guarantee that all queue processing
 * (e.g. RX refill) is complete.
 */
static void efx_stop_channel(struct efx_channel *channel)
{
	if (!channel->enabled)
		return;

	netif_dbg(channel->efx, ifdown, channel->efx->net_dev,
		  "stop chan %d\n", channel->channel);

	channel->enabled = false;
	napi_disable(&channel->napi_str);
}

static void efx_fini_channels(struct efx_nic *efx)
{
	struct efx_channel *channel;
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
	int rc;

	EFX_ASSERT_RESET_SERIALISED(efx);
	BUG_ON(efx->port_enabled);

	rc = efx_nic_flush_queues(efx);
	if (rc && EFX_WORKAROUND_7803(efx)) {
		/* Schedule a reset to recover from the flush failure.  The
		 * descriptor caches reference memory we're about to free,
		 * but falcon_reconfigure_mac_wrapper() won't reconnect
		 * the MACs because of the pending reset. */
		netif_err(efx, drv, efx->net_dev,
			  "Resetting to recover from flush failure\n");
		efx_schedule_reset(efx, RESET_TYPE_ALL);
	} else if (rc) {
		netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
	} else {
		netif_dbg(efx, drv, efx->net_dev,
			  "successfully flushed all queues\n");
	}

	efx_for_each_channel(channel, efx) {
		netif_dbg(channel->efx, drv, channel->efx->net_dev,
			  "shut down chan %d\n", channel->channel);

		efx_for_each_channel_rx_queue(rx_queue, channel)
			efx_fini_rx_queue(rx_queue);
		efx_for_each_possible_channel_tx_queue(tx_queue, channel)
			efx_fini_tx_queue(tx_queue);
		efx_fini_eventq(channel);
	}
}

static void efx_remove_channel(struct efx_channel *channel)
{
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;

	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "destroy chan %d\n", channel->channel);

	efx_for_each_channel_rx_queue(rx_queue, channel)
		efx_remove_rx_queue(rx_queue);
	efx_for_each_possible_channel_tx_queue(tx_queue, channel)
		efx_remove_tx_queue(tx_queue);
	efx_remove_eventq(channel);
}

static void efx_remove_channels(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		efx_remove_channel(channel);
}

int
efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
{
	struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
	u32 old_rxq_entries, old_txq_entries;
	unsigned i;
	int rc;

	efx_stop_all(efx);
	efx_fini_channels(efx);

	/* Clone channels */
	memset(other_channel, 0, sizeof(other_channel));
	for (i = 0; i < efx->n_channels; i++) {
		channel = efx_alloc_channel(efx, i, efx->channel[i]);
		if (!channel) {
			rc = -ENOMEM;
			goto out;
		}
		other_channel[i] = channel;
	}

	/* Swap entry counts and channel pointers */
	old_rxq_entries = efx->rxq_entries;
	old_txq_entries = efx->txq_entries;
	efx->rxq_entries = rxq_entries;
	efx->txq_entries = txq_entries;
	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		efx->channel[i] = other_channel[i];
		other_channel[i] = channel;
	}

	rc = efx_probe_channels(efx);
	if (rc)
		goto rollback;

	efx_init_napi(efx);

	/* Destroy old channels */
	for (i = 0; i < efx->n_channels; i++) {
		efx_fini_napi_channel(other_channel[i]);
		efx_remove_channel(other_channel[i]);
	}
out:
	/* Free unused channel structures */
	for (i = 0; i < efx->n_channels; i++)
		kfree(other_channel[i]);

	efx_init_channels(efx);
	efx_start_all(efx);
	return rc;

rollback:
	/* Swap back */
	efx->rxq_entries = old_rxq_entries;
	efx->txq_entries = old_txq_entries;
	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		efx->channel[i] = other_channel[i];
		other_channel[i] = channel;
	}
	goto out;
}
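/* Usage note (caller path assumed, not shown in this file):
 * efx_realloc_channels() is the natural backing for ethtool ring-size
 * changes such as "ethtool -G ethX rx 1024 tx 1024".  The clone/swap
 * sequence above lets a failed probe of the new rings roll back to the
 * old ring sizes without losing the device.
 */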
void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
{
	mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100));
}

/**************************************************************************
 *
 * Port handling
 *
 **************************************************************************/

/* This ensures that the kernel is kept informed (via
 * netif_carrier_on/off) of the link status, and also maintains the
 * link-status-driven stop on the port's TX queue.
 */
void efx_link_status_changed(struct efx_nic *efx)
{
	struct efx_link_state *link_state = &efx->link_state;

	/* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
	 * that no events are triggered between unregister_netdev() and the
	 * driver unloading.  A more general condition is that NETDEV_CHANGE
	 * can only be generated between NETDEV_UP and NETDEV_DOWN */
	if (!netif_running(efx->net_dev))
		return;

	if (link_state->up != netif_carrier_ok(efx->net_dev)) {
		efx->n_link_state_changes++;

		if (link_state->up)
			netif_carrier_on(efx->net_dev);
		else
			netif_carrier_off(efx->net_dev);
	}

	/* Status message for kernel log */
	if (link_state->up)
		netif_info(efx, link, efx->net_dev,
			   "link up at %uMbps %s-duplex (MTU %d)%s\n",
			   link_state->speed, link_state->fd ? "full" : "half",
			   efx->net_dev->mtu,
			   (efx->promiscuous ? " [PROMISC]" : ""));
	else
		netif_info(efx, link, efx->net_dev, "link down\n");
}

void efx_link_set_advertising(struct efx_nic *efx, u32 advertising)
{
	efx->link_advertising = advertising;
	if (advertising) {
		if (advertising & ADVERTISED_Pause)
			efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX);
		else
			efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
		if (advertising & ADVERTISED_Asym_Pause)
			efx->wanted_fc ^= EFX_FC_TX;
	}
}

void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
{
	efx->wanted_fc = wanted_fc;
	if (efx->link_advertising) {
		if (wanted_fc & EFX_FC_RX)
			efx->link_advertising |= (ADVERTISED_Pause |
						  ADVERTISED_Asym_Pause);
		else
			efx->link_advertising &= ~(ADVERTISED_Pause |
						   ADVERTISED_Asym_Pause);
		if (wanted_fc & EFX_FC_TX)
			efx->link_advertising ^= ADVERTISED_Asym_Pause;
	}
}
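/* Worked example: advertising Pause|Asym_Pause first sets wanted_fc to
 * EFX_FC_TX|EFX_FC_RX, then the Asym_Pause test toggles EFX_FC_TX off,
 * leaving RX-only pause.  The XOR in efx_link_set_wanted_fc() performs
 * the inverse mapping, so the two functions stay consistent.
 */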
static void efx_fini_port(struct efx_nic *efx);

/* Push loopback/power/transmit disable settings to the PHY, and reconfigure
 * the MAC appropriately.  All other PHY configuration changes are pushed
 * through phy_op->set_settings(), and pushed asynchronously to the MAC
 * through efx_monitor().
 *
 * Callers must hold the mac_lock
 */
int __efx_reconfigure_port(struct efx_nic *efx)
{
	enum efx_phy_mode phy_mode;
	int rc;

	WARN_ON(!mutex_is_locked(&efx->mac_lock));

	/* Serialise the promiscuous flag with efx_set_rx_mode. */
	netif_addr_lock_bh(efx->net_dev);
	netif_addr_unlock_bh(efx->net_dev);

	/* Disable PHY transmit in mac level loopbacks */
	phy_mode = efx->phy_mode;
	if (LOOPBACK_INTERNAL(efx))
		efx->phy_mode |= PHY_MODE_TX_DISABLED;
	else
		efx->phy_mode &= ~PHY_MODE_TX_DISABLED;

	rc = efx->type->reconfigure_port(efx);

	if (rc)
		efx->phy_mode = phy_mode;

	return rc;
}

/* Reinitialise the MAC to pick up new PHY settings, even if the port is
 * disabled. */
int efx_reconfigure_port(struct efx_nic *efx)
{
	int rc;

	EFX_ASSERT_RESET_SERIALISED(efx);

	mutex_lock(&efx->mac_lock);
	rc = __efx_reconfigure_port(efx);
	mutex_unlock(&efx->mac_lock);

	return rc;
}

/* Asynchronous work item for changing MAC promiscuity and multicast
 * hash.  Avoid a drain/rx_ingress enable by reconfiguring the current
 * MAC directly. */
static void efx_mac_work(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic, mac_work);

	mutex_lock(&efx->mac_lock);
	if (efx->port_enabled)
		efx->type->reconfigure_mac(efx);
	mutex_unlock(&efx->mac_lock);
}

static int efx_probe_port(struct efx_nic *efx)
{
	int rc;

	netif_dbg(efx, probe, efx->net_dev, "create port\n");

	if (phy_flash_cfg)
		efx->phy_mode = PHY_MODE_SPECIAL;

	/* Connect up MAC/PHY operations table */
	rc = efx->type->probe_port(efx);
	if (rc)
		return rc;

	/* Initialise MAC address to permanent address */
	memcpy(efx->net_dev->dev_addr, efx->net_dev->perm_addr, ETH_ALEN);

	return 0;
}

static int efx_init_port(struct efx_nic *efx)
{
	int rc;

	netif_dbg(efx, drv, efx->net_dev, "init port\n");

	mutex_lock(&efx->mac_lock);

	rc = efx->phy_op->init(efx);
	if (rc)
		goto fail1;

	efx->port_initialized = true;

	/* Reconfigure the MAC before creating dma queues (required for
	 * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported) */
	efx->type->reconfigure_mac(efx);

	/* Ensure the PHY advertises the correct flow control settings */
	rc = efx->phy_op->reconfigure(efx);
	if (rc)
		goto fail2;

	mutex_unlock(&efx->mac_lock);
	return 0;

fail2:
	efx->phy_op->fini(efx);
fail1:
	mutex_unlock(&efx->mac_lock);
	return rc;
}

static void efx_start_port(struct efx_nic *efx)
{
	netif_dbg(efx, ifup, efx->net_dev, "start port\n");
	BUG_ON(efx->port_enabled);

	mutex_lock(&efx->mac_lock);
	efx->port_enabled = true;

	/* efx_mac_work() might have been scheduled after efx_stop_port(),
	 * and then cancelled by efx_flush_all() */
	efx->type->reconfigure_mac(efx);

	mutex_unlock(&efx->mac_lock);
}

/* Prevent efx_mac_work() and efx_monitor() from working */
static void efx_stop_port(struct efx_nic *efx)
{
	netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");

	mutex_lock(&efx->mac_lock);
	efx->port_enabled = false;
	mutex_unlock(&efx->mac_lock);

	/* Serialise against efx_set_multicast_list() */
	netif_addr_lock_bh(efx->net_dev);
	netif_addr_unlock_bh(efx->net_dev);
}
static void efx_fini_port(struct efx_nic *efx)
{
	netif_dbg(efx, drv, efx->net_dev, "shut down port\n");

	if (!efx->port_initialized)
		return;

	efx->phy_op->fini(efx);
	efx->port_initialized = false;

	efx->link_state.up = false;
	efx_link_status_changed(efx);
}

static void efx_remove_port(struct efx_nic *efx)
{
	netif_dbg(efx, drv, efx->net_dev, "destroying port\n");

	efx->type->remove_port(efx);
}

/**************************************************************************
 *
 * NIC handling
 *
 **************************************************************************/

/* This configures the PCI device to enable I/O and DMA. */
static int efx_init_io(struct efx_nic *efx)
{
	struct pci_dev *pci_dev = efx->pci_dev;
	dma_addr_t dma_mask = efx->type->max_dma_mask;
	int rc;

	netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");

	rc = pci_enable_device(pci_dev);
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "failed to enable PCI device\n");
		goto fail1;
	}

	pci_set_master(pci_dev);

	/* Set the PCI DMA mask.  Try all possibilities from our
	 * genuine mask down to 32 bits, because some architectures
	 * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
	 * masks even though they reject 46 bit masks.
	 */
	while (dma_mask > 0x7fffffffUL) {
		if (pci_dma_supported(pci_dev, dma_mask)) {
			rc = pci_set_dma_mask(pci_dev, dma_mask);
			if (rc == 0)
				break;
		}
		dma_mask >>= 1;
	}
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "could not find a suitable DMA mask\n");
		goto fail2;
	}
	netif_dbg(efx, probe, efx->net_dev,
		  "using DMA mask %llx\n", (unsigned long long) dma_mask);
	rc = pci_set_consistent_dma_mask(pci_dev, dma_mask);
	if (rc) {
		/* pci_set_consistent_dma_mask() is not *allowed* to
		 * fail with a mask that pci_set_dma_mask() accepted,
		 * but just in case...
		 */
		netif_err(efx, probe, efx->net_dev,
			  "failed to set consistent DMA mask\n");
		goto fail2;
	}

	efx->membase_phys = pci_resource_start(efx->pci_dev, EFX_MEM_BAR);
	rc = pci_request_region(pci_dev, EFX_MEM_BAR, "sfc");
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "request for memory BAR failed\n");
		rc = -EIO;
		goto fail3;
	}
	efx->membase = ioremap_nocache(efx->membase_phys,
				       efx->type->mem_map_size);
	if (!efx->membase) {
		netif_err(efx, probe, efx->net_dev,
			  "could not map memory BAR at %llx+%x\n",
			  (unsigned long long)efx->membase_phys,
			  efx->type->mem_map_size);
		rc = -ENOMEM;
		goto fail4;
	}
	netif_dbg(efx, probe, efx->net_dev,
		  "memory BAR at %llx+%x (virtual %p)\n",
		  (unsigned long long)efx->membase_phys,
		  efx->type->mem_map_size, efx->membase);

	return 0;

fail4:
	pci_release_region(efx->pci_dev, EFX_MEM_BAR);
fail3:
	efx->membase_phys = 0;
fail2:
	pci_disable_device(efx->pci_dev);
fail1:
	return rc;
}

static void efx_fini_io(struct efx_nic *efx)
{
	netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");

	if (efx->membase) {
		iounmap(efx->membase);
		efx->membase = NULL;
	}

	if (efx->membase_phys) {
		pci_release_region(efx->pci_dev, EFX_MEM_BAR);
		efx->membase_phys = 0;
	}

	pci_disable_device(efx->pci_dev);
}

static int efx_wanted_parallelism(void)
{
	cpumask_var_t thread_mask;
	int count;
	int cpu;

	if (rss_cpus)
		return rss_cpus;

	if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
		printk(KERN_WARNING
		       "sfc: RSS disabled due to allocation failure\n");
		return 1;
	}

	count = 0;
	for_each_online_cpu(cpu) {
		if (!cpumask_test_cpu(cpu, thread_mask)) {
			++count;
			cpumask_or(thread_mask, thread_mask,
				   topology_thread_cpumask(cpu));
		}
	}

	free_cpumask_var(thread_mask);
	return count;
}
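/* Worked example: on a hyperthreaded 4-core/8-thread package, the two
 * sibling threads of each core share a topology_thread_cpumask(), so
 * the loop above counts 4; by default RSS therefore spreads interrupts
 * across physical cores rather than across every hyperthread.
 */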
static int
efx_init_rx_cpu_rmap(struct efx_nic *efx, struct msix_entry *xentries)
{
#ifdef CONFIG_RFS_ACCEL
	int i, rc;

	efx->net_dev->rx_cpu_rmap = alloc_irq_cpu_rmap(efx->n_rx_channels);
	if (!efx->net_dev->rx_cpu_rmap)
		return -ENOMEM;
	for (i = 0; i < efx->n_rx_channels; i++) {
		rc = irq_cpu_rmap_add(efx->net_dev->rx_cpu_rmap,
				      xentries[i].vector);
		if (rc) {
			free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
			efx->net_dev->rx_cpu_rmap = NULL;
			return rc;
		}
	}
#endif
	return 0;
}

/* Probe the number and type of interrupts we are able to obtain, and
 * the resulting numbers of channels and RX queues.
 */
static int efx_probe_interrupts(struct efx_nic *efx)
{
	int max_channels =
		min_t(int, efx->type->phys_addr_channels, EFX_MAX_CHANNELS);
	int rc, i;

	if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
		struct msix_entry xentries[EFX_MAX_CHANNELS];
		int n_channels;

		n_channels = efx_wanted_parallelism();
		if (separate_tx_channels)
			n_channels *= 2;
		n_channels = min(n_channels, max_channels);

		for (i = 0; i < n_channels; i++)
			xentries[i].entry = i;
		rc = pci_enable_msix(efx->pci_dev, xentries, n_channels);
		if (rc > 0) {
			netif_err(efx, drv, efx->net_dev,
				  "WARNING: Insufficient MSI-X vectors"
				  " available (%d < %d).\n", rc, n_channels);
			netif_err(efx, drv, efx->net_dev,
				  "WARNING: Performance may be reduced.\n");
			EFX_BUG_ON_PARANOID(rc >= n_channels);
			n_channels = rc;
			rc = pci_enable_msix(efx->pci_dev, xentries,
					     n_channels);
		}

		if (rc == 0) {
			efx->n_channels = n_channels;
			if (separate_tx_channels) {
				efx->n_tx_channels =
					max(efx->n_channels / 2, 1U);
				efx->n_rx_channels =
					max(efx->n_channels -
					    efx->n_tx_channels, 1U);
			} else {
				efx->n_tx_channels = efx->n_channels;
				efx->n_rx_channels = efx->n_channels;
			}
			rc = efx_init_rx_cpu_rmap(efx, xentries);
			if (rc) {
				pci_disable_msix(efx->pci_dev);
				return rc;
			}
			for (i = 0; i < n_channels; i++)
				efx_get_channel(efx, i)->irq =
					xentries[i].vector;
		} else {
			/* Fall back to single channel MSI */
			efx->interrupt_mode = EFX_INT_MODE_MSI;
			netif_err(efx, drv, efx->net_dev,
				  "could not enable MSI-X\n");
		}
	}

	/* Try single interrupt MSI */
	if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
		efx->n_channels = 1;
		efx->n_rx_channels = 1;
		efx->n_tx_channels = 1;
		rc = pci_enable_msi(efx->pci_dev);
		if (rc == 0) {
			efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
		} else {
			netif_err(efx, drv, efx->net_dev,
				  "could not enable MSI\n");
			efx->interrupt_mode = EFX_INT_MODE_LEGACY;
		}
	}

	/* Assume legacy interrupts */
	if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
		efx->n_channels = 1 + (separate_tx_channels ? 1 : 0);
		efx->n_rx_channels = 1;
		efx->n_tx_channels = 1;
		efx->legacy_irq = efx->pci_dev->irq;
	}

	return 0;
}

static void efx_remove_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel;

	/* Remove MSI/MSI-X interrupts */
	efx_for_each_channel(channel, efx)
		channel->irq = 0;
	pci_disable_msi(efx->pci_dev);
	pci_disable_msix(efx->pci_dev);

	/* Remove legacy interrupt */
	efx->legacy_irq = 0;
}
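/* Worked example for efx_set_channels() below: with n_channels = 8 and
 * separate_tx_channels set, efx_probe_interrupts() above leaves
 * n_rx_channels = n_tx_channels = 4, so tx_channel_offset becomes 4 and
 * the TX queues owned by channels 4..7 are renumbered to begin at 0.
 */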
static void efx_set_channels(struct efx_nic *efx)
{
	struct efx_channel *channel;
	struct efx_tx_queue *tx_queue;

	efx->tx_channel_offset =
		separate_tx_channels ? efx->n_channels - efx->n_tx_channels : 0;

	/* We need to adjust the TX queue numbers if we have separate
	 * RX-only and TX-only channels.
	 */
	efx_for_each_channel(channel, efx) {
		efx_for_each_channel_tx_queue(tx_queue, channel)
			tx_queue->queue -= (efx->tx_channel_offset *
					    EFX_TXQ_TYPES);
	}
}

static int efx_probe_nic(struct efx_nic *efx)
{
	size_t i;
	int rc;

	netif_dbg(efx, probe, efx->net_dev, "creating NIC\n");

	/* Carry out hardware-type specific initialisation */
	rc = efx->type->probe(efx);
	if (rc)
		return rc;

	/* Determine the number of channels and queues by trying to hook
	 * in MSI-X interrupts. */
	rc = efx_probe_interrupts(efx);
	if (rc)
		goto fail;

	if (efx->n_channels > 1)
		get_random_bytes(&efx->rx_hash_key, sizeof(efx->rx_hash_key));
	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
		efx->rx_indir_table[i] =
			ethtool_rxfh_indir_default(i, efx->n_rx_channels);

	efx_set_channels(efx);
	netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
	netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);

	/* Initialise the interrupt moderation settings */
	efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
				true);

	return 0;

fail:
	efx->type->remove(efx);
	return rc;
}

static void efx_remove_nic(struct efx_nic *efx)
{
	netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n");

	efx_remove_interrupts(efx);
	efx->type->remove(efx);
}

/**************************************************************************
 *
 * NIC startup/shutdown
 *
 *************************************************************************/

static int efx_probe_all(struct efx_nic *efx)
{
	int rc;

	rc = efx_probe_nic(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev, "failed to create NIC\n");
		goto fail1;
	}

	rc = efx_probe_port(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev, "failed to create port\n");
		goto fail2;
	}

	efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE;
	rc = efx_probe_channels(efx);
	if (rc)
		goto fail3;

	rc = efx_probe_filters(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "failed to create filter tables\n");
		goto fail4;
	}

	return 0;

fail4:
	efx_remove_channels(efx);
fail3:
	efx_remove_port(efx);
fail2:
	efx_remove_nic(efx);
fail1:
	return rc;
}
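/* The teardown in efx_remove_all() later in this file mirrors the probe
 * order above in reverse: filters, then channels, then port, then NIC.
 */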
/* Called after previous invocation(s) of efx_stop_all, restarts the
 * port, kernel transmit queue, NAPI processing and hardware interrupts,
 * and ensures that the port is scheduled to be reconfigured.
 * This function is safe to call multiple times when the NIC is in any
 * state. */
static void efx_start_all(struct efx_nic *efx)
{
	struct efx_channel *channel;

	EFX_ASSERT_RESET_SERIALISED(efx);

	/* Check that it is appropriate to restart the interface.  All
	 * of these flags are safe to read under just the rtnl lock */
	if (efx->port_enabled)
		return;
	if ((efx->state != STATE_RUNNING) && (efx->state != STATE_INIT))
		return;
	if (!netif_running(efx->net_dev))
		return;

	/* Mark the port as enabled so port reconfigurations can start, then
	 * restart the transmit interface early so the watchdog timer stops */
	efx_start_port(efx);

	if (netif_device_present(efx->net_dev))
		netif_tx_wake_all_queues(efx->net_dev);

	efx_for_each_channel(channel, efx)
		efx_start_channel(channel);

	if (efx->legacy_irq)
		efx->legacy_irq_enabled = true;
	efx_nic_enable_interrupts(efx);

	/* Switch to event based MCDI completions after enabling interrupts.
	 * If a reset has been scheduled, then we need to stay in polled mode.
	 * Rather than serialising efx_mcdi_mode_event() [which sleeps] and
	 * reset_pending [modified from an atomic context], we instead guarantee
	 * that efx_mcdi_mode_poll() isn't reverted erroneously */
	efx_mcdi_mode_event(efx);
	if (efx->reset_pending)
		efx_mcdi_mode_poll(efx);

	/* Start the hardware monitor if there is one.  Otherwise (we're link
	 * event driven), we have to poll the PHY because after an event queue
	 * flush, we could have missed a link state change */
	if (efx->type->monitor != NULL) {
		queue_delayed_work(efx->workqueue, &efx->monitor_work,
				   efx_monitor_interval);
	} else {
		mutex_lock(&efx->mac_lock);
		if (efx->phy_op->poll(efx))
			efx_link_status_changed(efx);
		mutex_unlock(&efx->mac_lock);
	}

	efx->type->start_stats(efx);
}

/* Flush all delayed work.  Should only be called when no more delayed work
 * will be scheduled.  This doesn't flush pending online resets (efx_reset),
 * since we're holding the rtnl_lock at this point. */
static void efx_flush_all(struct efx_nic *efx)
{
	/* Make sure the hardware monitor is stopped */
	cancel_delayed_work_sync(&efx->monitor_work);
	/* Stop scheduled port reconfigurations */
	cancel_work_sync(&efx->mac_work);
}

/* Quiesce hardware and software without bringing the link down.
 * Safe to call multiple times, when the nic and interface are in any
 * state.  The caller is guaranteed to subsequently be in a position
 * to modify any hardware and software state they see fit without
 * taking locks. */
static void efx_stop_all(struct efx_nic *efx)
{
	struct efx_channel *channel;

	EFX_ASSERT_RESET_SERIALISED(efx);

	/* port_enabled can be read safely under the rtnl lock */
	if (!efx->port_enabled)
		return;

	efx->type->stop_stats(efx);

	/* Switch to MCDI polling on Siena before disabling interrupts */
	efx_mcdi_mode_poll(efx);

	/* Disable interrupts and wait for ISR to complete */
	efx_nic_disable_interrupts(efx);
	if (efx->legacy_irq) {
		synchronize_irq(efx->legacy_irq);
		efx->legacy_irq_enabled = false;
	}
	efx_for_each_channel(channel, efx) {
		if (channel->irq)
			synchronize_irq(channel->irq);
	}

	/* Stop all NAPI processing and synchronous rx refills */
	efx_for_each_channel(channel, efx)
		efx_stop_channel(channel);

	/* Stop all asynchronous port reconfigurations.
	 * Since all event processing has already been stopped, there
	 * is no window to lose phy events */
	efx_stop_port(efx);

	/* Flush efx_mac_work(), refill_workqueue, monitor_work */
	efx_flush_all(efx);

	/* Stop the kernel transmit interface late, so the watchdog
	 * timer isn't ticking over the flush */
	netif_tx_stop_all_queues(efx->net_dev);
	netif_tx_lock_bh(efx->net_dev);
	netif_tx_unlock_bh(efx->net_dev);
}

static void efx_remove_all(struct efx_nic *efx)
{
	efx_remove_filters(efx);
	efx_remove_channels(efx);
	efx_remove_port(efx);
	efx_remove_nic(efx);
}

/**************************************************************************
 *
 * Interrupt moderation
 *
 **************************************************************************/

static unsigned int irq_mod_ticks(unsigned int usecs, unsigned int quantum_ns)
{
	if (usecs == 0)
		return 0;
	if (usecs * 1000 < quantum_ns)
		return 1; /* never round down to 0 */
	return usecs * 1000 / quantum_ns;
}

/* Set interrupt moderation parameters */
int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
			    unsigned int rx_usecs, bool rx_adaptive,
			    bool rx_may_override_tx)
{
	struct efx_channel *channel;
	unsigned int irq_mod_max = DIV_ROUND_UP(efx->type->timer_period_max *
						efx->timer_quantum_ns,
						1000);
	unsigned int tx_ticks;
	unsigned int rx_ticks;

	EFX_ASSERT_RESET_SERIALISED(efx);

	if (tx_usecs > irq_mod_max || rx_usecs > irq_mod_max)
		return -EINVAL;

	tx_ticks = irq_mod_ticks(tx_usecs, efx->timer_quantum_ns);
	rx_ticks = irq_mod_ticks(rx_usecs, efx->timer_quantum_ns);

	if (tx_ticks != rx_ticks && efx->tx_channel_offset == 0 &&
	    !rx_may_override_tx) {
		netif_err(efx, drv, efx->net_dev, "Channels are shared. "
			  "RX and TX IRQ moderation must be equal\n");
		return -EINVAL;
	}

	efx->irq_rx_adaptive = rx_adaptive;
	efx->irq_rx_moderation = rx_ticks;
	efx_for_each_channel(channel, efx) {
		if (efx_channel_has_rx_queue(channel))
			channel->irq_moderation = rx_ticks;
		else if (efx_channel_has_tx_queues(channel))
			channel->irq_moderation = tx_ticks;
	}

	return 0;
}
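/* Worked example (quantum value assumed for illustration): with
 * timer_quantum_ns = 5000, the default rx_irq_mod_usec = 60 converts in
 * irq_mod_ticks() to 60000 / 5000 = 12 ticks, while any non-zero value
 * below one quantum rounds up to a single tick rather than down to 0.
 */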
void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
			    unsigned int *rx_usecs, bool *rx_adaptive)
{
	/* We must round up when converting ticks to microseconds
	 * because we round down when converting the other way.
	 */

	*rx_adaptive = efx->irq_rx_adaptive;
	*rx_usecs = DIV_ROUND_UP(efx->irq_rx_moderation *
				 efx->timer_quantum_ns,
				 1000);

	/* If channels are shared between RX and TX, so is IRQ
	 * moderation.  Otherwise, IRQ moderation is the same for all
	 * TX channels and is not adaptive.
	 */
	if (efx->tx_channel_offset == 0)
		*tx_usecs = *rx_usecs;
	else
		*tx_usecs = DIV_ROUND_UP(
			efx->channel[efx->tx_channel_offset]->irq_moderation *
			efx->timer_quantum_ns,
			1000);
}

/**************************************************************************
 *
 * Hardware monitor
 *
 **************************************************************************/

/* Run periodically off the general workqueue */
static void efx_monitor(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic,
					   monitor_work.work);

	netif_vdbg(efx, timer, efx->net_dev,
		   "hardware monitor executing on CPU %d\n",
		   raw_smp_processor_id());
	BUG_ON(efx->type->monitor == NULL);

	/* If the mac_lock is already held then it is likely a port
	 * reconfiguration is already in place, which will likely do
	 * most of the work of monitor() anyway. */
	if (mutex_trylock(&efx->mac_lock)) {
		if (efx->port_enabled)
			efx->type->monitor(efx);
		mutex_unlock(&efx->mac_lock);
	}

	queue_delayed_work(efx->workqueue, &efx->monitor_work,
			   efx_monitor_interval);
}

/**************************************************************************
 *
 * ioctls
 *
 *************************************************************************/

/* Net device ioctl
 * Context: process, rtnl_lock() held.
 */
static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct mii_ioctl_data *data = if_mii(ifr);

	EFX_ASSERT_RESET_SERIALISED(efx);

	/* Convert phy_id from older PRTAD/DEVAD format */
	if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) &&
	    (data->phy_id & 0xfc00) == 0x0400)
		data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400;

	return mdio_mii_ioctl(&efx->mdio, data, cmd);
}

/**************************************************************************
 *
 * NAPI interface
 *
 **************************************************************************/

static void efx_init_napi(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx) {
		channel->napi_dev = efx->net_dev;
		netif_napi_add(channel->napi_dev, &channel->napi_str,
			       efx_poll, napi_weight);
	}
}

static void efx_fini_napi_channel(struct efx_channel *channel)
{
	if (channel->napi_dev)
		netif_napi_del(&channel->napi_str);
	channel->napi_dev = NULL;
}

static void efx_fini_napi(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		efx_fini_napi_channel(channel);
}
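/* The napi_weight of 64 passed to netif_napi_add() in efx_init_napi()
 * above is the conventional kernel NAPI weight; it bounds the budget
 * handed to each efx_poll() call, which only completes NAPI and
 * re-enables interrupts when it has spent less than that budget.
 */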
/**************************************************************************
 *
 * Kernel netpoll interface
 *
 *************************************************************************/

#ifdef CONFIG_NET_POLL_CONTROLLER

/* Although in the common case interrupts will be disabled, this is not
 * guaranteed.  However, all our work happens inside the NAPI callback,
 * so no locking is required.
 */
static void efx_netpoll(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		efx_schedule_channel(channel);
}

#endif

/**************************************************************************
 *
 * Kernel net device interface
 *
 *************************************************************************/

/* Context: process, rtnl_lock() held. */
static int efx_net_open(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	EFX_ASSERT_RESET_SERIALISED(efx);

	netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n",
		  raw_smp_processor_id());

	if (efx->state == STATE_DISABLED)
		return -EIO;
	if (efx->phy_mode & PHY_MODE_SPECIAL)
		return -EBUSY;
	if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL))
		return -EIO;

	/* Notify the kernel of the link state polled during driver load,
	 * before the monitor starts running */
	efx_link_status_changed(efx);

	efx_start_all(efx);
	return 0;
}

/* Context: process, rtnl_lock() held.
 * Note that the kernel will ignore our return code; this method
 * should really be a void.
 */
static int efx_net_stop(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n",
		  raw_smp_processor_id());

	if (efx->state != STATE_DISABLED) {
		/* Stop the device and flush all the channels */
		efx_stop_all(efx);
		efx_fini_channels(efx);
		efx_init_channels(efx);
	}

	return 0;
}

/* Context: process, dev_base_lock or RTNL held, non-blocking. */
static struct rtnl_link_stats64 *efx_net_stats(struct net_device *net_dev,
					       struct rtnl_link_stats64 *stats)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct efx_mac_stats *mac_stats = &efx->mac_stats;

	spin_lock_bh(&efx->stats_lock);

	efx->type->update_stats(efx);

	stats->rx_packets = mac_stats->rx_packets;
	stats->tx_packets = mac_stats->tx_packets;
	stats->rx_bytes = mac_stats->rx_bytes;
	stats->tx_bytes = mac_stats->tx_bytes;
	stats->rx_dropped = efx->n_rx_nodesc_drop_cnt;
	stats->multicast = mac_stats->rx_multicast;
	stats->collisions = mac_stats->tx_collision;
	stats->rx_length_errors = (mac_stats->rx_gtjumbo +
				   mac_stats->rx_length_error);
	stats->rx_crc_errors = mac_stats->rx_bad;
	stats->rx_frame_errors = mac_stats->rx_align_error;
	stats->rx_fifo_errors = mac_stats->rx_overflow;
	stats->rx_missed_errors = mac_stats->rx_missed;
	stats->tx_window_errors = mac_stats->tx_late_collision;

	stats->rx_errors = (stats->rx_length_errors +
			    stats->rx_crc_errors +
			    stats->rx_frame_errors +
			    mac_stats->rx_symbol_error);
	stats->tx_errors = (stats->tx_window_errors +
			    mac_stats->tx_bad);

	spin_unlock_bh(&efx->stats_lock);

	return stats;
}
/* Context: netif_tx_lock held, BHs disabled. */
static void efx_watchdog(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	netif_err(efx, tx_err, efx->net_dev,
		  "TX stuck with port_enabled=%d: resetting channels\n",
		  efx->port_enabled);

	efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
}


/* Context: process, rtnl_lock() held. */
static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	EFX_ASSERT_RESET_SERIALISED(efx);

	if (new_mtu > EFX_MAX_MTU)
		return -EINVAL;

	efx_stop_all(efx);

	netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);

	efx_fini_channels(efx);

	mutex_lock(&efx->mac_lock);
	/* Reconfigure the MAC before enabling the dma queues so that
	 * the RX buffers don't overflow */
	net_dev->mtu = new_mtu;
	efx->type->reconfigure_mac(efx);
	mutex_unlock(&efx->mac_lock);

	efx_init_channels(efx);

	efx_start_all(efx);
	return 0;
}

static int efx_set_mac_address(struct net_device *net_dev, void *data)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct sockaddr *addr = data;
	char *new_addr = addr->sa_data;

	EFX_ASSERT_RESET_SERIALISED(efx);

	if (!is_valid_ether_addr(new_addr)) {
		netif_err(efx, drv, efx->net_dev,
			  "invalid ethernet MAC address requested: %pM\n",
			  new_addr);
		return -EINVAL;
	}

	memcpy(net_dev->dev_addr, new_addr, net_dev->addr_len);

	/* Reconfigure the MAC */
	mutex_lock(&efx->mac_lock);
	efx->type->reconfigure_mac(efx);
	mutex_unlock(&efx->mac_lock);

	return 0;
}

/* Context: netif_addr_lock held, BHs disabled. */
static void efx_set_rx_mode(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct netdev_hw_addr *ha;
	union efx_multicast_hash *mc_hash = &efx->multicast_hash;
	u32 crc;
	int bit;

	efx->promiscuous = !!(net_dev->flags & IFF_PROMISC);

	/* Build multicast hash table */
	if (efx->promiscuous || (net_dev->flags & IFF_ALLMULTI)) {
		memset(mc_hash, 0xff, sizeof(*mc_hash));
	} else {
		memset(mc_hash, 0x00, sizeof(*mc_hash));
		netdev_for_each_mc_addr(ha, net_dev) {
			crc = ether_crc_le(ETH_ALEN, ha->addr);
			bit = crc & (EFX_MCAST_HASH_ENTRIES - 1);
			set_bit_le(bit, mc_hash->byte);
		}

		/* Broadcast packets go through the multicast hash filter.
		 * ether_crc_le() of the broadcast address is 0xbe2612ff
		 * so we always add bit 0xff to the mask.
		 */
		set_bit_le(0xff, mc_hash->byte);
	}

	if (efx->port_enabled)
		queue_work(efx->workqueue, &efx->mac_work);
	/* Otherwise efx_start_port() will do this */
}
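/* Worked example: ether_crc_le() of ff:ff:ff:ff:ff:ff is 0xbe2612ff;
 * assuming EFX_MCAST_HASH_ENTRIES is 256, the low eight bits select bit
 * 0xff, which is exactly the bit forced on above so that broadcast
 * frames always pass the multicast hash filter.
 */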
static int efx_set_features(struct net_device *net_dev, netdev_features_t data)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	/* If disabling RX n-tuple filtering, clear existing filters */
	if (net_dev->features & ~data & NETIF_F_NTUPLE)
		efx_filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL);

	return 0;
}

static const struct net_device_ops efx_netdev_ops = {
	.ndo_open		= efx_net_open,
	.ndo_stop		= efx_net_stop,
	.ndo_get_stats64	= efx_net_stats,
	.ndo_tx_timeout		= efx_watchdog,
	.ndo_start_xmit		= efx_hard_start_xmit,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_do_ioctl		= efx_ioctl,
	.ndo_change_mtu		= efx_change_mtu,
	.ndo_set_mac_address	= efx_set_mac_address,
	.ndo_set_rx_mode	= efx_set_rx_mode,
	.ndo_set_features	= efx_set_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= efx_netpoll,
#endif
	.ndo_setup_tc		= efx_setup_tc,
#ifdef CONFIG_RFS_ACCEL
	.ndo_rx_flow_steer	= efx_filter_rfs,
#endif
};

static void efx_update_name(struct efx_nic *efx)
{
	strcpy(efx->name, efx->net_dev->name);
	efx_mtd_rename(efx);
	efx_set_channel_names(efx);
}

static int efx_netdev_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct net_device *net_dev = ptr;

	if (net_dev->netdev_ops == &efx_netdev_ops &&
	    event == NETDEV_CHANGENAME)
		efx_update_name(netdev_priv(net_dev));

	return NOTIFY_DONE;
}

static struct notifier_block efx_netdev_notifier = {
	.notifier_call = efx_netdev_event,
};

static ssize_t
show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
	return sprintf(buf, "%d\n", efx->phy_type);
}
static DEVICE_ATTR(phy_type, 0644, show_phy_type, NULL);

static int efx_register_netdev(struct efx_nic *efx)
{
	struct net_device *net_dev = efx->net_dev;
	struct efx_channel *channel;
	int rc;

	net_dev->watchdog_timeo = 5 * HZ;
	net_dev->irq = efx->pci_dev->irq;
	net_dev->netdev_ops = &efx_netdev_ops;
	SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops);

	rtnl_lock();

	rc = dev_alloc_name(net_dev, net_dev->name);
	if (rc < 0)
		goto fail_locked;
	efx_update_name(efx);

	rc = register_netdevice(net_dev);
	if (rc)
		goto fail_locked;

	efx_for_each_channel(channel, efx) {
		struct efx_tx_queue *tx_queue;
		efx_for_each_channel_tx_queue(tx_queue, channel)
			efx_init_tx_queue_core_txq(tx_queue);
	}

	/* Always start with carrier off; PHY events will detect the link */
	netif_carrier_off(net_dev);

	rtnl_unlock();

	rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
	if (rc) {
		netif_err(efx, drv, efx->net_dev,
			  "failed to init net dev attributes\n");
		goto fail_registered;
	}

	return 0;

fail_locked:
	rtnl_unlock();
	netif_err(efx, drv, efx->net_dev, "could not register net dev\n");
	return rc;

fail_registered:
	unregister_netdev(net_dev);
	return rc;
}
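/* Usage note: the phy_type attribute created above hangs off the PCI
 * device, so it is visible to userspace as
 * /sys/bus/pci/devices/<domain:bus:dev.fn>/phy_type and reports the
 * numeric PHY type.
 */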

static int efx_register_netdev(struct efx_nic *efx)
{
	struct net_device *net_dev = efx->net_dev;
	struct efx_channel *channel;
	int rc;

	net_dev->watchdog_timeo = 5 * HZ;
	net_dev->irq = efx->pci_dev->irq;
	net_dev->netdev_ops = &efx_netdev_ops;
	SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops);

	rtnl_lock();

	rc = dev_alloc_name(net_dev, net_dev->name);
	if (rc < 0)
		goto fail_locked;
	efx_update_name(efx);

	rc = register_netdevice(net_dev);
	if (rc)
		goto fail_locked;

	efx_for_each_channel(channel, efx) {
		struct efx_tx_queue *tx_queue;
		efx_for_each_channel_tx_queue(tx_queue, channel)
			efx_init_tx_queue_core_txq(tx_queue);
	}

	/* Always start with carrier off; PHY events will detect the link */
	netif_carrier_off(net_dev);

	rtnl_unlock();

	rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
	if (rc) {
		netif_err(efx, drv, efx->net_dev,
			  "failed to init net dev attributes\n");
		goto fail_registered;
	}

	return 0;

fail_locked:
	rtnl_unlock();
	netif_err(efx, drv, efx->net_dev, "could not register net dev\n");
	return rc;

fail_registered:
	unregister_netdev(net_dev);
	return rc;
}

static void efx_unregister_netdev(struct efx_nic *efx)
{
	struct efx_channel *channel;
	struct efx_tx_queue *tx_queue;

	if (!efx->net_dev)
		return;

	BUG_ON(netdev_priv(efx->net_dev) != efx);

	/* Free up any skbs still remaining. This has to happen before
	 * we try to unregister the netdev, as running their destructors
	 * may be needed to get the device ref. count to 0.
	 */
	efx_for_each_channel(channel, efx) {
		efx_for_each_channel_tx_queue(tx_queue, channel)
			efx_release_tx_buffers(tx_queue);
	}

	strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
	device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
	unregister_netdev(efx->net_dev);
}

/**************************************************************************
 *
 * Device reset and suspend
 *
 **************************************************************************/

/* Tears down the entire software state and most of the hardware state
 * before reset.
 */
void efx_reset_down(struct efx_nic *efx, enum reset_type method)
{
	EFX_ASSERT_RESET_SERIALISED(efx);

	efx_stop_all(efx);
	mutex_lock(&efx->mac_lock);

	efx_fini_channels(efx);
	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE)
		efx->phy_op->fini(efx);
	efx->type->fini(efx);
}

/* This function will always ensure that the locks acquired in
 * efx_reset_down() are released. A failure return code indicates
 * that we were unable to reinitialise the hardware, and that the
 * driver should be disabled. If ok is false, then the RX and TX
 * engines are not restarted, pending a RESET_TYPE_DISABLE.
 */
int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
{
	int rc;

	EFX_ASSERT_RESET_SERIALISED(efx);

	rc = efx->type->init(efx);
	if (rc) {
		netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
		goto fail;
	}

	if (!ok)
		goto fail;

	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE) {
		rc = efx->phy_op->init(efx);
		if (rc)
			goto fail;
		if (efx->phy_op->reconfigure(efx))
			netif_err(efx, drv, efx->net_dev,
				  "could not restore PHY settings\n");
	}

	efx->type->reconfigure_mac(efx);

	efx_init_channels(efx);
	efx_restore_filters(efx);

	mutex_unlock(&efx->mac_lock);

	efx_start_all(efx);

	return 0;

fail:
	efx->port_initialized = false;

	mutex_unlock(&efx->mac_lock);

	return rc;
}

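/* Sketch of how the two helpers above pair up (illustrative only;
 * do_hardware_reset is a hypothetical stand-in for the method-specific
 * reset step). efx_reset_down() returns with mac_lock held, so every
 * caller must go on to call efx_reset_up(), which releases the lock on
 * all of its paths:
 *
 *	efx_reset_down(efx, method);
 *	rc = do_hardware_reset(efx);
 *	rc2 = efx_reset_up(efx, method, rc == 0);
 *
 * efx_reset() below follows exactly this pattern.
 */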

/* Reset the NIC using the specified method. Note that the reset may
 * fail, in which case the card will be left in an unusable state.
 *
 * Caller must hold the rtnl_lock.
 */
int efx_reset(struct efx_nic *efx, enum reset_type method)
{
	int rc, rc2;
	bool disabled;

	netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
		   RESET_TYPE(method));

	netif_device_detach(efx->net_dev);
	efx_reset_down(efx, method);

	rc = efx->type->reset(efx, method);
	if (rc) {
		netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
		goto out;
	}

	/* Clear flags for the scopes we covered. We assume the NIC and
	 * driver are now quiescent so that there is no race here.
	 */
	efx->reset_pending &= -(1 << (method + 1));

	/* Reinitialise bus-mastering, which may have been turned off before
	 * the reset was scheduled. This is still appropriate, even in the
	 * RESET_TYPE_DISABLE case, since this driver generally assumes the
	 * hardware can respond to requests.
	 */
	pci_set_master(efx->pci_dev);

out:
	/* Leave device stopped if necessary */
	disabled = rc || method == RESET_TYPE_DISABLE;
	rc2 = efx_reset_up(efx, method, !disabled);
	if (rc2) {
		disabled = true;
		if (!rc)
			rc = rc2;
	}

	if (disabled) {
		dev_close(efx->net_dev);
		netif_err(efx, drv, efx->net_dev, "has been disabled\n");
		efx->state = STATE_DISABLED;
	} else {
		netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
		netif_device_attach(efx->net_dev);
	}
	return rc;
}

/* The worker thread exists so that code that cannot sleep can
 * schedule a reset for later.
 */
static void efx_reset_work(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
	unsigned long pending = ACCESS_ONCE(efx->reset_pending);

	if (!pending)
		return;

	/* If we're not RUNNING then don't reset. Leave the reset_pending
	 * flags set so that efx_pci_probe_main() will be retried.
	 */
	if (efx->state != STATE_RUNNING) {
		netif_info(efx, drv, efx->net_dev,
			   "scheduled reset quenched. NIC not RUNNING\n");
		return;
	}

	rtnl_lock();
	(void)efx_reset(efx, fls(pending) - 1);
	rtnl_unlock();
}

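/* Worked example of the reset_pending arithmetic (added commentary, not
 * from the original source): reset types are numbered so that a larger
 * value implies a wider reset scope. Suppose RESET_TYPE_INVISIBLE (bit 0)
 * and RESET_TYPE_ALL (bit 1) are both pending, so pending == 0x3. Then
 * fls(pending) - 1 == 1 and efx_reset_work() above performs
 * RESET_TYPE_ALL, which covers the invisible reset too. efx_reset()
 * then executes reset_pending &= -(1 << (method + 1)), here
 * &= -(1 << 2) == &= ~0x3, clearing every scope the reset just covered
 * while preserving any wider request that arrived in the meantime.
 */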

void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
{
	enum reset_type method;

	switch (type) {
	case RESET_TYPE_INVISIBLE:
	case RESET_TYPE_ALL:
	case RESET_TYPE_WORLD:
	case RESET_TYPE_DISABLE:
		method = type;
		netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
			  RESET_TYPE(method));
		break;
	default:
		method = efx->type->map_reset_reason(type);
		netif_dbg(efx, drv, efx->net_dev,
			  "scheduling %s reset for %s\n",
			  RESET_TYPE(method), RESET_TYPE(type));
		break;
	}

	set_bit(method, &efx->reset_pending);

	/* efx_process_channel() will no longer read events once a
	 * reset is scheduled, so switch back to polled MCDI completions.
	 */
	efx_mcdi_mode_poll(efx);

	queue_work(reset_workqueue, &efx->reset_work);
}

/**************************************************************************
 *
 * List of NICs we support
 *
 **************************************************************************/

/* PCI device ID table */
static DEFINE_PCI_DEVICE_TABLE(efx_pci_table) = {
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
		    PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0),
	 .driver_data = (unsigned long) &falcon_a1_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
		    PCI_DEVICE_ID_SOLARFLARE_SFC4000B),
	 .driver_data = (unsigned long) &falcon_b0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0803),	/* SFC9020 */
	 .driver_data = (unsigned long) &siena_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813),	/* SFL9021 */
	 .driver_data = (unsigned long) &siena_a0_nic_type},
	{0}			/* end of list */
};

/**************************************************************************
 *
 * Dummy PHY/MAC operations
 *
 * Can be used for some unimplemented operations
 * Needed so all function pointers are valid and do not have to be tested
 * before use
 *
 **************************************************************************/

int efx_port_dummy_op_int(struct efx_nic *efx)
{
	return 0;
}
void efx_port_dummy_op_void(struct efx_nic *efx) {}

static bool efx_port_dummy_op_poll(struct efx_nic *efx)
{
	return false;
}

static const struct efx_phy_operations efx_dummy_phy_operations = {
	.init		= efx_port_dummy_op_int,
	.reconfigure	= efx_port_dummy_op_int,
	.poll		= efx_port_dummy_op_poll,
	.fini		= efx_port_dummy_op_void,
};

/**************************************************************************
 *
 * Data housekeeping
 *
 **************************************************************************/

/* This zeroes out and then fills in the invariants in a struct
 * efx_nic (including all sub-structures).
 */
static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type,
			   struct pci_dev *pci_dev, struct net_device *net_dev)
{
	int i;

	/* Initialise common structures */
	memset(efx, 0, sizeof(*efx));
	spin_lock_init(&efx->biu_lock);
#ifdef CONFIG_SFC_MTD
	INIT_LIST_HEAD(&efx->mtd_list);
#endif
	INIT_WORK(&efx->reset_work, efx_reset_work);
	INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor);
	efx->pci_dev = pci_dev;
	efx->msg_enable = debug;
	efx->state = STATE_INIT;
	strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));

	efx->net_dev = net_dev;
	spin_lock_init(&efx->stats_lock);
	mutex_init(&efx->mac_lock);
	efx->phy_op = &efx_dummy_phy_operations;
	efx->mdio.dev = net_dev;
	INIT_WORK(&efx->mac_work, efx_mac_work);

	for (i = 0; i < EFX_MAX_CHANNELS; i++) {
		efx->channel[i] = efx_alloc_channel(efx, i, NULL);
		if (!efx->channel[i])
			goto fail;
	}

	efx->type = type;

	EFX_BUG_ON_PARANOID(efx->type->phys_addr_channels > EFX_MAX_CHANNELS);

	/* Higher numbered interrupt modes are less capable! */
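	/* Worked example (added commentary, not from the original source):
	 * interrupt modes are numbered 0 => MSI-X, 1 => MSI, 2 => legacy,
	 * so max() below clamps the user's request to what the NIC type
	 * supports. A NIC limited to MSI (max_interrupt_mode == 1) turns a
	 * request for MSI-X (0) into MSI, while a request for legacy (2)
	 * passes through unchanged.
	 */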
	efx->interrupt_mode = max(efx->type->max_interrupt_mode,
				  interrupt_mode);

	/* Would be good to use the net_dev name, but we're too early */
	snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
		 pci_name(pci_dev));
	efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
	if (!efx->workqueue)
		goto fail;

	return 0;

fail:
	efx_fini_struct(efx);
	return -ENOMEM;
}

static void efx_fini_struct(struct efx_nic *efx)
{
	int i;

	for (i = 0; i < EFX_MAX_CHANNELS; i++)
		kfree(efx->channel[i]);

	if (efx->workqueue) {
		destroy_workqueue(efx->workqueue);
		efx->workqueue = NULL;
	}
}

/**************************************************************************
 *
 * PCI interface
 *
 **************************************************************************/

/* Main body of final NIC shutdown code
 * This is called only at module unload (or hotplug removal).
 */
static void efx_pci_remove_main(struct efx_nic *efx)
{
#ifdef CONFIG_RFS_ACCEL
	free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
	efx->net_dev->rx_cpu_rmap = NULL;
#endif
	efx_nic_fini_interrupt(efx);
	efx_fini_channels(efx);
	efx_fini_port(efx);
	efx->type->fini(efx);
	efx_fini_napi(efx);
	efx_remove_all(efx);
}

/* Final NIC shutdown
 * This is called only at module unload (or hotplug removal).
 */
static void efx_pci_remove(struct pci_dev *pci_dev)
{
	struct efx_nic *efx;

	efx = pci_get_drvdata(pci_dev);
	if (!efx)
		return;

	/* Mark the NIC as fini, then stop the interface */
	rtnl_lock();
	efx->state = STATE_FINI;
	dev_close(efx->net_dev);

	/* Allow any queued resets to complete */
	rtnl_unlock();

	efx_unregister_netdev(efx);

	efx_mtd_remove(efx);

	/* Wait for any scheduled resets to complete. No more will be
	 * scheduled from this point because efx_stop_all() has been
	 * called, we are no longer registered with driverlink, and
	 * the net_devices have been removed.
	 */
	cancel_work_sync(&efx->reset_work);

	efx_pci_remove_main(efx);

	efx_fini_io(efx);
	netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n");

	pci_set_drvdata(pci_dev, NULL);
	efx_fini_struct(efx);
	free_netdev(efx->net_dev);
}

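/* Ordering note (added commentary, not from the original source):
 * efx_pci_remove_main() above unwinds efx_pci_probe_main() below in
 * exactly the reverse order of construction - interrupts, channels,
 * port, NIC-type state, NAPI, then the remaining software state - so a
 * change to one function generally requires a mirrored change in the
 * other.
 */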

/* Main body of NIC initialisation
 * This is called at module load (or hotplug insertion, theoretically).
 */
static int efx_pci_probe_main(struct efx_nic *efx)
{
	int rc;

	/* Do start-of-day initialisation */
	rc = efx_probe_all(efx);
	if (rc)
		goto fail1;

	efx_init_napi(efx);

	rc = efx->type->init(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "failed to initialise NIC\n");
		goto fail3;
	}

	rc = efx_init_port(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "failed to initialise port\n");
		goto fail4;
	}

	efx_init_channels(efx);

	rc = efx_nic_init_interrupt(efx);
	if (rc)
		goto fail5;

	return 0;

fail5:
	efx_fini_channels(efx);
	efx_fini_port(efx);
fail4:
	efx->type->fini(efx);
fail3:
	efx_fini_napi(efx);
	efx_remove_all(efx);
fail1:
	return rc;
}

/* NIC initialisation
 *
 * This is called at module load (or hotplug insertion,
 * theoretically). It sets up PCI mappings, resets the NIC,
 * sets up and registers the network device with the kernel and hooks
 * the interrupt service routine. It does not prepare the device for
 * transmission; this is left until the first time one of the network
 * interfaces is brought up (i.e. efx_net_open).
 */
static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
				   const struct pci_device_id *entry)
{
	const struct efx_nic_type *type =
		(const struct efx_nic_type *) entry->driver_data;
	struct net_device *net_dev;
	struct efx_nic *efx;
	int i, rc;

	/* Allocate and initialise a struct net_device and struct efx_nic */
	net_dev = alloc_etherdev_mqs(sizeof(*efx), EFX_MAX_CORE_TX_QUEUES,
				     EFX_MAX_RX_QUEUES);
	if (!net_dev)
		return -ENOMEM;
	net_dev->features |= (type->offload_features | NETIF_F_SG |
			      NETIF_F_HIGHDMA | NETIF_F_TSO |
			      NETIF_F_RXCSUM);
	if (type->offload_features & NETIF_F_V6_CSUM)
		net_dev->features |= NETIF_F_TSO6;
	/* Mask for features that also apply to VLAN devices */
	net_dev->vlan_features |= (NETIF_F_ALL_CSUM | NETIF_F_SG |
				   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
				   NETIF_F_RXCSUM);
	/* All offloads can be toggled */
	net_dev->hw_features = net_dev->features & ~NETIF_F_HIGHDMA;
	efx = netdev_priv(net_dev);
	pci_set_drvdata(pci_dev, efx);
	SET_NETDEV_DEV(net_dev, &pci_dev->dev);
	rc = efx_init_struct(efx, type, pci_dev, net_dev);
	if (rc)
		goto fail1;

	netif_info(efx, probe, efx->net_dev,
		   "Solarflare NIC detected\n");

	/* Set up basic I/O (BAR mappings etc) */
	rc = efx_init_io(efx);
	if (rc)
		goto fail2;

	/* No serialisation is required with the reset path because
	 * we're in STATE_INIT.
	 */
	for (i = 0; i < 5; i++) {
		rc = efx_pci_probe_main(efx);

		/* Serialise against efx_reset(). No more resets will be
		 * scheduled since efx_stop_all() has been called, and we
		 * have never been registered with either the rtnetlink
		 * or driverlink layers.
		 */
		cancel_work_sync(&efx->reset_work);

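		/* Walk-through of the retry policy below (added commentary,
		 * not from the original source): if probing succeeded but a
		 * reset was scheduled meanwhile, the hardware state is
		 * suspect, so the instance is torn down again. The probe is
		 * then retried, up to five times in total, only when the
		 * pending scopes are limited to INVISIBLE and/or ALL; any
		 * wider scope, or a failure with no reset pending at all,
		 * aborts the probe.
		 */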
		if (rc == 0) {
			if (efx->reset_pending) {
				/* If there was a scheduled reset during
				 * probe, the NIC is probably hosed anyway.
				 */
				efx_pci_remove_main(efx);
				rc = -EIO;
			} else {
				break;
			}
		}

		/* Retry if a recoverable reset event has been scheduled */
		if (efx->reset_pending &
		    ~(1 << RESET_TYPE_INVISIBLE | 1 << RESET_TYPE_ALL) ||
		    !efx->reset_pending)
			goto fail3;

		efx->reset_pending = 0;
	}

	if (rc) {
		netif_err(efx, probe, efx->net_dev, "Could not reset NIC\n");
		goto fail4;
	}

	/* Switch to the running state before we expose the device to the OS,
	 * so that dev_open()/efx_start_all() will actually start the device.
	 */
	efx->state = STATE_RUNNING;

	rc = efx_register_netdev(efx);
	if (rc)
		goto fail5;

	netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");

	rtnl_lock();
	efx_mtd_probe(efx); /* allowed to fail */
	rtnl_unlock();
	return 0;

fail5:
	efx_pci_remove_main(efx);
fail4:
fail3:
	efx_fini_io(efx);
fail2:
	efx_fini_struct(efx);
fail1:
	WARN_ON(rc > 0);
	netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc);
	free_netdev(net_dev);
	return rc;
}

static int efx_pm_freeze(struct device *dev)
{
	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));

	efx->state = STATE_FINI;

	netif_device_detach(efx->net_dev);

	efx_stop_all(efx);
	efx_fini_channels(efx);

	return 0;
}

static int efx_pm_thaw(struct device *dev)
{
	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));

	efx->state = STATE_INIT;

	efx_init_channels(efx);

	mutex_lock(&efx->mac_lock);
	efx->phy_op->reconfigure(efx);
	mutex_unlock(&efx->mac_lock);

	efx_start_all(efx);

	netif_device_attach(efx->net_dev);

	efx->state = STATE_RUNNING;

	efx->type->resume_wol(efx);

	/* Reschedule any quenched resets scheduled during efx_pm_freeze() */
	queue_work(reset_workqueue, &efx->reset_work);

	return 0;
}

static int efx_pm_poweroff(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct efx_nic *efx = pci_get_drvdata(pci_dev);

	efx->type->fini(efx);

	efx->reset_pending = 0;

	pci_save_state(pci_dev);
	return pci_set_power_state(pci_dev, PCI_D3hot);
}

/* Used for both resume and restore */
static int efx_pm_resume(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct efx_nic *efx = pci_get_drvdata(pci_dev);
	int rc;

	rc = pci_set_power_state(pci_dev, PCI_D0);
	if (rc)
		return rc;
	pci_restore_state(pci_dev);
	rc = pci_enable_device(pci_dev);
	if (rc)
		return rc;
	pci_set_master(efx->pci_dev);
	rc = efx->type->reset(efx, RESET_TYPE_ALL);
	if (rc)
		return rc;
	rc = efx->type->init(efx);
	if (rc)
		return rc;
	efx_pm_thaw(dev);
	return 0;
}

static int efx_pm_suspend(struct device *dev)
{
	int rc;

	efx_pm_freeze(dev);
	rc = efx_pm_poweroff(dev);
	if (rc)
		efx_pm_resume(dev);
	return rc;
}

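/* Note on the callback mapping below (standard dev_pm_ops semantics,
 * added commentary): suspend/resume serve suspend-to-RAM, freeze/thaw
 * bracket hibernation image creation, and poweroff/restore cover the
 * final power-down and the post-boot restore. efx_pm_resume() doubles
 * as the restore handler because both paths must bring the hardware
 * back from an unknown power state with a full reset.
 */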

static const struct dev_pm_ops efx_pm_ops = {
	.suspend	= efx_pm_suspend,
	.resume		= efx_pm_resume,
	.freeze		= efx_pm_freeze,
	.thaw		= efx_pm_thaw,
	.poweroff	= efx_pm_poweroff,
	.restore	= efx_pm_resume,
};

static struct pci_driver efx_pci_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= efx_pci_table,
	.probe		= efx_pci_probe,
	.remove		= efx_pci_remove,
	.driver.pm	= &efx_pm_ops,
};

/**************************************************************************
 *
 * Kernel module interface
 *
 *************************************************************************/

module_param(interrupt_mode, uint, 0444);
MODULE_PARM_DESC(interrupt_mode,
		 "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");

static int __init efx_init_module(void)
{
	int rc;

	printk(KERN_INFO "Solarflare NET driver v" EFX_DRIVER_VERSION "\n");

	rc = register_netdevice_notifier(&efx_netdev_notifier);
	if (rc)
		goto err_notifier;

	reset_workqueue = create_singlethread_workqueue("sfc_reset");
	if (!reset_workqueue) {
		rc = -ENOMEM;
		goto err_reset;
	}

	rc = pci_register_driver(&efx_pci_driver);
	if (rc < 0)
		goto err_pci;

	return 0;

err_pci:
	destroy_workqueue(reset_workqueue);
err_reset:
	unregister_netdevice_notifier(&efx_netdev_notifier);
err_notifier:
	return rc;
}

static void __exit efx_exit_module(void)
{
	printk(KERN_INFO "Solarflare NET driver unloading\n");

	pci_unregister_driver(&efx_pci_driver);
	destroy_workqueue(reset_workqueue);
	unregister_netdevice_notifier(&efx_netdev_notifier);
}

module_init(efx_init_module);
module_exit(efx_exit_module);

MODULE_AUTHOR("Solarflare Communications and "
	      "Michael Brown <mbrown@fensystems.co.uk>");
MODULE_DESCRIPTION("Solarflare Communications network driver");
MODULE_LICENSE("GPL");
MODULE_DEVICE_TABLE(pci, efx_pci_table);