1 // SPDX-License-Identifier: GPL-2.0-only 2 /**************************************************************************** 3 * Driver for Solarflare network controllers and boards 4 * Copyright 2018 Solarflare Communications Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published 8 * by the Free Software Foundation, incorporated herein by reference. 9 */ 10 11 #include "net_driver.h" 12 #include <linux/filter.h> 13 #include <linux/module.h> 14 #include <linux/netdevice.h> 15 #include <net/gre.h> 16 #include "efx_common.h" 17 #include "efx_channels.h" 18 #include "efx.h" 19 #include "mcdi.h" 20 #include "selftest.h" 21 #include "rx_common.h" 22 #include "tx_common.h" 23 #include "nic.h" 24 #include "mcdi_port_common.h" 25 #include "io.h" 26 #include "mcdi_pcol.h" 27 #include "ef100_rep.h" 28 29 static unsigned int debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE | 30 NETIF_MSG_LINK | NETIF_MSG_IFDOWN | 31 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR | 32 NETIF_MSG_TX_ERR | NETIF_MSG_HW); 33 module_param(debug, uint, 0); 34 MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value"); 35 36 /* This is the time (in jiffies) between invocations of the hardware 37 * monitor. 38 */ 39 static unsigned int efx_monitor_interval = 1 * HZ; 40 41 /* How often and how many times to poll for a reset while waiting for a 42 * BIST that another function started to complete. 
43 */ 44 #define BIST_WAIT_DELAY_MS 100 45 #define BIST_WAIT_DELAY_COUNT 100 46 47 /* Default stats update time */ 48 #define STATS_PERIOD_MS_DEFAULT 1000 49 50 static const unsigned int efx_reset_type_max = RESET_TYPE_MAX; 51 static const char *const efx_reset_type_names[] = { 52 [RESET_TYPE_INVISIBLE] = "INVISIBLE", 53 [RESET_TYPE_ALL] = "ALL", 54 [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", 55 [RESET_TYPE_WORLD] = "WORLD", 56 [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", 57 [RESET_TYPE_DATAPATH] = "DATAPATH", 58 [RESET_TYPE_MC_BIST] = "MC_BIST", 59 [RESET_TYPE_DISABLE] = "DISABLE", 60 [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", 61 [RESET_TYPE_INT_ERROR] = "INT_ERROR", 62 [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", 63 [RESET_TYPE_TX_SKIP] = "TX_SKIP", 64 [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", 65 [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)", 66 }; 67 68 #define RESET_TYPE(type) \ 69 STRING_TABLE_LOOKUP(type, efx_reset_type) 70 71 /* Loopback mode names (see LOOPBACK_MODE()) */ 72 const unsigned int efx_loopback_mode_max = LOOPBACK_MAX; 73 const char *const efx_loopback_mode_names[] = { 74 [LOOPBACK_NONE] = "NONE", 75 [LOOPBACK_DATA] = "DATAPATH", 76 [LOOPBACK_GMAC] = "GMAC", 77 [LOOPBACK_XGMII] = "XGMII", 78 [LOOPBACK_XGXS] = "XGXS", 79 [LOOPBACK_XAUI] = "XAUI", 80 [LOOPBACK_GMII] = "GMII", 81 [LOOPBACK_SGMII] = "SGMII", 82 [LOOPBACK_XGBR] = "XGBR", 83 [LOOPBACK_XFI] = "XFI", 84 [LOOPBACK_XAUI_FAR] = "XAUI_FAR", 85 [LOOPBACK_GMII_FAR] = "GMII_FAR", 86 [LOOPBACK_SGMII_FAR] = "SGMII_FAR", 87 [LOOPBACK_XFI_FAR] = "XFI_FAR", 88 [LOOPBACK_GPHY] = "GPHY", 89 [LOOPBACK_PHYXS] = "PHYXS", 90 [LOOPBACK_PCS] = "PCS", 91 [LOOPBACK_PMAPMD] = "PMA/PMD", 92 [LOOPBACK_XPORT] = "XPORT", 93 [LOOPBACK_XGMII_WS] = "XGMII_WS", 94 [LOOPBACK_XAUI_WS] = "XAUI_WS", 95 [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR", 96 [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR", 97 [LOOPBACK_GMII_WS] = "GMII_WS", 98 [LOOPBACK_XFI_WS] = "XFI_WS", 99 [LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR", 100 
[LOOPBACK_PHYXS_WS] = "PHYXS_WS", 101 }; 102 103 /* Reset workqueue. If any NIC has a hardware failure then a reset will be 104 * queued onto this work queue. This is not a per-nic work queue, because 105 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. 106 */ 107 static struct workqueue_struct *reset_workqueue; 108 109 int efx_create_reset_workqueue(void) 110 { 111 reset_workqueue = create_singlethread_workqueue("sfc_reset"); 112 if (!reset_workqueue) { 113 printk(KERN_ERR "Failed to create reset workqueue\n"); 114 return -ENOMEM; 115 } 116 117 return 0; 118 } 119 120 void efx_queue_reset_work(struct efx_nic *efx) 121 { 122 queue_work(reset_workqueue, &efx->reset_work); 123 } 124 125 void efx_flush_reset_workqueue(struct efx_nic *efx) 126 { 127 cancel_work_sync(&efx->reset_work); 128 } 129 130 void efx_destroy_reset_workqueue(void) 131 { 132 if (reset_workqueue) { 133 destroy_workqueue(reset_workqueue); 134 reset_workqueue = NULL; 135 } 136 } 137 138 /* We assume that efx->type->reconfigure_mac will always try to sync RX 139 * filters and therefore needs to read-lock the filter table against freeing 140 */ 141 void efx_mac_reconfigure(struct efx_nic *efx, bool mtu_only) 142 { 143 if (efx->type->reconfigure_mac) { 144 down_read(&efx->filter_sem); 145 efx->type->reconfigure_mac(efx, mtu_only); 146 up_read(&efx->filter_sem); 147 } 148 } 149 150 /* Asynchronous work item for changing MAC promiscuity and multicast 151 * hash. Avoid a drain/rx_ingress enable by reconfiguring the current 152 * MAC directly. 
153 */ 154 static void efx_mac_work(struct work_struct *data) 155 { 156 struct efx_nic *efx = container_of(data, struct efx_nic, mac_work); 157 158 mutex_lock(&efx->mac_lock); 159 if (efx->port_enabled) 160 efx_mac_reconfigure(efx, false); 161 mutex_unlock(&efx->mac_lock); 162 } 163 164 int efx_set_mac_address(struct net_device *net_dev, void *data) 165 { 166 struct efx_nic *efx = efx_netdev_priv(net_dev); 167 struct sockaddr *addr = data; 168 u8 *new_addr = addr->sa_data; 169 u8 old_addr[6]; 170 int rc; 171 172 if (!is_valid_ether_addr(new_addr)) { 173 netif_err(efx, drv, efx->net_dev, 174 "invalid ethernet MAC address requested: %pM\n", 175 new_addr); 176 return -EADDRNOTAVAIL; 177 } 178 179 /* save old address */ 180 ether_addr_copy(old_addr, net_dev->dev_addr); 181 eth_hw_addr_set(net_dev, new_addr); 182 if (efx->type->set_mac_address) { 183 rc = efx->type->set_mac_address(efx); 184 if (rc) { 185 eth_hw_addr_set(net_dev, old_addr); 186 return rc; 187 } 188 } 189 190 /* Reconfigure the MAC */ 191 mutex_lock(&efx->mac_lock); 192 efx_mac_reconfigure(efx, false); 193 mutex_unlock(&efx->mac_lock); 194 195 return 0; 196 } 197 198 /* Context: netif_addr_lock held, BHs disabled. */ 199 void efx_set_rx_mode(struct net_device *net_dev) 200 { 201 struct efx_nic *efx = efx_netdev_priv(net_dev); 202 203 if (efx->port_enabled) 204 queue_work(efx->workqueue, &efx->mac_work); 205 /* Otherwise efx_start_port() will do this */ 206 } 207 208 int efx_set_features(struct net_device *net_dev, netdev_features_t data) 209 { 210 struct efx_nic *efx = efx_netdev_priv(net_dev); 211 int rc; 212 213 /* If disabling RX n-tuple filtering, clear existing filters */ 214 if (net_dev->features & ~data & NETIF_F_NTUPLE) { 215 rc = efx->type->filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL); 216 if (rc) 217 return rc; 218 } 219 220 /* If Rx VLAN filter is changed, update filters via mac_reconfigure. 221 * If rx-fcs is changed, mac_reconfigure updates that too. 
222 */ 223 if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER | 224 NETIF_F_RXFCS)) { 225 /* efx_set_rx_mode() will schedule MAC work to update filters 226 * when a new features are finally set in net_dev. 227 */ 228 efx_set_rx_mode(net_dev); 229 } 230 231 return 0; 232 } 233 234 /* This ensures that the kernel is kept informed (via 235 * netif_carrier_on/off) of the link status, and also maintains the 236 * link status's stop on the port's TX queue. 237 */ 238 void efx_link_status_changed(struct efx_nic *efx) 239 { 240 struct efx_link_state *link_state = &efx->link_state; 241 242 /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure 243 * that no events are triggered between unregister_netdev() and the 244 * driver unloading. A more general condition is that NETDEV_CHANGE 245 * can only be generated between NETDEV_UP and NETDEV_DOWN 246 */ 247 if (!netif_running(efx->net_dev)) 248 return; 249 250 if (link_state->up != netif_carrier_ok(efx->net_dev)) { 251 efx->n_link_state_changes++; 252 253 if (link_state->up) 254 netif_carrier_on(efx->net_dev); 255 else 256 netif_carrier_off(efx->net_dev); 257 } 258 259 /* Status message for kernel log */ 260 if (link_state->up) 261 netif_info(efx, link, efx->net_dev, 262 "link up at %uMbps %s-duplex (MTU %d)\n", 263 link_state->speed, link_state->fd ? "full" : "half", 264 efx->net_dev->mtu); 265 else 266 netif_info(efx, link, efx->net_dev, "link down\n"); 267 } 268 269 unsigned int efx_xdp_max_mtu(struct efx_nic *efx) 270 { 271 /* The maximum MTU that we can fit in a single page, allowing for 272 * framing, overhead and XDP headroom + tailroom. 273 */ 274 int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) + 275 efx->rx_prefix_size + efx->type->rx_buffer_padding + 276 efx->rx_ip_align + EFX_XDP_HEADROOM + EFX_XDP_TAILROOM; 277 278 return PAGE_SIZE - overhead; 279 } 280 281 /* Context: process, rtnl_lock() held. 
*/ 282 int efx_change_mtu(struct net_device *net_dev, int new_mtu) 283 { 284 struct efx_nic *efx = efx_netdev_priv(net_dev); 285 int rc; 286 287 rc = efx_check_disabled(efx); 288 if (rc) 289 return rc; 290 291 if (rtnl_dereference(efx->xdp_prog) && 292 new_mtu > efx_xdp_max_mtu(efx)) { 293 netif_err(efx, drv, efx->net_dev, 294 "Requested MTU of %d too big for XDP (max: %d)\n", 295 new_mtu, efx_xdp_max_mtu(efx)); 296 return -EINVAL; 297 } 298 299 netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); 300 301 efx_device_detach_sync(efx); 302 efx_stop_all(efx); 303 304 mutex_lock(&efx->mac_lock); 305 net_dev->mtu = new_mtu; 306 efx_mac_reconfigure(efx, true); 307 mutex_unlock(&efx->mac_lock); 308 309 efx_start_all(efx); 310 efx_device_attach_if_not_resetting(efx); 311 return 0; 312 } 313 314 /************************************************************************** 315 * 316 * Hardware monitor 317 * 318 **************************************************************************/ 319 320 /* Run periodically off the general workqueue */ 321 static void efx_monitor(struct work_struct *data) 322 { 323 struct efx_nic *efx = container_of(data, struct efx_nic, 324 monitor_work.work); 325 326 netif_vdbg(efx, timer, efx->net_dev, 327 "hardware monitor executing on CPU %d\n", 328 raw_smp_processor_id()); 329 BUG_ON(efx->type->monitor == NULL); 330 331 /* If the mac_lock is already held then it is likely a port 332 * reconfiguration is already in place, which will likely do 333 * most of the work of monitor() anyway. 
334 */ 335 if (mutex_trylock(&efx->mac_lock)) { 336 if (efx->port_enabled && efx->type->monitor) 337 efx->type->monitor(efx); 338 mutex_unlock(&efx->mac_lock); 339 } 340 341 efx_start_monitor(efx); 342 } 343 344 void efx_start_monitor(struct efx_nic *efx) 345 { 346 if (efx->type->monitor) 347 queue_delayed_work(efx->workqueue, &efx->monitor_work, 348 efx_monitor_interval); 349 } 350 351 /************************************************************************** 352 * 353 * Event queue processing 354 * 355 *************************************************************************/ 356 357 /* Channels are shutdown and reinitialised whilst the NIC is running 358 * to propagate configuration changes (mtu, checksum offload), or 359 * to clear hardware error conditions 360 */ 361 static void efx_start_datapath(struct efx_nic *efx) 362 { 363 netdev_features_t old_features = efx->net_dev->features; 364 bool old_rx_scatter = efx->rx_scatter; 365 size_t rx_buf_len; 366 367 /* Calculate the rx buffer allocation parameters required to 368 * support the current MTU, including padding for header 369 * alignment and overruns. 
370 */ 371 efx->rx_dma_len = (efx->rx_prefix_size + 372 EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + 373 efx->type->rx_buffer_padding); 374 rx_buf_len = (sizeof(struct efx_rx_page_state) + EFX_XDP_HEADROOM + 375 efx->rx_ip_align + efx->rx_dma_len + EFX_XDP_TAILROOM); 376 377 if (rx_buf_len <= PAGE_SIZE) { 378 efx->rx_scatter = efx->type->always_rx_scatter; 379 efx->rx_buffer_order = 0; 380 } else if (efx->type->can_rx_scatter) { 381 BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES); 382 BUILD_BUG_ON(sizeof(struct efx_rx_page_state) + 383 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE, 384 EFX_RX_BUF_ALIGNMENT) > 385 PAGE_SIZE); 386 efx->rx_scatter = true; 387 efx->rx_dma_len = EFX_RX_USR_BUF_SIZE; 388 efx->rx_buffer_order = 0; 389 } else { 390 efx->rx_scatter = false; 391 efx->rx_buffer_order = get_order(rx_buf_len); 392 } 393 394 efx_rx_config_page_split(efx); 395 if (efx->rx_buffer_order) 396 netif_dbg(efx, drv, efx->net_dev, 397 "RX buf len=%u; page order=%u batch=%u\n", 398 efx->rx_dma_len, efx->rx_buffer_order, 399 efx->rx_pages_per_batch); 400 else 401 netif_dbg(efx, drv, efx->net_dev, 402 "RX buf len=%u step=%u bpp=%u; page batch=%u\n", 403 efx->rx_dma_len, efx->rx_page_buf_step, 404 efx->rx_bufs_per_page, efx->rx_pages_per_batch); 405 406 /* Restore previously fixed features in hw_features and remove 407 * features which are fixed now 408 */ 409 efx->net_dev->hw_features |= efx->net_dev->features; 410 efx->net_dev->hw_features &= ~efx->fixed_features; 411 efx->net_dev->features |= efx->fixed_features; 412 if (efx->net_dev->features != old_features) 413 netdev_features_change(efx->net_dev); 414 415 /* RX filters may also have scatter-enabled flags */ 416 if ((efx->rx_scatter != old_rx_scatter) && 417 efx->type->filter_update_rx_scatter) 418 efx->type->filter_update_rx_scatter(efx); 419 420 /* We must keep at least one descriptor in a TX ring empty. 
421 * We could avoid this when the queue size does not exactly 422 * match the hardware ring size, but it's not that important. 423 * Therefore we stop the queue when one more skb might fill 424 * the ring completely. We wake it when half way back to 425 * empty. 426 */ 427 efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx); 428 efx->txq_wake_thresh = efx->txq_stop_thresh / 2; 429 430 /* Initialise the channels */ 431 efx_start_channels(efx); 432 433 efx_ptp_start_datapath(efx); 434 435 if (netif_device_present(efx->net_dev)) 436 netif_tx_wake_all_queues(efx->net_dev); 437 } 438 439 static void efx_stop_datapath(struct efx_nic *efx) 440 { 441 EFX_ASSERT_RESET_SERIALISED(efx); 442 BUG_ON(efx->port_enabled); 443 444 efx_ptp_stop_datapath(efx); 445 446 efx_stop_channels(efx); 447 } 448 449 /************************************************************************** 450 * 451 * Port handling 452 * 453 **************************************************************************/ 454 455 /* Equivalent to efx_link_set_advertising with all-zeroes, except does not 456 * force the Autoneg bit on. 
457 */ 458 void efx_link_clear_advertising(struct efx_nic *efx) 459 { 460 bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS); 461 efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX); 462 } 463 464 void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc) 465 { 466 efx->wanted_fc = wanted_fc; 467 if (efx->link_advertising[0]) { 468 if (wanted_fc & EFX_FC_RX) 469 efx->link_advertising[0] |= (ADVERTISED_Pause | 470 ADVERTISED_Asym_Pause); 471 else 472 efx->link_advertising[0] &= ~(ADVERTISED_Pause | 473 ADVERTISED_Asym_Pause); 474 if (wanted_fc & EFX_FC_TX) 475 efx->link_advertising[0] ^= ADVERTISED_Asym_Pause; 476 } 477 } 478 479 static void efx_start_port(struct efx_nic *efx) 480 { 481 netif_dbg(efx, ifup, efx->net_dev, "start port\n"); 482 BUG_ON(efx->port_enabled); 483 484 mutex_lock(&efx->mac_lock); 485 efx->port_enabled = true; 486 487 /* Ensure MAC ingress/egress is enabled */ 488 efx_mac_reconfigure(efx, false); 489 490 mutex_unlock(&efx->mac_lock); 491 } 492 493 /* Cancel work for MAC reconfiguration, periodic hardware monitoring 494 * and the async self-test, wait for them to finish and prevent them 495 * being scheduled again. This doesn't cover online resets, which 496 * should only be cancelled when removing the device. 497 */ 498 static void efx_stop_port(struct efx_nic *efx) 499 { 500 netif_dbg(efx, ifdown, efx->net_dev, "stop port\n"); 501 502 EFX_ASSERT_RESET_SERIALISED(efx); 503 504 mutex_lock(&efx->mac_lock); 505 efx->port_enabled = false; 506 mutex_unlock(&efx->mac_lock); 507 508 /* Serialise against efx_set_multicast_list() */ 509 netif_addr_lock_bh(efx->net_dev); 510 netif_addr_unlock_bh(efx->net_dev); 511 512 cancel_delayed_work_sync(&efx->monitor_work); 513 efx_selftest_async_cancel(efx); 514 cancel_work_sync(&efx->mac_work); 515 } 516 517 /* If the interface is supposed to be running but is not, start 518 * the hardware and software data path, regular activity for the port 519 * (MAC statistics, link polling, etc.) 
and schedule the port to be
 * reconfigured.  Interrupts must already be enabled.  This function
 * is safe to call multiple times, so long as the NIC is not disabled.
 * Requires the RTNL lock.
 */
void efx_start_all(struct efx_nic *efx)
{
	EFX_ASSERT_RESET_SERIALISED(efx);
	BUG_ON(efx->state == STATE_DISABLED);

	/* Check that it is appropriate to restart the interface. All
	 * of these flags are safe to read under just the rtnl lock
	 */
	if (efx->port_enabled || !netif_running(efx->net_dev) ||
	    efx->reset_pending)
		return;

	efx_start_port(efx);
	efx_start_datapath(efx);

	/* Start the hardware monitor if there is one */
	efx_start_monitor(efx);

	efx_selftest_async_start(efx);

	/* Link state detection is normally event-driven; we have
	 * to poll now because we could have missed a change
	 */
	mutex_lock(&efx->mac_lock);
	if (efx_mcdi_phy_poll(efx))
		efx_link_status_changed(efx);
	mutex_unlock(&efx->mac_lock);

	/* NIC types without a start_stats hook have nothing more to do */
	if (!efx->type->start_stats)
		return;

	efx->type->start_stats(efx);
	efx->type->pull_stats(efx);
	spin_lock_bh(&efx->stats_lock);
	efx->type->update_stats(efx, NULL, NULL);
	spin_unlock_bh(&efx->stats_lock);
}

/* Quiesce the hardware and software data path, and regular activity
 * for the port without bringing the link down.  Safe to call multiple
 * times with the NIC in almost any state, but interrupts should be
 * enabled.  Requires the RTNL lock.
565 */ 566 void efx_stop_all(struct efx_nic *efx) 567 { 568 EFX_ASSERT_RESET_SERIALISED(efx); 569 570 /* port_enabled can be read safely under the rtnl lock */ 571 if (!efx->port_enabled) 572 return; 573 574 if (efx->type->update_stats) { 575 /* update stats before we go down so we can accurately count 576 * rx_nodesc_drops 577 */ 578 efx->type->pull_stats(efx); 579 spin_lock_bh(&efx->stats_lock); 580 efx->type->update_stats(efx, NULL, NULL); 581 spin_unlock_bh(&efx->stats_lock); 582 efx->type->stop_stats(efx); 583 } 584 585 efx_stop_port(efx); 586 587 /* Stop the kernel transmit interface. This is only valid if 588 * the device is stopped or detached; otherwise the watchdog 589 * may fire immediately. 590 */ 591 WARN_ON(netif_running(efx->net_dev) && 592 netif_device_present(efx->net_dev)); 593 netif_tx_disable(efx->net_dev); 594 595 efx_stop_datapath(efx); 596 } 597 598 /* Context: process, rcu_read_lock or RTNL held, non-blocking. */ 599 void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats) 600 { 601 struct efx_nic *efx = efx_netdev_priv(net_dev); 602 603 spin_lock_bh(&efx->stats_lock); 604 efx_nic_update_stats_atomic(efx, NULL, stats); 605 spin_unlock_bh(&efx->stats_lock); 606 } 607 608 /* Push loopback/power/transmit disable settings to the PHY, and reconfigure 609 * the MAC appropriately. All other PHY configuration changes are pushed 610 * through phy_op->set_settings(), and pushed asynchronously to the MAC 611 * through efx_monitor(). 
612 * 613 * Callers must hold the mac_lock 614 */ 615 int __efx_reconfigure_port(struct efx_nic *efx) 616 { 617 enum efx_phy_mode phy_mode; 618 int rc = 0; 619 620 WARN_ON(!mutex_is_locked(&efx->mac_lock)); 621 622 /* Disable PHY transmit in mac level loopbacks */ 623 phy_mode = efx->phy_mode; 624 if (LOOPBACK_INTERNAL(efx)) 625 efx->phy_mode |= PHY_MODE_TX_DISABLED; 626 else 627 efx->phy_mode &= ~PHY_MODE_TX_DISABLED; 628 629 if (efx->type->reconfigure_port) 630 rc = efx->type->reconfigure_port(efx); 631 632 if (rc) 633 efx->phy_mode = phy_mode; 634 635 return rc; 636 } 637 638 /* Reinitialise the MAC to pick up new PHY settings, even if the port is 639 * disabled. 640 */ 641 int efx_reconfigure_port(struct efx_nic *efx) 642 { 643 int rc; 644 645 EFX_ASSERT_RESET_SERIALISED(efx); 646 647 mutex_lock(&efx->mac_lock); 648 rc = __efx_reconfigure_port(efx); 649 mutex_unlock(&efx->mac_lock); 650 651 return rc; 652 } 653 654 /************************************************************************** 655 * 656 * Device reset and suspend 657 * 658 **************************************************************************/ 659 660 static void efx_wait_for_bist_end(struct efx_nic *efx) 661 { 662 int i; 663 664 for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) { 665 if (efx_mcdi_poll_reboot(efx)) 666 goto out; 667 msleep(BIST_WAIT_DELAY_MS); 668 } 669 670 netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n"); 671 out: 672 /* Either way unset the BIST flag. If we found no reboot we probably 673 * won't recover, but we should try. 674 */ 675 efx->mc_bist_for_other_fn = false; 676 } 677 678 /* Try recovery mechanisms. 679 * For now only EEH is supported. 680 * Returns 0 if the recovery mechanisms are unsuccessful. 681 * Returns a non-zero value otherwise. 682 */ 683 int efx_try_recovery(struct efx_nic *efx) 684 { 685 #ifdef CONFIG_EEH 686 /* A PCI error can occur and not be seen by EEH because nothing 687 * happens on the PCI bus. 
In this case the driver may fail and 688 * schedule a 'recover or reset', leading to this recovery handler. 689 * Manually call the eeh failure check function. 690 */ 691 struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev); 692 if (eeh_dev_check_failure(eehdev)) { 693 /* The EEH mechanisms will handle the error and reset the 694 * device if necessary. 695 */ 696 return 1; 697 } 698 #endif 699 return 0; 700 } 701 702 /* Tears down the entire software state and most of the hardware state 703 * before reset. 704 */ 705 void efx_reset_down(struct efx_nic *efx, enum reset_type method) 706 { 707 EFX_ASSERT_RESET_SERIALISED(efx); 708 709 if (method == RESET_TYPE_MCDI_TIMEOUT) 710 efx->type->prepare_flr(efx); 711 712 efx_stop_all(efx); 713 efx_disable_interrupts(efx); 714 715 mutex_lock(&efx->mac_lock); 716 down_write(&efx->filter_sem); 717 mutex_lock(&efx->rss_lock); 718 efx->type->fini(efx); 719 } 720 721 /* Context: netif_tx_lock held, BHs disabled. */ 722 void efx_watchdog(struct net_device *net_dev, unsigned int txqueue) 723 { 724 struct efx_nic *efx = efx_netdev_priv(net_dev); 725 726 netif_err(efx, tx_err, efx->net_dev, 727 "TX stuck with port_enabled=%d: resetting channels\n", 728 efx->port_enabled); 729 730 efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG); 731 } 732 733 /* This function will always ensure that the locks acquired in 734 * efx_reset_down() are released. A failure return code indicates 735 * that we were unable to reinitialise the hardware, and the 736 * driver should be disabled. If ok is false, then the rx and tx 737 * engines are not restarted, pending a RESET_DISABLE. 
738 */ 739 int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) 740 { 741 int rc; 742 743 EFX_ASSERT_RESET_SERIALISED(efx); 744 745 if (method == RESET_TYPE_MCDI_TIMEOUT) 746 efx->type->finish_flr(efx); 747 748 /* Ensure that SRAM is initialised even if we're disabling the device */ 749 rc = efx->type->init(efx); 750 if (rc) { 751 netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); 752 goto fail; 753 } 754 755 if (!ok) 756 goto fail; 757 758 if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && 759 method != RESET_TYPE_DATAPATH) { 760 rc = efx_mcdi_port_reconfigure(efx); 761 if (rc && rc != -EPERM) 762 netif_err(efx, drv, efx->net_dev, 763 "could not restore PHY settings\n"); 764 } 765 766 rc = efx_enable_interrupts(efx); 767 if (rc) 768 goto fail; 769 770 #ifdef CONFIG_SFC_SRIOV 771 rc = efx->type->vswitching_restore(efx); 772 if (rc) /* not fatal; the PF will still work fine */ 773 netif_warn(efx, probe, efx->net_dev, 774 "failed to restore vswitching rc=%d;" 775 " VFs may not function\n", rc); 776 #endif 777 778 if (efx->type->rx_restore_rss_contexts) 779 efx->type->rx_restore_rss_contexts(efx); 780 mutex_unlock(&efx->rss_lock); 781 efx->type->filter_table_restore(efx); 782 up_write(&efx->filter_sem); 783 784 mutex_unlock(&efx->mac_lock); 785 786 efx_start_all(efx); 787 788 if (efx->type->udp_tnl_push_ports) 789 efx->type->udp_tnl_push_ports(efx); 790 791 return 0; 792 793 fail: 794 efx->port_initialized = false; 795 796 mutex_unlock(&efx->rss_lock); 797 up_write(&efx->filter_sem); 798 mutex_unlock(&efx->mac_lock); 799 800 return rc; 801 } 802 803 /* Reset the NIC using the specified method. Note that the reset may 804 * fail, in which case the card will be left in an unusable state. 805 * 806 * Caller must hold the rtnl_lock. 
807 */ 808 int efx_reset(struct efx_nic *efx, enum reset_type method) 809 { 810 int rc, rc2 = 0; 811 bool disabled; 812 813 netif_info(efx, drv, efx->net_dev, "resetting (%s)\n", 814 RESET_TYPE(method)); 815 816 efx_device_detach_sync(efx); 817 /* efx_reset_down() grabs locks that prevent recovery on EF100. 818 * EF100 reset is handled in the efx_nic_type callback below. 819 */ 820 if (efx_nic_rev(efx) != EFX_REV_EF100) 821 efx_reset_down(efx, method); 822 823 rc = efx->type->reset(efx, method); 824 if (rc) { 825 netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n"); 826 goto out; 827 } 828 829 /* Clear flags for the scopes we covered. We assume the NIC and 830 * driver are now quiescent so that there is no race here. 831 */ 832 if (method < RESET_TYPE_MAX_METHOD) 833 efx->reset_pending &= -(1 << (method + 1)); 834 else /* it doesn't fit into the well-ordered scope hierarchy */ 835 __clear_bit(method, &efx->reset_pending); 836 837 /* Reinitialise bus-mastering, which may have been turned off before 838 * the reset was scheduled. This is still appropriate, even in the 839 * RESET_TYPE_DISABLE since this driver generally assumes the hardware 840 * can respond to requests. 841 */ 842 pci_set_master(efx->pci_dev); 843 844 out: 845 /* Leave device stopped if necessary */ 846 disabled = rc || 847 method == RESET_TYPE_DISABLE || 848 method == RESET_TYPE_RECOVER_OR_DISABLE; 849 if (efx_nic_rev(efx) != EFX_REV_EF100) 850 rc2 = efx_reset_up(efx, method, !disabled); 851 if (rc2) { 852 disabled = true; 853 if (!rc) 854 rc = rc2; 855 } 856 857 if (disabled) { 858 dev_close(efx->net_dev); 859 netif_err(efx, drv, efx->net_dev, "has been disabled\n"); 860 efx->state = STATE_DISABLED; 861 } else { 862 netif_dbg(efx, drv, efx->net_dev, "reset complete\n"); 863 efx_device_attach_if_not_resetting(efx); 864 } 865 return rc; 866 } 867 868 /* The worker thread exists so that code that cannot sleep can 869 * schedule a reset for later. 
870 */ 871 static void efx_reset_work(struct work_struct *data) 872 { 873 struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); 874 unsigned long pending; 875 enum reset_type method; 876 877 pending = READ_ONCE(efx->reset_pending); 878 method = fls(pending) - 1; 879 880 if (method == RESET_TYPE_MC_BIST) 881 efx_wait_for_bist_end(efx); 882 883 if ((method == RESET_TYPE_RECOVER_OR_DISABLE || 884 method == RESET_TYPE_RECOVER_OR_ALL) && 885 efx_try_recovery(efx)) 886 return; 887 888 if (!pending) 889 return; 890 891 rtnl_lock(); 892 893 /* We checked the state in efx_schedule_reset() but it may 894 * have changed by now. Now that we have the RTNL lock, 895 * it cannot change again. 896 */ 897 if (efx_net_active(efx->state)) 898 (void)efx_reset(efx, method); 899 900 rtnl_unlock(); 901 } 902 903 void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) 904 { 905 enum reset_type method; 906 907 if (efx_recovering(efx->state)) { 908 netif_dbg(efx, drv, efx->net_dev, 909 "recovering: skip scheduling %s reset\n", 910 RESET_TYPE(type)); 911 return; 912 } 913 914 switch (type) { 915 case RESET_TYPE_INVISIBLE: 916 case RESET_TYPE_ALL: 917 case RESET_TYPE_RECOVER_OR_ALL: 918 case RESET_TYPE_WORLD: 919 case RESET_TYPE_DISABLE: 920 case RESET_TYPE_RECOVER_OR_DISABLE: 921 case RESET_TYPE_DATAPATH: 922 case RESET_TYPE_MC_BIST: 923 case RESET_TYPE_MCDI_TIMEOUT: 924 method = type; 925 netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", 926 RESET_TYPE(method)); 927 break; 928 default: 929 method = efx->type->map_reset_reason(type); 930 netif_dbg(efx, drv, efx->net_dev, 931 "scheduling %s reset for %s\n", 932 RESET_TYPE(method), RESET_TYPE(type)); 933 break; 934 } 935 936 set_bit(method, &efx->reset_pending); 937 smp_mb(); /* ensure we change reset_pending before checking state */ 938 939 /* If we're not READY then just leave the flags set as the cue 940 * to abort probing or reschedule the reset later. 
941 */ 942 if (!efx_net_active(READ_ONCE(efx->state))) 943 return; 944 945 /* efx_process_channel() will no longer read events once a 946 * reset is scheduled. So switch back to poll'd MCDI completions. 947 */ 948 efx_mcdi_mode_poll(efx); 949 950 efx_queue_reset_work(efx); 951 } 952 953 /************************************************************************** 954 * 955 * Dummy NIC operations 956 * 957 * Can be used for some unimplemented operations 958 * Needed so all function pointers are valid and do not have to be tested 959 * before use 960 * 961 **************************************************************************/ 962 int efx_port_dummy_op_int(struct efx_nic *efx) 963 { 964 return 0; 965 } 966 void efx_port_dummy_op_void(struct efx_nic *efx) {} 967 968 /************************************************************************** 969 * 970 * Data housekeeping 971 * 972 **************************************************************************/ 973 974 /* This zeroes out and then fills in the invariants in a struct 975 * efx_nic (including all sub-structures). 976 */ 977 int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev) 978 { 979 int rc = -ENOMEM; 980 981 /* Initialise common structures */ 982 INIT_LIST_HEAD(&efx->node); 983 INIT_LIST_HEAD(&efx->secondary_list); 984 spin_lock_init(&efx->biu_lock); 985 #ifdef CONFIG_SFC_MTD 986 INIT_LIST_HEAD(&efx->mtd_list); 987 #endif 988 INIT_WORK(&efx->reset_work, efx_reset_work); 989 INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor); 990 efx_selftest_async_init(efx); 991 efx->pci_dev = pci_dev; 992 efx->msg_enable = debug; 993 efx->state = STATE_UNINIT; 994 strscpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); 995 996 efx->rx_prefix_size = efx->type->rx_prefix_size; 997 efx->rx_ip_align = 998 NET_IP_ALIGN ? 
(efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0; 999 efx->rx_packet_hash_offset = 1000 efx->type->rx_hash_offset - efx->type->rx_prefix_size; 1001 efx->rx_packet_ts_offset = 1002 efx->type->rx_ts_offset - efx->type->rx_prefix_size; 1003 INIT_LIST_HEAD(&efx->rss_context.list); 1004 efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; 1005 mutex_init(&efx->rss_lock); 1006 efx->vport_id = EVB_PORT_ID_ASSIGNED; 1007 spin_lock_init(&efx->stats_lock); 1008 efx->vi_stride = EFX_DEFAULT_VI_STRIDE; 1009 efx->num_mac_stats = MC_CMD_MAC_NSTATS; 1010 BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END); 1011 mutex_init(&efx->mac_lock); 1012 init_rwsem(&efx->filter_sem); 1013 #ifdef CONFIG_RFS_ACCEL 1014 mutex_init(&efx->rps_mutex); 1015 spin_lock_init(&efx->rps_hash_lock); 1016 /* Failure to allocate is not fatal, but may degrade ARFS performance */ 1017 efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, 1018 sizeof(*efx->rps_hash_table), GFP_KERNEL); 1019 #endif 1020 spin_lock_init(&efx->vf_reps_lock); 1021 INIT_LIST_HEAD(&efx->vf_reps); 1022 INIT_WORK(&efx->mac_work, efx_mac_work); 1023 init_waitqueue_head(&efx->flush_wq); 1024 1025 efx->tx_queues_per_channel = 1; 1026 efx->rxq_entries = EFX_DEFAULT_DMAQ_SIZE; 1027 efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE; 1028 1029 efx->mem_bar = UINT_MAX; 1030 1031 rc = efx_init_channels(efx); 1032 if (rc) 1033 goto fail; 1034 1035 /* Would be good to use the net_dev name, but we're too early */ 1036 snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s", 1037 pci_name(pci_dev)); 1038 efx->workqueue = create_singlethread_workqueue(efx->workqueue_name); 1039 if (!efx->workqueue) { 1040 rc = -ENOMEM; 1041 goto fail; 1042 } 1043 1044 return 0; 1045 1046 fail: 1047 efx_fini_struct(efx); 1048 return rc; 1049 } 1050 1051 void efx_fini_struct(struct efx_nic *efx) 1052 { 1053 #ifdef CONFIG_RFS_ACCEL 1054 kfree(efx->rps_hash_table); 1055 #endif 1056 1057 efx_fini_channels(efx); 1058 1059 kfree(efx->vpd_sn); 1060 
1061 if (efx->workqueue) { 1062 destroy_workqueue(efx->workqueue); 1063 efx->workqueue = NULL; 1064 } 1065 } 1066 1067 /* This configures the PCI device to enable I/O and DMA. */ 1068 int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask, 1069 unsigned int mem_map_size) 1070 { 1071 struct pci_dev *pci_dev = efx->pci_dev; 1072 int rc; 1073 1074 efx->mem_bar = UINT_MAX; 1075 pci_dbg(pci_dev, "initialising I/O bar=%d\n", bar); 1076 1077 rc = pci_enable_device(pci_dev); 1078 if (rc) { 1079 pci_err(pci_dev, "failed to enable PCI device\n"); 1080 goto fail1; 1081 } 1082 1083 pci_set_master(pci_dev); 1084 1085 rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask); 1086 if (rc) { 1087 pci_err(efx->pci_dev, "could not find a suitable DMA mask\n"); 1088 goto fail2; 1089 } 1090 pci_dbg(efx->pci_dev, "using DMA mask %llx\n", (unsigned long long)dma_mask); 1091 1092 efx->membase_phys = pci_resource_start(efx->pci_dev, bar); 1093 if (!efx->membase_phys) { 1094 pci_err(efx->pci_dev, 1095 "ERROR: No BAR%d mapping from the BIOS. 
Try pci=realloc on the kernel command line\n", 1096 bar); 1097 rc = -ENODEV; 1098 goto fail3; 1099 } 1100 1101 rc = pci_request_region(pci_dev, bar, "sfc"); 1102 if (rc) { 1103 pci_err(efx->pci_dev, 1104 "request for memory BAR[%d] failed\n", bar); 1105 rc = -EIO; 1106 goto fail3; 1107 } 1108 efx->mem_bar = bar; 1109 efx->membase = ioremap(efx->membase_phys, mem_map_size); 1110 if (!efx->membase) { 1111 pci_err(efx->pci_dev, 1112 "could not map memory BAR[%d] at %llx+%x\n", bar, 1113 (unsigned long long)efx->membase_phys, mem_map_size); 1114 rc = -ENOMEM; 1115 goto fail4; 1116 } 1117 pci_dbg(efx->pci_dev, 1118 "memory BAR[%d] at %llx+%x (virtual %p)\n", bar, 1119 (unsigned long long)efx->membase_phys, mem_map_size, 1120 efx->membase); 1121 1122 return 0; 1123 1124 fail4: 1125 pci_release_region(efx->pci_dev, bar); 1126 fail3: 1127 efx->membase_phys = 0; 1128 fail2: 1129 pci_disable_device(efx->pci_dev); 1130 fail1: 1131 return rc; 1132 } 1133 1134 void efx_fini_io(struct efx_nic *efx) 1135 { 1136 pci_dbg(efx->pci_dev, "shutting down I/O\n"); 1137 1138 if (efx->membase) { 1139 iounmap(efx->membase); 1140 efx->membase = NULL; 1141 } 1142 1143 if (efx->membase_phys) { 1144 pci_release_region(efx->pci_dev, efx->mem_bar); 1145 efx->membase_phys = 0; 1146 efx->mem_bar = UINT_MAX; 1147 } 1148 1149 /* Don't disable bus-mastering if VFs are assigned */ 1150 if (!pci_vfs_assigned(efx->pci_dev)) 1151 pci_disable_device(efx->pci_dev); 1152 } 1153 1154 #ifdef CONFIG_SFC_MCDI_LOGGING 1155 static ssize_t mcdi_logging_show(struct device *dev, 1156 struct device_attribute *attr, 1157 char *buf) 1158 { 1159 struct efx_nic *efx = dev_get_drvdata(dev); 1160 struct efx_mcdi_iface *mcdi = efx_mcdi(efx); 1161 1162 return sysfs_emit(buf, "%d\n", mcdi->logging_enabled); 1163 } 1164 1165 static ssize_t mcdi_logging_store(struct device *dev, 1166 struct device_attribute *attr, 1167 const char *buf, size_t count) 1168 { 1169 struct efx_nic *efx = dev_get_drvdata(dev); 1170 struct 
efx_mcdi_iface *mcdi = efx_mcdi(efx); 1171 bool enable = count > 0 && *buf != '0'; 1172 1173 mcdi->logging_enabled = enable; 1174 return count; 1175 } 1176 1177 static DEVICE_ATTR_RW(mcdi_logging); 1178 1179 void efx_init_mcdi_logging(struct efx_nic *efx) 1180 { 1181 int rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); 1182 1183 if (rc) { 1184 netif_warn(efx, drv, efx->net_dev, 1185 "failed to init net dev attributes\n"); 1186 } 1187 } 1188 1189 void efx_fini_mcdi_logging(struct efx_nic *efx) 1190 { 1191 device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); 1192 } 1193 #endif 1194 1195 /* A PCI error affecting this device was detected. 1196 * At this point MMIO and DMA may be disabled. 1197 * Stop the software path and request a slot reset. 1198 */ 1199 static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev, 1200 pci_channel_state_t state) 1201 { 1202 pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; 1203 struct efx_nic *efx = pci_get_drvdata(pdev); 1204 1205 if (state == pci_channel_io_perm_failure) 1206 return PCI_ERS_RESULT_DISCONNECT; 1207 1208 rtnl_lock(); 1209 1210 if (efx->state != STATE_DISABLED) { 1211 efx->state = efx_recover(efx->state); 1212 efx->reset_pending = 0; 1213 1214 efx_device_detach_sync(efx); 1215 1216 if (efx_net_active(efx->state)) { 1217 efx_stop_all(efx); 1218 efx_disable_interrupts(efx); 1219 } 1220 1221 status = PCI_ERS_RESULT_NEED_RESET; 1222 } else { 1223 /* If the interface is disabled we don't want to do anything 1224 * with it. 1225 */ 1226 status = PCI_ERS_RESULT_RECOVERED; 1227 } 1228 1229 rtnl_unlock(); 1230 1231 pci_disable_device(pdev); 1232 1233 return status; 1234 } 1235 1236 /* Fake a successful reset, which will be performed later in efx_io_resume. 
*/ 1237 static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev) 1238 { 1239 struct efx_nic *efx = pci_get_drvdata(pdev); 1240 pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; 1241 1242 if (pci_enable_device(pdev)) { 1243 netif_err(efx, hw, efx->net_dev, 1244 "Cannot re-enable PCI device after reset.\n"); 1245 status = PCI_ERS_RESULT_DISCONNECT; 1246 } 1247 1248 return status; 1249 } 1250 1251 /* Perform the actual reset and resume I/O operations. */ 1252 static void efx_io_resume(struct pci_dev *pdev) 1253 { 1254 struct efx_nic *efx = pci_get_drvdata(pdev); 1255 int rc; 1256 1257 rtnl_lock(); 1258 1259 if (efx->state == STATE_DISABLED) 1260 goto out; 1261 1262 rc = efx_reset(efx, RESET_TYPE_ALL); 1263 if (rc) { 1264 netif_err(efx, hw, efx->net_dev, 1265 "efx_reset failed after PCI error (%d)\n", rc); 1266 } else { 1267 efx->state = efx_recovered(efx->state); 1268 netif_dbg(efx, hw, efx->net_dev, 1269 "Done resetting and resuming IO after PCI error.\n"); 1270 } 1271 1272 out: 1273 rtnl_unlock(); 1274 } 1275 1276 /* For simplicity and reliability, we always require a slot reset and try to 1277 * reset the hardware when a pci error affecting the device is detected. 1278 * We leave both the link_reset and mmio_enabled callback unimplemented: 1279 * with our request for slot reset the mmio_enabled callback will never be 1280 * called, and the link_reset callback is not used by AER or EEH mechanisms. 1281 */ 1282 const struct pci_error_handlers efx_err_handlers = { 1283 .error_detected = efx_io_error_detected, 1284 .slot_reset = efx_io_slot_reset, 1285 .resume = efx_io_resume, 1286 }; 1287 1288 /* Determine whether the NIC will be able to handle TX offloads for a given 1289 * encapsulated packet. 1290 */ 1291 static bool efx_can_encap_offloads(struct efx_nic *efx, struct sk_buff *skb) 1292 { 1293 struct gre_base_hdr *greh; 1294 __be16 dst_port; 1295 u8 ipproto; 1296 1297 /* Does the NIC support encap offloads? 
1298 * If not, we should never get here, because we shouldn't have 1299 * advertised encap offload feature flags in the first place. 1300 */ 1301 if (WARN_ON_ONCE(!efx->type->udp_tnl_has_port)) 1302 return false; 1303 1304 /* Determine encapsulation protocol in use */ 1305 switch (skb->protocol) { 1306 case htons(ETH_P_IP): 1307 ipproto = ip_hdr(skb)->protocol; 1308 break; 1309 case htons(ETH_P_IPV6): 1310 /* If there are extension headers, this will cause us to 1311 * think we can't offload something that we maybe could have. 1312 */ 1313 ipproto = ipv6_hdr(skb)->nexthdr; 1314 break; 1315 default: 1316 /* Not IP, so can't offload it */ 1317 return false; 1318 } 1319 switch (ipproto) { 1320 case IPPROTO_GRE: 1321 /* We support NVGRE but not IP over GRE or random gretaps. 1322 * Specifically, the NIC will accept GRE as encapsulated if 1323 * the inner protocol is Ethernet, but only handle it 1324 * correctly if the GRE header is 8 bytes long. Moreover, 1325 * it will not update the Checksum or Sequence Number fields 1326 * if they are present. (The Routing Present flag, 1327 * GRE_ROUTING, cannot be set else the header would be more 1328 * than 8 bytes long; so we don't have to worry about it.) 1329 */ 1330 if (skb->inner_protocol_type != ENCAP_TYPE_ETHER) 1331 return false; 1332 if (ntohs(skb->inner_protocol) != ETH_P_TEB) 1333 return false; 1334 if (skb_inner_mac_header(skb) - skb_transport_header(skb) != 8) 1335 return false; 1336 greh = (struct gre_base_hdr *)skb_transport_header(skb); 1337 return !(greh->flags & (GRE_CSUM | GRE_SEQ)); 1338 case IPPROTO_UDP: 1339 /* If the port is registered for a UDP tunnel, we assume the 1340 * packet is for that tunnel, and the NIC will handle it as 1341 * such. If not, the NIC won't know what to do with it. 
1342 */ 1343 dst_port = udp_hdr(skb)->dest; 1344 return efx->type->udp_tnl_has_port(efx, dst_port); 1345 default: 1346 return false; 1347 } 1348 } 1349 1350 netdev_features_t efx_features_check(struct sk_buff *skb, struct net_device *dev, 1351 netdev_features_t features) 1352 { 1353 struct efx_nic *efx = efx_netdev_priv(dev); 1354 1355 if (skb->encapsulation) { 1356 if (features & NETIF_F_GSO_MASK) 1357 /* Hardware can only do TSO with at most 208 bytes 1358 * of headers. 1359 */ 1360 if (skb_inner_transport_offset(skb) > 1361 EFX_TSO2_MAX_HDRLEN) 1362 features &= ~(NETIF_F_GSO_MASK); 1363 if (features & (NETIF_F_GSO_MASK | NETIF_F_CSUM_MASK)) 1364 if (!efx_can_encap_offloads(efx, skb)) 1365 features &= ~(NETIF_F_GSO_MASK | 1366 NETIF_F_CSUM_MASK); 1367 } 1368 return features; 1369 } 1370 1371 int efx_get_phys_port_id(struct net_device *net_dev, 1372 struct netdev_phys_item_id *ppid) 1373 { 1374 struct efx_nic *efx = efx_netdev_priv(net_dev); 1375 1376 if (efx->type->get_phys_port_id) 1377 return efx->type->get_phys_port_id(efx, ppid); 1378 else 1379 return -EOPNOTSUPP; 1380 } 1381 1382 int efx_get_phys_port_name(struct net_device *net_dev, char *name, size_t len) 1383 { 1384 struct efx_nic *efx = efx_netdev_priv(net_dev); 1385 1386 if (snprintf(name, len, "p%u", efx->port_num) >= len) 1387 return -EINVAL; 1388 return 0; 1389 } 1390 1391 void efx_detach_reps(struct efx_nic *efx) 1392 { 1393 struct net_device *rep_dev; 1394 struct efx_rep *efv; 1395 1396 ASSERT_RTNL(); 1397 netif_dbg(efx, drv, efx->net_dev, "Detaching VF representors\n"); 1398 list_for_each_entry(efv, &efx->vf_reps, list) { 1399 rep_dev = efv->net_dev; 1400 if (!rep_dev) 1401 continue; 1402 netif_carrier_off(rep_dev); 1403 /* See efx_device_detach_sync() */ 1404 netif_tx_lock_bh(rep_dev); 1405 netif_tx_stop_all_queues(rep_dev); 1406 netif_tx_unlock_bh(rep_dev); 1407 } 1408 } 1409 1410 void efx_attach_reps(struct efx_nic *efx) 1411 { 1412 struct net_device *rep_dev; 1413 struct efx_rep *efv; 1414 
1415 ASSERT_RTNL(); 1416 netif_dbg(efx, drv, efx->net_dev, "Attaching VF representors\n"); 1417 list_for_each_entry(efv, &efx->vf_reps, list) { 1418 rep_dev = efv->net_dev; 1419 if (!rep_dev) 1420 continue; 1421 netif_tx_wake_all_queues(rep_dev); 1422 netif_carrier_on(rep_dev); 1423 } 1424 } 1425