// SPDX-License-Identifier: GPL-2.0-or-later
/**************************************************************************/
/*                                                                        */
/*  IBM System i and System p Virtual NIC Device Driver                  */
/*  Copyright (C) 2014 IBM Corp.                                         */
/*  Santiago Leon (santi_leon@yahoo.com)                                 */
/*  Thomas Falcon (tlfalcon@linux.vnet.ibm.com)                          */
/*  John Allen (jallen@linux.vnet.ibm.com)                               */
/*                                                                        */
/*                                                                        */
/* This module contains the implementation of a virtual ethernet device  */
/* for use with IBM i/p Series LPAR Linux. It utilizes the logical LAN   */
/* option of the RS/6000 Platform Architecture to interface with virtual */
/* ethernet NICs that are presented to the partition by the hypervisor.  */
/*                                                                        */
/* Messages are passed between the VNIC driver and the VNIC server using */
/* Command/Response Queues (CRQs) and sub CRQs (sCRQs). CRQs are used to */
/* issue and receive commands that initiate communication with the       */
/* server on driver initialization. Sub CRQs (sCRQs) are similar to      */
/* CRQs, but are used by the driver to notify the server that a packet   */
/* is ready for transmission or that a buffer has been added to receive  */
/* a packet. Subsequently, sCRQs are used by the server to notify the    */
/* driver that a packet transmission has been completed or that a packet */
/* has been received and placed in a waiting buffer.                     */
/*                                                                        */
/* In lieu of a more conventional "on-the-fly" DMA mapping strategy in   */
/* which skbs are DMA mapped and immediately unmapped when the transmit  */
/* or receive has been completed, the VNIC driver is required to use     */
/* "long term mapping". This entails that large, contiguous DMA mapped   */
/* buffers are allocated on driver initialization and these buffers are  */
/* then continuously reused to pass skbs to and from the VNIC server.
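 * For example, on open the driver maps a few large buffers per queue with
 * REQUEST_MAP and then copies each skb into (or out of) a fixed offset
 * inside those buffers, instead of DMA-mapping every skb individually.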
*/ 32 /* */ 33 /**************************************************************************/ 34 35 #include <linux/module.h> 36 #include <linux/moduleparam.h> 37 #include <linux/types.h> 38 #include <linux/errno.h> 39 #include <linux/completion.h> 40 #include <linux/ioport.h> 41 #include <linux/dma-mapping.h> 42 #include <linux/kernel.h> 43 #include <linux/netdevice.h> 44 #include <linux/etherdevice.h> 45 #include <linux/skbuff.h> 46 #include <linux/init.h> 47 #include <linux/delay.h> 48 #include <linux/mm.h> 49 #include <linux/ethtool.h> 50 #include <linux/proc_fs.h> 51 #include <linux/if_arp.h> 52 #include <linux/in.h> 53 #include <linux/ip.h> 54 #include <linux/ipv6.h> 55 #include <linux/irq.h> 56 #include <linux/irqdomain.h> 57 #include <linux/kthread.h> 58 #include <linux/seq_file.h> 59 #include <linux/interrupt.h> 60 #include <net/net_namespace.h> 61 #include <asm/hvcall.h> 62 #include <linux/atomic.h> 63 #include <asm/vio.h> 64 #include <asm/xive.h> 65 #include <asm/iommu.h> 66 #include <linux/uaccess.h> 67 #include <asm/firmware.h> 68 #include <linux/workqueue.h> 69 #include <linux/if_vlan.h> 70 #include <linux/utsname.h> 71 #include <linux/cpu.h> 72 73 #include "ibmvnic.h" 74 75 static const char ibmvnic_driver_name[] = "ibmvnic"; 76 static const char ibmvnic_driver_string[] = "IBM System i/p Virtual NIC Driver"; 77 78 MODULE_AUTHOR("Santiago Leon"); 79 MODULE_DESCRIPTION("IBM System i/p Virtual NIC Driver"); 80 MODULE_LICENSE("GPL"); 81 MODULE_VERSION(IBMVNIC_DRIVER_VERSION); 82 83 static int ibmvnic_version = IBMVNIC_INITIAL_VERSION; 84 static void release_sub_crqs(struct ibmvnic_adapter *, bool); 85 static int ibmvnic_reset_crq(struct ibmvnic_adapter *); 86 static int ibmvnic_send_crq_init(struct ibmvnic_adapter *); 87 static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *); 88 static int ibmvnic_send_crq(struct ibmvnic_adapter *, union ibmvnic_crq *); 89 static int send_subcrq_indirect(struct ibmvnic_adapter *, u64, u64, u64); 90 static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance); 91 static int enable_scrq_irq(struct ibmvnic_adapter *, 92 struct ibmvnic_sub_crq_queue *); 93 static int disable_scrq_irq(struct ibmvnic_adapter *, 94 struct ibmvnic_sub_crq_queue *); 95 static int pending_scrq(struct ibmvnic_adapter *, 96 struct ibmvnic_sub_crq_queue *); 97 static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *, 98 struct ibmvnic_sub_crq_queue *); 99 static int ibmvnic_poll(struct napi_struct *napi, int data); 100 static void send_query_map(struct ibmvnic_adapter *adapter); 101 static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8); 102 static int send_request_unmap(struct ibmvnic_adapter *, u8); 103 static int send_login(struct ibmvnic_adapter *adapter); 104 static void send_query_cap(struct ibmvnic_adapter *adapter); 105 static int init_sub_crqs(struct ibmvnic_adapter *); 106 static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter); 107 static int ibmvnic_reset_init(struct ibmvnic_adapter *, bool reset); 108 static void release_crq_queue(struct ibmvnic_adapter *); 109 static int __ibmvnic_set_mac(struct net_device *, u8 *); 110 static int init_crq_queue(struct ibmvnic_adapter *adapter); 111 static int send_query_phys_parms(struct ibmvnic_adapter *adapter); 112 static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, 113 struct ibmvnic_sub_crq_queue *tx_scrq); 114 static void free_long_term_buff(struct ibmvnic_adapter *adapter, 115 struct ibmvnic_long_term_buff *ltb); 116 static void 
ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter); 117 118 struct ibmvnic_stat { 119 char name[ETH_GSTRING_LEN]; 120 int offset; 121 }; 122 123 #define IBMVNIC_STAT_OFF(stat) (offsetof(struct ibmvnic_adapter, stats) + \ 124 offsetof(struct ibmvnic_statistics, stat)) 125 #define IBMVNIC_GET_STAT(a, off) (*((u64 *)(((unsigned long)(a)) + (off)))) 126 127 static const struct ibmvnic_stat ibmvnic_stats[] = { 128 {"rx_packets", IBMVNIC_STAT_OFF(rx_packets)}, 129 {"rx_bytes", IBMVNIC_STAT_OFF(rx_bytes)}, 130 {"tx_packets", IBMVNIC_STAT_OFF(tx_packets)}, 131 {"tx_bytes", IBMVNIC_STAT_OFF(tx_bytes)}, 132 {"ucast_tx_packets", IBMVNIC_STAT_OFF(ucast_tx_packets)}, 133 {"ucast_rx_packets", IBMVNIC_STAT_OFF(ucast_rx_packets)}, 134 {"mcast_tx_packets", IBMVNIC_STAT_OFF(mcast_tx_packets)}, 135 {"mcast_rx_packets", IBMVNIC_STAT_OFF(mcast_rx_packets)}, 136 {"bcast_tx_packets", IBMVNIC_STAT_OFF(bcast_tx_packets)}, 137 {"bcast_rx_packets", IBMVNIC_STAT_OFF(bcast_rx_packets)}, 138 {"align_errors", IBMVNIC_STAT_OFF(align_errors)}, 139 {"fcs_errors", IBMVNIC_STAT_OFF(fcs_errors)}, 140 {"single_collision_frames", IBMVNIC_STAT_OFF(single_collision_frames)}, 141 {"multi_collision_frames", IBMVNIC_STAT_OFF(multi_collision_frames)}, 142 {"sqe_test_errors", IBMVNIC_STAT_OFF(sqe_test_errors)}, 143 {"deferred_tx", IBMVNIC_STAT_OFF(deferred_tx)}, 144 {"late_collisions", IBMVNIC_STAT_OFF(late_collisions)}, 145 {"excess_collisions", IBMVNIC_STAT_OFF(excess_collisions)}, 146 {"internal_mac_tx_errors", IBMVNIC_STAT_OFF(internal_mac_tx_errors)}, 147 {"carrier_sense", IBMVNIC_STAT_OFF(carrier_sense)}, 148 {"too_long_frames", IBMVNIC_STAT_OFF(too_long_frames)}, 149 {"internal_mac_rx_errors", IBMVNIC_STAT_OFF(internal_mac_rx_errors)}, 150 }; 151 152 static int send_crq_init_complete(struct ibmvnic_adapter *adapter) 153 { 154 union ibmvnic_crq crq; 155 156 memset(&crq, 0, sizeof(crq)); 157 crq.generic.first = IBMVNIC_CRQ_INIT_CMD; 158 crq.generic.cmd = IBMVNIC_CRQ_INIT_COMPLETE; 159 160 return ibmvnic_send_crq(adapter, &crq); 161 } 162 163 static int send_version_xchg(struct ibmvnic_adapter *adapter) 164 { 165 union ibmvnic_crq crq; 166 167 memset(&crq, 0, sizeof(crq)); 168 crq.version_exchange.first = IBMVNIC_CRQ_CMD; 169 crq.version_exchange.cmd = VERSION_EXCHANGE; 170 crq.version_exchange.version = cpu_to_be16(ibmvnic_version); 171 172 return ibmvnic_send_crq(adapter, &crq); 173 } 174 175 static void ibmvnic_clean_queue_affinity(struct ibmvnic_adapter *adapter, 176 struct ibmvnic_sub_crq_queue *queue) 177 { 178 if (!(queue && queue->irq)) 179 return; 180 181 cpumask_clear(queue->affinity_mask); 182 183 if (irq_set_affinity_and_hint(queue->irq, NULL)) 184 netdev_warn(adapter->netdev, 185 "%s: Clear affinity failed, queue addr = %p, IRQ = %d\n", 186 __func__, queue, queue->irq); 187 } 188 189 static void ibmvnic_clean_affinity(struct ibmvnic_adapter *adapter) 190 { 191 struct ibmvnic_sub_crq_queue **rxqs; 192 struct ibmvnic_sub_crq_queue **txqs; 193 int num_rxqs, num_txqs; 194 int rc, i; 195 196 rc = 0; 197 rxqs = adapter->rx_scrq; 198 txqs = adapter->tx_scrq; 199 num_txqs = adapter->num_active_tx_scrqs; 200 num_rxqs = adapter->num_active_rx_scrqs; 201 202 netdev_dbg(adapter->netdev, "%s: Cleaning irq affinity hints", __func__); 203 if (txqs) { 204 for (i = 0; i < num_txqs; i++) 205 ibmvnic_clean_queue_affinity(adapter, txqs[i]); 206 } 207 if (rxqs) { 208 for (i = 0; i < num_rxqs; i++) 209 ibmvnic_clean_queue_affinity(adapter, rxqs[i]); 210 } 211 } 212 213 static int ibmvnic_set_queue_affinity(struct ibmvnic_sub_crq_queue 
*queue, 214 unsigned int *cpu, int *stragglers, 215 int stride) 216 { 217 cpumask_var_t mask; 218 int i; 219 int rc = 0; 220 221 if (!(queue && queue->irq)) 222 return rc; 223 224 /* cpumask_var_t is either a pointer or array, allocation works here */ 225 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) 226 return -ENOMEM; 227 228 /* while we have extra cpu give one extra to this irq */ 229 if (*stragglers) { 230 stride++; 231 (*stragglers)--; 232 } 233 /* atomic write is safer than writing bit by bit directly */ 234 for (i = 0; i < stride; i++) { 235 cpumask_set_cpu(*cpu, mask); 236 *cpu = cpumask_next_wrap(*cpu, cpu_online_mask, 237 nr_cpu_ids, false); 238 } 239 /* set queue affinity mask */ 240 cpumask_copy(queue->affinity_mask, mask); 241 rc = irq_set_affinity_and_hint(queue->irq, queue->affinity_mask); 242 free_cpumask_var(mask); 243 244 return rc; 245 } 246 247 /* assumes cpu read lock is held */ 248 static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter) 249 { 250 struct ibmvnic_sub_crq_queue **rxqs = adapter->rx_scrq; 251 struct ibmvnic_sub_crq_queue **txqs = adapter->tx_scrq; 252 struct ibmvnic_sub_crq_queue *queue; 253 int num_rxqs = adapter->num_active_rx_scrqs, i_rxqs = 0; 254 int num_txqs = adapter->num_active_tx_scrqs, i_txqs = 0; 255 int total_queues, stride, stragglers, i; 256 unsigned int num_cpu, cpu; 257 bool is_rx_queue; 258 int rc = 0; 259 260 netdev_dbg(adapter->netdev, "%s: Setting irq affinity hints", __func__); 261 if (!(adapter->rx_scrq && adapter->tx_scrq)) { 262 netdev_warn(adapter->netdev, 263 "%s: Set affinity failed, queues not allocated\n", 264 __func__); 265 return; 266 } 267 268 total_queues = num_rxqs + num_txqs; 269 num_cpu = num_online_cpus(); 270 /* number of cpu's assigned per irq */ 271 stride = max_t(int, num_cpu / total_queues, 1); 272 /* number of leftover cpu's */ 273 stragglers = num_cpu >= total_queues ? num_cpu % total_queues : 0; 274 /* next available cpu to assign irq to */ 275 cpu = cpumask_next(-1, cpu_online_mask); 276 277 for (i = 0; i < total_queues; i++) { 278 is_rx_queue = false; 279 /* balance core load by alternating rx and tx assignments 280 * ex: TX0 -> RX0 -> TX1 -> RX1 etc. 
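		 * Rough worked example (assuming 16 online CPUs and 4 TX +
		 * 4 RX queues): stride = 16 / 8 = 2 and stragglers = 0, so
		 * every queue IRQ gets a hint of two consecutive online CPUs.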
281 */ 282 if ((i % 2 == 1 && i_rxqs < num_rxqs) || i_txqs == num_txqs) { 283 queue = rxqs[i_rxqs++]; 284 is_rx_queue = true; 285 } else { 286 queue = txqs[i_txqs++]; 287 } 288 289 rc = ibmvnic_set_queue_affinity(queue, &cpu, &stragglers, 290 stride); 291 if (rc) 292 goto out; 293 294 if (!queue || is_rx_queue) 295 continue; 296 297 rc = __netif_set_xps_queue(adapter->netdev, 298 cpumask_bits(queue->affinity_mask), 299 i, XPS_CPUS); 300 if (rc) 301 netdev_warn(adapter->netdev, "%s: Set XPS on queue %d failed, rc = %d.\n", 302 __func__, i, rc); 303 } 304 305 out: 306 if (rc) { 307 netdev_warn(adapter->netdev, 308 "%s: Set affinity failed, queue addr = %p, IRQ = %d, rc = %d.\n", 309 __func__, queue, queue->irq, rc); 310 ibmvnic_clean_affinity(adapter); 311 } 312 } 313 314 static int ibmvnic_cpu_online(unsigned int cpu, struct hlist_node *node) 315 { 316 struct ibmvnic_adapter *adapter; 317 318 adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node); 319 ibmvnic_set_affinity(adapter); 320 return 0; 321 } 322 323 static int ibmvnic_cpu_dead(unsigned int cpu, struct hlist_node *node) 324 { 325 struct ibmvnic_adapter *adapter; 326 327 adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node_dead); 328 ibmvnic_set_affinity(adapter); 329 return 0; 330 } 331 332 static int ibmvnic_cpu_down_prep(unsigned int cpu, struct hlist_node *node) 333 { 334 struct ibmvnic_adapter *adapter; 335 336 adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node); 337 ibmvnic_clean_affinity(adapter); 338 return 0; 339 } 340 341 static enum cpuhp_state ibmvnic_online; 342 343 static int ibmvnic_cpu_notif_add(struct ibmvnic_adapter *adapter) 344 { 345 int ret; 346 347 ret = cpuhp_state_add_instance_nocalls(ibmvnic_online, &adapter->node); 348 if (ret) 349 return ret; 350 ret = cpuhp_state_add_instance_nocalls(CPUHP_IBMVNIC_DEAD, 351 &adapter->node_dead); 352 if (!ret) 353 return ret; 354 cpuhp_state_remove_instance_nocalls(ibmvnic_online, &adapter->node); 355 return ret; 356 } 357 358 static void ibmvnic_cpu_notif_remove(struct ibmvnic_adapter *adapter) 359 { 360 cpuhp_state_remove_instance_nocalls(ibmvnic_online, &adapter->node); 361 cpuhp_state_remove_instance_nocalls(CPUHP_IBMVNIC_DEAD, 362 &adapter->node_dead); 363 } 364 365 static long h_reg_sub_crq(unsigned long unit_address, unsigned long token, 366 unsigned long length, unsigned long *number, 367 unsigned long *irq) 368 { 369 unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; 370 long rc; 371 372 rc = plpar_hcall(H_REG_SUB_CRQ, retbuf, unit_address, token, length); 373 *number = retbuf[0]; 374 *irq = retbuf[1]; 375 376 return rc; 377 } 378 379 /** 380 * ibmvnic_wait_for_completion - Check device state and wait for completion 381 * @adapter: private device data 382 * @comp_done: completion structure to wait for 383 * @timeout: time to wait in milliseconds 384 * 385 * Wait for a completion signal or until the timeout limit is reached 386 * while checking that the device is still active. 
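 *
 * Return: 0 on completion, -ENODEV if the CRQ goes inactive while waiting,
 * -ETIMEDOUT if the timeout expires.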
 */
static int ibmvnic_wait_for_completion(struct ibmvnic_adapter *adapter,
				       struct completion *comp_done,
				       unsigned long timeout)
{
	struct net_device *netdev;
	unsigned long div_timeout;
	u8 retry;

	netdev = adapter->netdev;
	retry = 5;
	div_timeout = msecs_to_jiffies(timeout / retry);
	while (true) {
		if (!adapter->crq.active) {
			netdev_err(netdev, "Device down!\n");
			return -ENODEV;
		}
		if (!retry--)
			break;
		if (wait_for_completion_timeout(comp_done, div_timeout))
			return 0;
	}
	netdev_err(netdev, "Operation timed out.\n");
	return -ETIMEDOUT;
}

/**
 * reuse_ltb() - Check if a long term buffer can be reused
 * @ltb: The long term buffer to be checked
 * @size: The size of the long term buffer.
 *
 * An LTB can be reused unless its size has changed.
 *
 * Return: Return true if the LTB can be reused, false otherwise.
 */
static bool reuse_ltb(struct ibmvnic_long_term_buff *ltb, int size)
{
	return (ltb->buff && ltb->size == size);
}

/**
 * alloc_long_term_buff() - Allocate a long term buffer (LTB)
 *
 * @adapter: ibmvnic adapter associated to the LTB
 * @ltb: container object for the LTB
 * @size: size of the LTB
 *
 * Allocate an LTB of the specified size and notify VIOS.
 *
 * If the given @ltb already has the correct size, reuse it. Otherwise if
 * it's non-NULL, free it. Then allocate a new one of the correct size.
 * Notify the VIOS either way since we may now be working with a new VIOS.
 *
 * Allocating larger chunks of memory during resets, especially LPM or under
 * low memory situations, can cause resets to fail/timeout and the LPAR to
 * lose connectivity. So hold onto the LTB even if we fail to communicate
 * with the VIOS and reuse it on next open. Free LTB when adapter is closed.
 *
 * Return: 0 if we were able to allocate the LTB and notify the VIOS and
 * a negative value otherwise.
 */
static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
				struct ibmvnic_long_term_buff *ltb, int size)
{
	struct device *dev = &adapter->vdev->dev;
	u64 prev = 0;
	int rc;

	if (!reuse_ltb(ltb, size)) {
		dev_dbg(dev,
			"LTB size changed from 0x%llx to 0x%x, reallocating\n",
			ltb->size, size);
		prev = ltb->size;
		free_long_term_buff(adapter, ltb);
	}

	if (ltb->buff) {
		dev_dbg(dev, "Reusing LTB [map %d, size 0x%llx]\n",
			ltb->map_id, ltb->size);
	} else {
		ltb->buff = dma_alloc_coherent(dev, size, &ltb->addr,
					       GFP_KERNEL);
		if (!ltb->buff) {
			dev_err(dev, "Couldn't alloc long term buffer\n");
			return -ENOMEM;
		}
		ltb->size = size;

		ltb->map_id = find_first_zero_bit(adapter->map_ids,
						  MAX_MAP_ID);
		bitmap_set(adapter->map_ids, ltb->map_id, 1);

		dev_dbg(dev,
			"Allocated new LTB [map %d, size 0x%llx was 0x%llx]\n",
			ltb->map_id, ltb->size, prev);
	}

	/* Ensure ltb is zeroed - especially when reusing it. */
	memset(ltb->buff, 0, ltb->size);

	mutex_lock(&adapter->fw_lock);
	adapter->fw_done_rc = 0;
	reinit_completion(&adapter->fw_done);

	rc = send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id);
	if (rc) {
		dev_err(dev, "send_request_map failed, rc = %d\n", rc);
		goto out;
	}

	rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
	if (rc) {
		dev_err(dev, "LTB map request aborted or timed out, rc = %d\n",
			rc);
		goto out;
	}

	if (adapter->fw_done_rc) {
		dev_err(dev, "Couldn't map LTB, rc = %d\n",
			adapter->fw_done_rc);
		rc = -EIO;
		goto out;
	}
	rc = 0;
out:
	/* don't free LTB on communication error - see function header */
	mutex_unlock(&adapter->fw_lock);
	return rc;
}

static void free_long_term_buff(struct ibmvnic_adapter *adapter,
				struct ibmvnic_long_term_buff *ltb)
{
	struct device *dev = &adapter->vdev->dev;

	if (!ltb->buff)
		return;

	/* VIOS automatically unmaps the long term buffer at remote
	 * end for the following resets:
	 * FAILOVER, MOBILITY, TIMEOUT.
	 */
	if (adapter->reset_reason != VNIC_RESET_FAILOVER &&
	    adapter->reset_reason != VNIC_RESET_MOBILITY &&
	    adapter->reset_reason != VNIC_RESET_TIMEOUT)
		send_request_unmap(adapter, ltb->map_id);

	dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);

	ltb->buff = NULL;
	/* mark this map_id free */
	bitmap_clear(adapter->map_ids, ltb->map_id, 1);
	ltb->map_id = 0;
}

/**
 * free_ltb_set - free the given set of long term buffers (LTBs)
 * @adapter: The ibmvnic adapter containing this ltb set
 * @ltb_set: The ltb_set to be freed
 *
 * Free the set of LTBs in the given set.
 */
static void free_ltb_set(struct ibmvnic_adapter *adapter,
			 struct ibmvnic_ltb_set *ltb_set)
{
	int i;

	for (i = 0; i < ltb_set->num_ltbs; i++)
		free_long_term_buff(adapter, &ltb_set->ltbs[i]);

	kfree(ltb_set->ltbs);
	ltb_set->ltbs = NULL;
	ltb_set->num_ltbs = 0;
}

/**
 * alloc_ltb_set() - Allocate a set of long term buffers (LTBs)
 *
 * @adapter: ibmvnic adapter associated to the LTB
 * @ltb_set: container object for the set of LTBs
 * @num_buffs: Number of buffers in the LTB
 * @buff_size: Size of each buffer in the LTB
 *
 * Allocate a set of LTBs to accommodate @num_buffs buffers of @buff_size
 * each. We currently cap the size of each LTB at IBMVNIC_ONE_LTB_SIZE. If
 * the new set needs fewer LTBs than the old set, free the excess LTBs. If
 * the new set needs more than the old set had, allocate the remaining ones.
 * Try to reuse as many LTBs as possible and avoid reallocation.
 *
 * Any changes to this allocation strategy must be reflected in
 * map_rxpool_buff_to_ltb() and map_txpool_buff_to_ltb().
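 *
 * A rough sizing example (treating IBMVNIC_ONE_LTB_SIZE as 8 MB purely for
 * illustration): 4096 buffers of 8 KB need 32 MB in total, ltb_size rounds
 * down to 8 MB, and nltbs ends up as 4 LTBs holding 1024 buffers each; only
 * the last LTB can be smaller, when the total is not a multiple of ltb_size.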
579 */ 580 static int alloc_ltb_set(struct ibmvnic_adapter *adapter, 581 struct ibmvnic_ltb_set *ltb_set, int num_buffs, 582 int buff_size) 583 { 584 struct device *dev = &adapter->vdev->dev; 585 struct ibmvnic_ltb_set old_set; 586 struct ibmvnic_ltb_set new_set; 587 int rem_size; 588 int tot_size; /* size of all ltbs */ 589 int ltb_size; /* size of one ltb */ 590 int nltbs; 591 int rc; 592 int n; 593 int i; 594 595 dev_dbg(dev, "%s() num_buffs %d, buff_size %d\n", __func__, num_buffs, 596 buff_size); 597 598 ltb_size = rounddown(IBMVNIC_ONE_LTB_SIZE, buff_size); 599 tot_size = num_buffs * buff_size; 600 601 if (ltb_size > tot_size) 602 ltb_size = tot_size; 603 604 nltbs = tot_size / ltb_size; 605 if (tot_size % ltb_size) 606 nltbs++; 607 608 old_set = *ltb_set; 609 610 if (old_set.num_ltbs == nltbs) { 611 new_set = old_set; 612 } else { 613 int tmp = nltbs * sizeof(struct ibmvnic_long_term_buff); 614 615 new_set.ltbs = kzalloc(tmp, GFP_KERNEL); 616 if (!new_set.ltbs) 617 return -ENOMEM; 618 619 new_set.num_ltbs = nltbs; 620 621 /* Free any excess ltbs in old set */ 622 for (i = new_set.num_ltbs; i < old_set.num_ltbs; i++) 623 free_long_term_buff(adapter, &old_set.ltbs[i]); 624 625 /* Copy remaining ltbs to new set. All LTBs except the 626 * last one are of the same size. alloc_long_term_buff() 627 * will realloc if the size changes. 628 */ 629 n = min(old_set.num_ltbs, new_set.num_ltbs); 630 for (i = 0; i < n; i++) 631 new_set.ltbs[i] = old_set.ltbs[i]; 632 633 /* Any additional ltbs in new set will have NULL ltbs for 634 * now and will be allocated in alloc_long_term_buff(). 635 */ 636 637 /* We no longer need the old_set so free it. Note that we 638 * may have reused some ltbs from old set and freed excess 639 * ltbs above. So we only need to free the container now 640 * not the LTBs themselves. (i.e. dont free_ltb_set()!) 641 */ 642 kfree(old_set.ltbs); 643 old_set.ltbs = NULL; 644 old_set.num_ltbs = 0; 645 646 /* Install the new set. If allocations fail below, we will 647 * retry later and know what size LTBs we need. 648 */ 649 *ltb_set = new_set; 650 } 651 652 i = 0; 653 rem_size = tot_size; 654 while (rem_size) { 655 if (ltb_size > rem_size) 656 ltb_size = rem_size; 657 658 rem_size -= ltb_size; 659 660 rc = alloc_long_term_buff(adapter, &new_set.ltbs[i], ltb_size); 661 if (rc) 662 goto out; 663 i++; 664 } 665 666 WARN_ON(i != new_set.num_ltbs); 667 668 return 0; 669 out: 670 /* We may have allocated one/more LTBs before failing and we 671 * want to try and reuse on next reset. So don't free ltb set. 672 */ 673 return rc; 674 } 675 676 /** 677 * map_rxpool_buf_to_ltb - Map given rxpool buffer to offset in an LTB. 678 * @rxpool: The receive buffer pool containing buffer 679 * @bufidx: Index of buffer in rxpool 680 * @ltbp: (Output) pointer to the long term buffer containing the buffer 681 * @offset: (Output) offset of buffer in the LTB from @ltbp 682 * 683 * Map the given buffer identified by [rxpool, bufidx] to an LTB in the 684 * pool and its corresponding offset. Assume for now that each LTB is of 685 * different size but could possibly be optimized based on the allocation 686 * strategy in alloc_ltb_set(). 
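 *
 * For example, with 1024 buffers per LTB, bufidx 2500 walks past the first
 * two LTBs (2048 buffers) and resolves to the third LTB at offset
 * (2500 - 2048) * buff_size.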
687 */ 688 static void map_rxpool_buf_to_ltb(struct ibmvnic_rx_pool *rxpool, 689 unsigned int bufidx, 690 struct ibmvnic_long_term_buff **ltbp, 691 unsigned int *offset) 692 { 693 struct ibmvnic_long_term_buff *ltb; 694 int nbufs; /* # of buffers in one ltb */ 695 int i; 696 697 WARN_ON(bufidx >= rxpool->size); 698 699 for (i = 0; i < rxpool->ltb_set.num_ltbs; i++) { 700 ltb = &rxpool->ltb_set.ltbs[i]; 701 nbufs = ltb->size / rxpool->buff_size; 702 if (bufidx < nbufs) 703 break; 704 bufidx -= nbufs; 705 } 706 707 *ltbp = ltb; 708 *offset = bufidx * rxpool->buff_size; 709 } 710 711 /** 712 * map_txpool_buf_to_ltb - Map given txpool buffer to offset in an LTB. 713 * @txpool: The transmit buffer pool containing buffer 714 * @bufidx: Index of buffer in txpool 715 * @ltbp: (Output) pointer to the long term buffer (LTB) containing the buffer 716 * @offset: (Output) offset of buffer in the LTB from @ltbp 717 * 718 * Map the given buffer identified by [txpool, bufidx] to an LTB in the 719 * pool and its corresponding offset. 720 */ 721 static void map_txpool_buf_to_ltb(struct ibmvnic_tx_pool *txpool, 722 unsigned int bufidx, 723 struct ibmvnic_long_term_buff **ltbp, 724 unsigned int *offset) 725 { 726 struct ibmvnic_long_term_buff *ltb; 727 int nbufs; /* # of buffers in one ltb */ 728 int i; 729 730 WARN_ON_ONCE(bufidx >= txpool->num_buffers); 731 732 for (i = 0; i < txpool->ltb_set.num_ltbs; i++) { 733 ltb = &txpool->ltb_set.ltbs[i]; 734 nbufs = ltb->size / txpool->buf_size; 735 if (bufidx < nbufs) 736 break; 737 bufidx -= nbufs; 738 } 739 740 *ltbp = ltb; 741 *offset = bufidx * txpool->buf_size; 742 } 743 744 static void deactivate_rx_pools(struct ibmvnic_adapter *adapter) 745 { 746 int i; 747 748 for (i = 0; i < adapter->num_active_rx_pools; i++) 749 adapter->rx_pool[i].active = 0; 750 } 751 752 static void replenish_rx_pool(struct ibmvnic_adapter *adapter, 753 struct ibmvnic_rx_pool *pool) 754 { 755 int count = pool->size - atomic_read(&pool->available); 756 u64 handle = adapter->rx_scrq[pool->index]->handle; 757 struct device *dev = &adapter->vdev->dev; 758 struct ibmvnic_ind_xmit_queue *ind_bufp; 759 struct ibmvnic_sub_crq_queue *rx_scrq; 760 struct ibmvnic_long_term_buff *ltb; 761 union sub_crq *sub_crq; 762 int buffers_added = 0; 763 unsigned long lpar_rc; 764 struct sk_buff *skb; 765 unsigned int offset; 766 dma_addr_t dma_addr; 767 unsigned char *dst; 768 int shift = 0; 769 int bufidx; 770 int i; 771 772 if (!pool->active) 773 return; 774 775 rx_scrq = adapter->rx_scrq[pool->index]; 776 ind_bufp = &rx_scrq->ind_buf; 777 778 /* netdev_skb_alloc() could have failed after we saved a few skbs 779 * in the indir_buf and we would not have sent them to VIOS yet. 780 * To account for them, start the loop at ind_bufp->index rather 781 * than 0. If we pushed all the skbs to VIOS, ind_bufp->index will 782 * be 0. 783 */ 784 for (i = ind_bufp->index; i < count; ++i) { 785 bufidx = pool->free_map[pool->next_free]; 786 787 /* We maybe reusing the skb from earlier resets. Allocate 788 * only if necessary. But since the LTB may have changed 789 * during reset (see init_rx_pools()), update LTB below 790 * even if reusing skb. 
		 */
		skb = pool->rx_buff[bufidx].skb;
		if (!skb) {
			skb = netdev_alloc_skb(adapter->netdev,
					       pool->buff_size);
			if (!skb) {
				dev_err(dev, "Couldn't replenish rx buff\n");
				adapter->replenish_no_mem++;
				break;
			}
		}

		pool->free_map[pool->next_free] = IBMVNIC_INVALID_MAP;
		pool->next_free = (pool->next_free + 1) % pool->size;

		/* Copy the skb to the long term mapped DMA buffer */
		map_rxpool_buf_to_ltb(pool, bufidx, &ltb, &offset);
		dst = ltb->buff + offset;
		memset(dst, 0, pool->buff_size);
		dma_addr = ltb->addr + offset;

		/* add the skb to an rx_buff in the pool */
		pool->rx_buff[bufidx].data = dst;
		pool->rx_buff[bufidx].dma = dma_addr;
		pool->rx_buff[bufidx].skb = skb;
		pool->rx_buff[bufidx].pool_index = pool->index;
		pool->rx_buff[bufidx].size = pool->buff_size;

		/* queue the rx_buff for the next send_subcrq_indirect */
		sub_crq = &ind_bufp->indir_arr[ind_bufp->index++];
		memset(sub_crq, 0, sizeof(*sub_crq));
		sub_crq->rx_add.first = IBMVNIC_CRQ_CMD;
		sub_crq->rx_add.correlator =
			cpu_to_be64((u64)&pool->rx_buff[bufidx]);
		sub_crq->rx_add.ioba = cpu_to_be32(dma_addr);
		sub_crq->rx_add.map_id = ltb->map_id;

		/* The length field of the sCRQ is defined to be 24 bits so the
		 * buffer size needs to be left shifted by a byte before it is
		 * converted to big endian to prevent the last byte from being
		 * truncated.
		 */
#ifdef __LITTLE_ENDIAN__
		shift = 8;
#endif
		sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift);

		/* if send_subcrq_indirect queue is full, flush to VIOS */
		if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS ||
		    i == count - 1) {
			lpar_rc =
				send_subcrq_indirect(adapter, handle,
						     (u64)ind_bufp->indir_dma,
						     (u64)ind_bufp->index);
			if (lpar_rc != H_SUCCESS)
				goto failure;
			buffers_added += ind_bufp->index;
			adapter->replenish_add_buff_success += ind_bufp->index;
			ind_bufp->index = 0;
		}
	}
	atomic_add(buffers_added, &pool->available);
	return;

failure:
	if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED)
		dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n");
	for (i = ind_bufp->index - 1; i >= 0; --i) {
		struct ibmvnic_rx_buff *rx_buff;

		pool->next_free = pool->next_free == 0 ?
				  pool->size - 1 : pool->next_free - 1;
		sub_crq = &ind_bufp->indir_arr[i];
		rx_buff = (struct ibmvnic_rx_buff *)
			  be64_to_cpu(sub_crq->rx_add.correlator);
		bufidx = (int)(rx_buff - pool->rx_buff);
		pool->free_map[pool->next_free] = bufidx;
		dev_kfree_skb_any(pool->rx_buff[bufidx].skb);
		pool->rx_buff[bufidx].skb = NULL;
	}
	adapter->replenish_add_buff_failure += ind_bufp->index;
	atomic_add(buffers_added, &pool->available);
	ind_bufp->index = 0;
	if (lpar_rc == H_CLOSED || adapter->failover_pending) {
		/* Disable buffer pool replenishment and report carrier off if
		 * queue is closed or pending failover.
		 * Firmware guarantees that a signal will be sent to the
		 * driver, triggering a reset.
879 */ 880 deactivate_rx_pools(adapter); 881 netif_carrier_off(adapter->netdev); 882 } 883 } 884 885 static void replenish_pools(struct ibmvnic_adapter *adapter) 886 { 887 int i; 888 889 adapter->replenish_task_cycles++; 890 for (i = 0; i < adapter->num_active_rx_pools; i++) { 891 if (adapter->rx_pool[i].active) 892 replenish_rx_pool(adapter, &adapter->rx_pool[i]); 893 } 894 895 netdev_dbg(adapter->netdev, "Replenished %d pools\n", i); 896 } 897 898 static void release_stats_buffers(struct ibmvnic_adapter *adapter) 899 { 900 kfree(adapter->tx_stats_buffers); 901 kfree(adapter->rx_stats_buffers); 902 adapter->tx_stats_buffers = NULL; 903 adapter->rx_stats_buffers = NULL; 904 } 905 906 static int init_stats_buffers(struct ibmvnic_adapter *adapter) 907 { 908 adapter->tx_stats_buffers = 909 kcalloc(IBMVNIC_MAX_QUEUES, 910 sizeof(struct ibmvnic_tx_queue_stats), 911 GFP_KERNEL); 912 if (!adapter->tx_stats_buffers) 913 return -ENOMEM; 914 915 adapter->rx_stats_buffers = 916 kcalloc(IBMVNIC_MAX_QUEUES, 917 sizeof(struct ibmvnic_rx_queue_stats), 918 GFP_KERNEL); 919 if (!adapter->rx_stats_buffers) 920 return -ENOMEM; 921 922 return 0; 923 } 924 925 static void release_stats_token(struct ibmvnic_adapter *adapter) 926 { 927 struct device *dev = &adapter->vdev->dev; 928 929 if (!adapter->stats_token) 930 return; 931 932 dma_unmap_single(dev, adapter->stats_token, 933 sizeof(struct ibmvnic_statistics), 934 DMA_FROM_DEVICE); 935 adapter->stats_token = 0; 936 } 937 938 static int init_stats_token(struct ibmvnic_adapter *adapter) 939 { 940 struct device *dev = &adapter->vdev->dev; 941 dma_addr_t stok; 942 int rc; 943 944 stok = dma_map_single(dev, &adapter->stats, 945 sizeof(struct ibmvnic_statistics), 946 DMA_FROM_DEVICE); 947 rc = dma_mapping_error(dev, stok); 948 if (rc) { 949 dev_err(dev, "Couldn't map stats buffer, rc = %d\n", rc); 950 return rc; 951 } 952 953 adapter->stats_token = stok; 954 netdev_dbg(adapter->netdev, "Stats token initialized (%llx)\n", stok); 955 return 0; 956 } 957 958 /** 959 * release_rx_pools() - Release any rx pools attached to @adapter. 960 * @adapter: ibmvnic adapter 961 * 962 * Safe to call this multiple times - even if no pools are attached. 963 */ 964 static void release_rx_pools(struct ibmvnic_adapter *adapter) 965 { 966 struct ibmvnic_rx_pool *rx_pool; 967 int i, j; 968 969 if (!adapter->rx_pool) 970 return; 971 972 for (i = 0; i < adapter->num_active_rx_pools; i++) { 973 rx_pool = &adapter->rx_pool[i]; 974 975 netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i); 976 977 kfree(rx_pool->free_map); 978 979 free_ltb_set(adapter, &rx_pool->ltb_set); 980 981 if (!rx_pool->rx_buff) 982 continue; 983 984 for (j = 0; j < rx_pool->size; j++) { 985 if (rx_pool->rx_buff[j].skb) { 986 dev_kfree_skb_any(rx_pool->rx_buff[j].skb); 987 rx_pool->rx_buff[j].skb = NULL; 988 } 989 } 990 991 kfree(rx_pool->rx_buff); 992 } 993 994 kfree(adapter->rx_pool); 995 adapter->rx_pool = NULL; 996 adapter->num_active_rx_pools = 0; 997 adapter->prev_rx_pool_size = 0; 998 } 999 1000 /** 1001 * reuse_rx_pools() - Check if the existing rx pools can be reused. 1002 * @adapter: ibmvnic adapter 1003 * 1004 * Check if the existing rx pools in the adapter can be reused. The 1005 * pools can be reused if the pool parameters (number of pools, 1006 * number of buffers in the pool and size of each buffer) have not 1007 * changed. 1008 * 1009 * NOTE: This assumes that all pools have the same number of buffers 1010 * which is the case currently. If that changes, we must fix this. 
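 *
 * In practice these parameters only change when they are renegotiated -
 * for example a different req_rx_queues after an ethtool channel change,
 * or a different cur_rx_buf_sz handed back by the VIOS at login - so a
 * plain FAILOVER or TIMEOUT reset will normally reuse the existing pools.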
1011 * 1012 * Return: true if the rx pools can be reused, false otherwise. 1013 */ 1014 static bool reuse_rx_pools(struct ibmvnic_adapter *adapter) 1015 { 1016 u64 old_num_pools, new_num_pools; 1017 u64 old_pool_size, new_pool_size; 1018 u64 old_buff_size, new_buff_size; 1019 1020 if (!adapter->rx_pool) 1021 return false; 1022 1023 old_num_pools = adapter->num_active_rx_pools; 1024 new_num_pools = adapter->req_rx_queues; 1025 1026 old_pool_size = adapter->prev_rx_pool_size; 1027 new_pool_size = adapter->req_rx_add_entries_per_subcrq; 1028 1029 old_buff_size = adapter->prev_rx_buf_sz; 1030 new_buff_size = adapter->cur_rx_buf_sz; 1031 1032 if (old_buff_size != new_buff_size || 1033 old_num_pools != new_num_pools || 1034 old_pool_size != new_pool_size) 1035 return false; 1036 1037 return true; 1038 } 1039 1040 /** 1041 * init_rx_pools(): Initialize the set of receiver pools in the adapter. 1042 * @netdev: net device associated with the vnic interface 1043 * 1044 * Initialize the set of receiver pools in the ibmvnic adapter associated 1045 * with the net_device @netdev. If possible, reuse the existing rx pools. 1046 * Otherwise free any existing pools and allocate a new set of pools 1047 * before initializing them. 1048 * 1049 * Return: 0 on success and negative value on error. 1050 */ 1051 static int init_rx_pools(struct net_device *netdev) 1052 { 1053 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 1054 struct device *dev = &adapter->vdev->dev; 1055 struct ibmvnic_rx_pool *rx_pool; 1056 u64 num_pools; 1057 u64 pool_size; /* # of buffers in one pool */ 1058 u64 buff_size; 1059 int i, j, rc; 1060 1061 pool_size = adapter->req_rx_add_entries_per_subcrq; 1062 num_pools = adapter->req_rx_queues; 1063 buff_size = adapter->cur_rx_buf_sz; 1064 1065 if (reuse_rx_pools(adapter)) { 1066 dev_dbg(dev, "Reusing rx pools\n"); 1067 goto update_ltb; 1068 } 1069 1070 /* Allocate/populate the pools. */ 1071 release_rx_pools(adapter); 1072 1073 adapter->rx_pool = kcalloc(num_pools, 1074 sizeof(struct ibmvnic_rx_pool), 1075 GFP_KERNEL); 1076 if (!adapter->rx_pool) { 1077 dev_err(dev, "Failed to allocate rx pools\n"); 1078 return -ENOMEM; 1079 } 1080 1081 /* Set num_active_rx_pools early. If we fail below after partial 1082 * allocation, release_rx_pools() will know how many to look for. 
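	 * (release_rx_pools() copes with partially initialized entries:
	 * the pool array is zeroed by kcalloc(), and NULL free_map/rx_buff
	 * pointers are simply skipped or kfree()d.)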
1083 */ 1084 adapter->num_active_rx_pools = num_pools; 1085 1086 for (i = 0; i < num_pools; i++) { 1087 rx_pool = &adapter->rx_pool[i]; 1088 1089 netdev_dbg(adapter->netdev, 1090 "Initializing rx_pool[%d], %lld buffs, %lld bytes each\n", 1091 i, pool_size, buff_size); 1092 1093 rx_pool->size = pool_size; 1094 rx_pool->index = i; 1095 rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES); 1096 1097 rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int), 1098 GFP_KERNEL); 1099 if (!rx_pool->free_map) { 1100 dev_err(dev, "Couldn't alloc free_map %d\n", i); 1101 rc = -ENOMEM; 1102 goto out_release; 1103 } 1104 1105 rx_pool->rx_buff = kcalloc(rx_pool->size, 1106 sizeof(struct ibmvnic_rx_buff), 1107 GFP_KERNEL); 1108 if (!rx_pool->rx_buff) { 1109 dev_err(dev, "Couldn't alloc rx buffers\n"); 1110 rc = -ENOMEM; 1111 goto out_release; 1112 } 1113 } 1114 1115 adapter->prev_rx_pool_size = pool_size; 1116 adapter->prev_rx_buf_sz = adapter->cur_rx_buf_sz; 1117 1118 update_ltb: 1119 for (i = 0; i < num_pools; i++) { 1120 rx_pool = &adapter->rx_pool[i]; 1121 dev_dbg(dev, "Updating LTB for rx pool %d [%d, %d]\n", 1122 i, rx_pool->size, rx_pool->buff_size); 1123 1124 rc = alloc_ltb_set(adapter, &rx_pool->ltb_set, 1125 rx_pool->size, rx_pool->buff_size); 1126 if (rc) 1127 goto out; 1128 1129 for (j = 0; j < rx_pool->size; ++j) { 1130 struct ibmvnic_rx_buff *rx_buff; 1131 1132 rx_pool->free_map[j] = j; 1133 1134 /* NOTE: Don't clear rx_buff->skb here - will leak 1135 * memory! replenish_rx_pool() will reuse skbs or 1136 * allocate as necessary. 1137 */ 1138 rx_buff = &rx_pool->rx_buff[j]; 1139 rx_buff->dma = 0; 1140 rx_buff->data = 0; 1141 rx_buff->size = 0; 1142 rx_buff->pool_index = 0; 1143 } 1144 1145 /* Mark pool "empty" so replenish_rx_pools() will 1146 * update the LTB info for each buffer 1147 */ 1148 atomic_set(&rx_pool->available, 0); 1149 rx_pool->next_alloc = 0; 1150 rx_pool->next_free = 0; 1151 /* replenish_rx_pool() may have called deactivate_rx_pools() 1152 * on failover. Ensure pool is active now. 1153 */ 1154 rx_pool->active = 1; 1155 } 1156 return 0; 1157 out_release: 1158 release_rx_pools(adapter); 1159 out: 1160 /* We failed to allocate one or more LTBs or map them on the VIOS. 1161 * Hold onto the pools and any LTBs that we did allocate/map. 1162 */ 1163 return rc; 1164 } 1165 1166 static void release_vpd_data(struct ibmvnic_adapter *adapter) 1167 { 1168 if (!adapter->vpd) 1169 return; 1170 1171 kfree(adapter->vpd->buff); 1172 kfree(adapter->vpd); 1173 1174 adapter->vpd = NULL; 1175 } 1176 1177 static void release_one_tx_pool(struct ibmvnic_adapter *adapter, 1178 struct ibmvnic_tx_pool *tx_pool) 1179 { 1180 kfree(tx_pool->tx_buff); 1181 kfree(tx_pool->free_map); 1182 free_ltb_set(adapter, &tx_pool->ltb_set); 1183 } 1184 1185 /** 1186 * release_tx_pools() - Release any tx pools attached to @adapter. 1187 * @adapter: ibmvnic adapter 1188 * 1189 * Safe to call this multiple times - even if no pools are attached. 1190 */ 1191 static void release_tx_pools(struct ibmvnic_adapter *adapter) 1192 { 1193 int i; 1194 1195 /* init_tx_pools() ensures that ->tx_pool and ->tso_pool are 1196 * both NULL or both non-NULL. So we only need to check one. 
1197 */ 1198 if (!adapter->tx_pool) 1199 return; 1200 1201 for (i = 0; i < adapter->num_active_tx_pools; i++) { 1202 release_one_tx_pool(adapter, &adapter->tx_pool[i]); 1203 release_one_tx_pool(adapter, &adapter->tso_pool[i]); 1204 } 1205 1206 kfree(adapter->tx_pool); 1207 adapter->tx_pool = NULL; 1208 kfree(adapter->tso_pool); 1209 adapter->tso_pool = NULL; 1210 adapter->num_active_tx_pools = 0; 1211 adapter->prev_tx_pool_size = 0; 1212 } 1213 1214 static int init_one_tx_pool(struct net_device *netdev, 1215 struct ibmvnic_tx_pool *tx_pool, 1216 int pool_size, int buf_size) 1217 { 1218 int i; 1219 1220 tx_pool->tx_buff = kcalloc(pool_size, 1221 sizeof(struct ibmvnic_tx_buff), 1222 GFP_KERNEL); 1223 if (!tx_pool->tx_buff) 1224 return -ENOMEM; 1225 1226 tx_pool->free_map = kcalloc(pool_size, sizeof(int), GFP_KERNEL); 1227 if (!tx_pool->free_map) { 1228 kfree(tx_pool->tx_buff); 1229 tx_pool->tx_buff = NULL; 1230 return -ENOMEM; 1231 } 1232 1233 for (i = 0; i < pool_size; i++) 1234 tx_pool->free_map[i] = i; 1235 1236 tx_pool->consumer_index = 0; 1237 tx_pool->producer_index = 0; 1238 tx_pool->num_buffers = pool_size; 1239 tx_pool->buf_size = buf_size; 1240 1241 return 0; 1242 } 1243 1244 /** 1245 * reuse_tx_pools() - Check if the existing tx pools can be reused. 1246 * @adapter: ibmvnic adapter 1247 * 1248 * Check if the existing tx pools in the adapter can be reused. The 1249 * pools can be reused if the pool parameters (number of pools, 1250 * number of buffers in the pool and mtu) have not changed. 1251 * 1252 * NOTE: This assumes that all pools have the same number of buffers 1253 * which is the case currently. If that changes, we must fix this. 1254 * 1255 * Return: true if the tx pools can be reused, false otherwise. 1256 */ 1257 static bool reuse_tx_pools(struct ibmvnic_adapter *adapter) 1258 { 1259 u64 old_num_pools, new_num_pools; 1260 u64 old_pool_size, new_pool_size; 1261 u64 old_mtu, new_mtu; 1262 1263 if (!adapter->tx_pool) 1264 return false; 1265 1266 old_num_pools = adapter->num_active_tx_pools; 1267 new_num_pools = adapter->num_active_tx_scrqs; 1268 old_pool_size = adapter->prev_tx_pool_size; 1269 new_pool_size = adapter->req_tx_entries_per_subcrq; 1270 old_mtu = adapter->prev_mtu; 1271 new_mtu = adapter->req_mtu; 1272 1273 if (old_mtu != new_mtu || 1274 old_num_pools != new_num_pools || 1275 old_pool_size != new_pool_size) 1276 return false; 1277 1278 return true; 1279 } 1280 1281 /** 1282 * init_tx_pools(): Initialize the set of transmit pools in the adapter. 1283 * @netdev: net device associated with the vnic interface 1284 * 1285 * Initialize the set of transmit pools in the ibmvnic adapter associated 1286 * with the net_device @netdev. If possible, reuse the existing tx pools. 1287 * Otherwise free any existing pools and allocate a new set of pools 1288 * before initializing them. 1289 * 1290 * Return: 0 on success and negative value on error. 1291 */ 1292 static int init_tx_pools(struct net_device *netdev) 1293 { 1294 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 1295 struct device *dev = &adapter->vdev->dev; 1296 int num_pools; 1297 u64 pool_size; /* # of buffers in pool */ 1298 u64 buff_size; 1299 int i, j, rc; 1300 1301 num_pools = adapter->req_tx_queues; 1302 1303 /* We must notify the VIOS about the LTB on all resets - but we only 1304 * need to alloc/populate pools if either the number of buffers or 1305 * size of each buffer in the pool has changed. 
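	 * Even when the pools are reused we still jump to update_ltb below,
	 * because alloc_long_term_buff() has to re-issue REQUEST_MAP for
	 * every LTB - after some resets we may be talking to a new VIOS.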
1306 */ 1307 if (reuse_tx_pools(adapter)) { 1308 netdev_dbg(netdev, "Reusing tx pools\n"); 1309 goto update_ltb; 1310 } 1311 1312 /* Allocate/populate the pools. */ 1313 release_tx_pools(adapter); 1314 1315 pool_size = adapter->req_tx_entries_per_subcrq; 1316 num_pools = adapter->num_active_tx_scrqs; 1317 1318 adapter->tx_pool = kcalloc(num_pools, 1319 sizeof(struct ibmvnic_tx_pool), GFP_KERNEL); 1320 if (!adapter->tx_pool) 1321 return -ENOMEM; 1322 1323 adapter->tso_pool = kcalloc(num_pools, 1324 sizeof(struct ibmvnic_tx_pool), GFP_KERNEL); 1325 /* To simplify release_tx_pools() ensure that ->tx_pool and 1326 * ->tso_pool are either both NULL or both non-NULL. 1327 */ 1328 if (!adapter->tso_pool) { 1329 kfree(adapter->tx_pool); 1330 adapter->tx_pool = NULL; 1331 return -ENOMEM; 1332 } 1333 1334 /* Set num_active_tx_pools early. If we fail below after partial 1335 * allocation, release_tx_pools() will know how many to look for. 1336 */ 1337 adapter->num_active_tx_pools = num_pools; 1338 1339 buff_size = adapter->req_mtu + VLAN_HLEN; 1340 buff_size = ALIGN(buff_size, L1_CACHE_BYTES); 1341 1342 for (i = 0; i < num_pools; i++) { 1343 dev_dbg(dev, "Init tx pool %d [%llu, %llu]\n", 1344 i, adapter->req_tx_entries_per_subcrq, buff_size); 1345 1346 rc = init_one_tx_pool(netdev, &adapter->tx_pool[i], 1347 pool_size, buff_size); 1348 if (rc) 1349 goto out_release; 1350 1351 rc = init_one_tx_pool(netdev, &adapter->tso_pool[i], 1352 IBMVNIC_TSO_BUFS, 1353 IBMVNIC_TSO_BUF_SZ); 1354 if (rc) 1355 goto out_release; 1356 } 1357 1358 adapter->prev_tx_pool_size = pool_size; 1359 adapter->prev_mtu = adapter->req_mtu; 1360 1361 update_ltb: 1362 /* NOTE: All tx_pools have the same number of buffers (which is 1363 * same as pool_size). All tso_pools have IBMVNIC_TSO_BUFS 1364 * buffers (see calls init_one_tx_pool() for these). 1365 * For consistency, we use tx_pool->num_buffers and 1366 * tso_pool->num_buffers below. 1367 */ 1368 rc = -1; 1369 for (i = 0; i < num_pools; i++) { 1370 struct ibmvnic_tx_pool *tso_pool; 1371 struct ibmvnic_tx_pool *tx_pool; 1372 1373 tx_pool = &adapter->tx_pool[i]; 1374 1375 dev_dbg(dev, "Updating LTB for tx pool %d [%d, %d]\n", 1376 i, tx_pool->num_buffers, tx_pool->buf_size); 1377 1378 rc = alloc_ltb_set(adapter, &tx_pool->ltb_set, 1379 tx_pool->num_buffers, tx_pool->buf_size); 1380 if (rc) 1381 goto out; 1382 1383 tx_pool->consumer_index = 0; 1384 tx_pool->producer_index = 0; 1385 1386 for (j = 0; j < tx_pool->num_buffers; j++) 1387 tx_pool->free_map[j] = j; 1388 1389 tso_pool = &adapter->tso_pool[i]; 1390 1391 dev_dbg(dev, "Updating LTB for tso pool %d [%d, %d]\n", 1392 i, tso_pool->num_buffers, tso_pool->buf_size); 1393 1394 rc = alloc_ltb_set(adapter, &tso_pool->ltb_set, 1395 tso_pool->num_buffers, tso_pool->buf_size); 1396 if (rc) 1397 goto out; 1398 1399 tso_pool->consumer_index = 0; 1400 tso_pool->producer_index = 0; 1401 1402 for (j = 0; j < tso_pool->num_buffers; j++) 1403 tso_pool->free_map[j] = j; 1404 } 1405 1406 return 0; 1407 out_release: 1408 release_tx_pools(adapter); 1409 out: 1410 /* We failed to allocate one or more LTBs or map them on the VIOS. 1411 * Hold onto the pools and any LTBs that we did allocate/map. 
1412 */ 1413 return rc; 1414 } 1415 1416 static void ibmvnic_napi_enable(struct ibmvnic_adapter *adapter) 1417 { 1418 int i; 1419 1420 if (adapter->napi_enabled) 1421 return; 1422 1423 for (i = 0; i < adapter->req_rx_queues; i++) 1424 napi_enable(&adapter->napi[i]); 1425 1426 adapter->napi_enabled = true; 1427 } 1428 1429 static void ibmvnic_napi_disable(struct ibmvnic_adapter *adapter) 1430 { 1431 int i; 1432 1433 if (!adapter->napi_enabled) 1434 return; 1435 1436 for (i = 0; i < adapter->req_rx_queues; i++) { 1437 netdev_dbg(adapter->netdev, "Disabling napi[%d]\n", i); 1438 napi_disable(&adapter->napi[i]); 1439 } 1440 1441 adapter->napi_enabled = false; 1442 } 1443 1444 static int init_napi(struct ibmvnic_adapter *adapter) 1445 { 1446 int i; 1447 1448 adapter->napi = kcalloc(adapter->req_rx_queues, 1449 sizeof(struct napi_struct), GFP_KERNEL); 1450 if (!adapter->napi) 1451 return -ENOMEM; 1452 1453 for (i = 0; i < adapter->req_rx_queues; i++) { 1454 netdev_dbg(adapter->netdev, "Adding napi[%d]\n", i); 1455 netif_napi_add(adapter->netdev, &adapter->napi[i], 1456 ibmvnic_poll); 1457 } 1458 1459 adapter->num_active_rx_napi = adapter->req_rx_queues; 1460 return 0; 1461 } 1462 1463 static void release_napi(struct ibmvnic_adapter *adapter) 1464 { 1465 int i; 1466 1467 if (!adapter->napi) 1468 return; 1469 1470 for (i = 0; i < adapter->num_active_rx_napi; i++) { 1471 netdev_dbg(adapter->netdev, "Releasing napi[%d]\n", i); 1472 netif_napi_del(&adapter->napi[i]); 1473 } 1474 1475 kfree(adapter->napi); 1476 adapter->napi = NULL; 1477 adapter->num_active_rx_napi = 0; 1478 adapter->napi_enabled = false; 1479 } 1480 1481 static const char *adapter_state_to_string(enum vnic_state state) 1482 { 1483 switch (state) { 1484 case VNIC_PROBING: 1485 return "PROBING"; 1486 case VNIC_PROBED: 1487 return "PROBED"; 1488 case VNIC_OPENING: 1489 return "OPENING"; 1490 case VNIC_OPEN: 1491 return "OPEN"; 1492 case VNIC_CLOSING: 1493 return "CLOSING"; 1494 case VNIC_CLOSED: 1495 return "CLOSED"; 1496 case VNIC_REMOVING: 1497 return "REMOVING"; 1498 case VNIC_REMOVED: 1499 return "REMOVED"; 1500 case VNIC_DOWN: 1501 return "DOWN"; 1502 } 1503 return "UNKNOWN"; 1504 } 1505 1506 static int ibmvnic_login(struct net_device *netdev) 1507 { 1508 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 1509 unsigned long timeout = msecs_to_jiffies(20000); 1510 int retry_count = 0; 1511 int retries = 10; 1512 bool retry; 1513 int rc; 1514 1515 do { 1516 retry = false; 1517 if (retry_count > retries) { 1518 netdev_warn(netdev, "Login attempts exceeded\n"); 1519 return -EACCES; 1520 } 1521 1522 adapter->init_done_rc = 0; 1523 reinit_completion(&adapter->init_done); 1524 rc = send_login(adapter); 1525 if (rc) 1526 return rc; 1527 1528 if (!wait_for_completion_timeout(&adapter->init_done, 1529 timeout)) { 1530 netdev_warn(netdev, "Login timed out, retrying...\n"); 1531 retry = true; 1532 adapter->init_done_rc = 0; 1533 retry_count++; 1534 continue; 1535 } 1536 1537 if (adapter->init_done_rc == ABORTED) { 1538 netdev_warn(netdev, "Login aborted, retrying...\n"); 1539 retry = true; 1540 adapter->init_done_rc = 0; 1541 retry_count++; 1542 /* FW or device may be busy, so 1543 * wait a bit before retrying login 1544 */ 1545 msleep(500); 1546 } else if (adapter->init_done_rc == PARTIALSUCCESS) { 1547 retry_count++; 1548 release_sub_crqs(adapter, 1); 1549 1550 retry = true; 1551 netdev_dbg(netdev, 1552 "Received partial success, retrying...\n"); 1553 adapter->init_done_rc = 0; 1554 reinit_completion(&adapter->init_done); 1555 
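			/* PARTIALSUCCESS from the VIOS generally means the
			 * login could not be honored with the currently
			 * requested resources, so re-query the capabilities
			 * and rebuild the sub-CRQs before logging in again.
			 */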
			send_query_cap(adapter);
			if (!wait_for_completion_timeout(&adapter->init_done,
							 timeout)) {
				netdev_warn(netdev,
					    "Capabilities query timed out\n");
				return -ETIMEDOUT;
			}

			rc = init_sub_crqs(adapter);
			if (rc) {
				netdev_warn(netdev,
					    "SCRQ initialization failed\n");
				return rc;
			}

			rc = init_sub_crq_irqs(adapter);
			if (rc) {
				netdev_warn(netdev,
					    "SCRQ irq initialization failed\n");
				return rc;
			}
		} else if (adapter->init_done_rc) {
			netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
				    adapter->init_done_rc);
			return -EIO;
		}
	} while (retry);

	__ibmvnic_set_mac(netdev, adapter->mac_addr);

	netdev_dbg(netdev, "[S:%s] Login succeeded\n", adapter_state_to_string(adapter->state));
	return 0;
}

static void release_login_buffer(struct ibmvnic_adapter *adapter)
{
	kfree(adapter->login_buf);
	adapter->login_buf = NULL;
}

static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
{
	kfree(adapter->login_rsp_buf);
	adapter->login_rsp_buf = NULL;
}

static void release_resources(struct ibmvnic_adapter *adapter)
{
	release_vpd_data(adapter);

	release_napi(adapter);
	release_login_buffer(adapter);
	release_login_rsp_buffer(adapter);
}

static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
{
	struct net_device *netdev = adapter->netdev;
	unsigned long timeout = msecs_to_jiffies(20000);
	union ibmvnic_crq crq;
	bool resend;
	int rc;

	netdev_dbg(netdev, "setting link state %d\n", link_state);

	memset(&crq, 0, sizeof(crq));
	crq.logical_link_state.first = IBMVNIC_CRQ_CMD;
	crq.logical_link_state.cmd = LOGICAL_LINK_STATE;
	crq.logical_link_state.link_state = link_state;

	do {
		resend = false;

		reinit_completion(&adapter->init_done);
		rc = ibmvnic_send_crq(adapter, &crq);
		if (rc) {
			netdev_err(netdev, "Failed to set link state\n");
			return rc;
		}

		if (!wait_for_completion_timeout(&adapter->init_done,
						 timeout)) {
			netdev_err(netdev, "timeout setting link state\n");
			return -ETIMEDOUT;
		}

		if (adapter->init_done_rc == PARTIALSUCCESS) {
			/* Partial success, delay and re-send */
			mdelay(1000);
			resend = true;
		} else if (adapter->init_done_rc) {
			netdev_warn(netdev, "Unable to set link state, rc=%d\n",
				    adapter->init_done_rc);
			return adapter->init_done_rc;
		}
	} while (resend);

	return 0;
}

static int set_real_num_queues(struct net_device *netdev)
{
	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
	int rc;

	netdev_dbg(netdev, "Setting real tx/rx queues (%llx/%llx)\n",
		   adapter->req_tx_queues, adapter->req_rx_queues);

	rc = netif_set_real_num_tx_queues(netdev, adapter->req_tx_queues);
	if (rc) {
		netdev_err(netdev, "failed to set the number of tx queues\n");
		return rc;
	}

	rc = netif_set_real_num_rx_queues(netdev, adapter->req_rx_queues);
	if (rc)
		netdev_err(netdev, "failed to set the number of rx queues\n");

	return rc;
}

static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
{
	struct device *dev = &adapter->vdev->dev;
	union ibmvnic_crq crq;
	int len = 0;
	int rc;

	if (adapter->vpd->buff)
		len = adapter->vpd->len;

	mutex_lock(&adapter->fw_lock);
	adapter->fw_done_rc = 0;
	reinit_completion(&adapter->fw_done);

	crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
	crq.get_vpd_size.cmd = GET_VPD_SIZE;
	rc = ibmvnic_send_crq(adapter, &crq);
	if (rc) {
		mutex_unlock(&adapter->fw_lock);
		return rc;
	}

	rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
	if (rc) {
		dev_err(dev, "Could not retrieve VPD size, rc = %d\n", rc);
		mutex_unlock(&adapter->fw_lock);
		return rc;
	}
	mutex_unlock(&adapter->fw_lock);

	if (!adapter->vpd->len)
		return -ENODATA;

	if (!adapter->vpd->buff)
		adapter->vpd->buff = kzalloc(adapter->vpd->len, GFP_KERNEL);
	else if (adapter->vpd->len != len)
		adapter->vpd->buff =
			krealloc(adapter->vpd->buff,
				 adapter->vpd->len, GFP_KERNEL);

	if (!adapter->vpd->buff) {
		dev_err(dev, "Could not allocate VPD buffer\n");
		return -ENOMEM;
	}

	adapter->vpd->dma_addr =
		dma_map_single(dev, adapter->vpd->buff, adapter->vpd->len,
			       DMA_FROM_DEVICE);
	if (dma_mapping_error(dev, adapter->vpd->dma_addr)) {
		dev_err(dev, "Could not map VPD buffer\n");
		kfree(adapter->vpd->buff);
		adapter->vpd->buff = NULL;
		return -ENOMEM;
	}

	mutex_lock(&adapter->fw_lock);
	adapter->fw_done_rc = 0;
	reinit_completion(&adapter->fw_done);

	crq.get_vpd.first = IBMVNIC_CRQ_CMD;
	crq.get_vpd.cmd = GET_VPD;
	crq.get_vpd.ioba = cpu_to_be32(adapter->vpd->dma_addr);
	crq.get_vpd.len = cpu_to_be32((u32)adapter->vpd->len);
	rc = ibmvnic_send_crq(adapter, &crq);
	if (rc) {
		kfree(adapter->vpd->buff);
		adapter->vpd->buff = NULL;
		mutex_unlock(&adapter->fw_lock);
		return rc;
	}

	rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000);
	if (rc) {
		dev_err(dev, "Unable to retrieve VPD, rc = %d\n", rc);
		kfree(adapter->vpd->buff);
		adapter->vpd->buff = NULL;
		mutex_unlock(&adapter->fw_lock);
		return rc;
	}

	mutex_unlock(&adapter->fw_lock);
	return 0;
}

static int init_resources(struct ibmvnic_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int rc;

	rc = set_real_num_queues(netdev);
	if (rc)
		return rc;

	adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL);
	if (!adapter->vpd)
		return -ENOMEM;

	/* Vital Product Data (VPD) */
	rc = ibmvnic_get_vpd(adapter);
	if (rc) {
		netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n");
		return rc;
	}

	rc = init_napi(adapter);
	if (rc)
		return rc;

	send_query_map(adapter);

	rc = init_rx_pools(netdev);
	if (rc)
		return rc;

	rc = init_tx_pools(netdev);
	return rc;
}

static int __ibmvnic_open(struct net_device *netdev)
{
	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
	enum vnic_state prev_state = adapter->state;
	int i, rc;

	adapter->state = VNIC_OPENING;
	replenish_pools(adapter);
	ibmvnic_napi_enable(adapter);

	/* We're ready to receive frames, enable the sub-crq interrupts and
	 * set the logical link state to up
	 */
	for (i = 0; i < adapter->req_rx_queues; i++) {
		netdev_dbg(netdev, "Enabling rx_scrq[%d] irq\n", i);
		if (prev_state == VNIC_CLOSED)
			enable_irq(adapter->rx_scrq[i]->irq);
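		/* enable_irq() above only undoes the disable_irq() done on a
		 * prior close; enable_scrq_irq() additionally re-arms
		 * interrupt delivery on the sub-CRQ itself.
		 */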
enable_scrq_irq(adapter, adapter->rx_scrq[i]); 1812 } 1813 1814 for (i = 0; i < adapter->req_tx_queues; i++) { 1815 netdev_dbg(netdev, "Enabling tx_scrq[%d] irq\n", i); 1816 if (prev_state == VNIC_CLOSED) 1817 enable_irq(adapter->tx_scrq[i]->irq); 1818 enable_scrq_irq(adapter, adapter->tx_scrq[i]); 1819 netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i)); 1820 } 1821 1822 rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP); 1823 if (rc) { 1824 ibmvnic_napi_disable(adapter); 1825 ibmvnic_disable_irqs(adapter); 1826 return rc; 1827 } 1828 1829 adapter->tx_queues_active = true; 1830 1831 /* Since queues were stopped until now, there shouldn't be any 1832 * one in ibmvnic_complete_tx() or ibmvnic_xmit() so maybe we 1833 * don't need the synchronize_rcu()? Leaving it for consistency 1834 * with setting ->tx_queues_active = false. 1835 */ 1836 synchronize_rcu(); 1837 1838 netif_tx_start_all_queues(netdev); 1839 1840 if (prev_state == VNIC_CLOSED) { 1841 for (i = 0; i < adapter->req_rx_queues; i++) 1842 napi_schedule(&adapter->napi[i]); 1843 } 1844 1845 adapter->state = VNIC_OPEN; 1846 return rc; 1847 } 1848 1849 static int ibmvnic_open(struct net_device *netdev) 1850 { 1851 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 1852 int rc; 1853 1854 ASSERT_RTNL(); 1855 1856 /* If device failover is pending or we are about to reset, just set 1857 * device state and return. Device operation will be handled by reset 1858 * routine. 1859 * 1860 * It should be safe to overwrite the adapter->state here. Since 1861 * we hold the rtnl, either the reset has not actually started or 1862 * the rtnl got dropped during the set_link_state() in do_reset(). 1863 * In the former case, no one else is changing the state (again we 1864 * have the rtnl) and in the latter case, do_reset() will detect and 1865 * honor our setting below. 1866 */ 1867 if (adapter->failover_pending || (test_bit(0, &adapter->resetting))) { 1868 netdev_dbg(netdev, "[S:%s FOP:%d] Resetting, deferring open\n", 1869 adapter_state_to_string(adapter->state), 1870 adapter->failover_pending); 1871 adapter->state = VNIC_OPEN; 1872 rc = 0; 1873 goto out; 1874 } 1875 1876 if (adapter->state != VNIC_CLOSED) { 1877 rc = ibmvnic_login(netdev); 1878 if (rc) 1879 goto out; 1880 1881 rc = init_resources(adapter); 1882 if (rc) { 1883 netdev_err(netdev, "failed to initialize resources\n"); 1884 goto out; 1885 } 1886 } 1887 1888 rc = __ibmvnic_open(netdev); 1889 1890 out: 1891 /* If open failed and there is a pending failover or in-progress reset, 1892 * set device state and return. Device operation will be handled by 1893 * reset routine. See also comments above regarding rtnl. 
1894 */ 1895 if (rc && 1896 (adapter->failover_pending || (test_bit(0, &adapter->resetting)))) { 1897 adapter->state = VNIC_OPEN; 1898 rc = 0; 1899 } 1900 1901 if (rc) { 1902 release_resources(adapter); 1903 release_rx_pools(adapter); 1904 release_tx_pools(adapter); 1905 } 1906 1907 return rc; 1908 } 1909 1910 static void clean_rx_pools(struct ibmvnic_adapter *adapter) 1911 { 1912 struct ibmvnic_rx_pool *rx_pool; 1913 struct ibmvnic_rx_buff *rx_buff; 1914 u64 rx_entries; 1915 int rx_scrqs; 1916 int i, j; 1917 1918 if (!adapter->rx_pool) 1919 return; 1920 1921 rx_scrqs = adapter->num_active_rx_pools; 1922 rx_entries = adapter->req_rx_add_entries_per_subcrq; 1923 1924 /* Free any remaining skbs in the rx buffer pools */ 1925 for (i = 0; i < rx_scrqs; i++) { 1926 rx_pool = &adapter->rx_pool[i]; 1927 if (!rx_pool || !rx_pool->rx_buff) 1928 continue; 1929 1930 netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i); 1931 for (j = 0; j < rx_entries; j++) { 1932 rx_buff = &rx_pool->rx_buff[j]; 1933 if (rx_buff && rx_buff->skb) { 1934 dev_kfree_skb_any(rx_buff->skb); 1935 rx_buff->skb = NULL; 1936 } 1937 } 1938 } 1939 } 1940 1941 static void clean_one_tx_pool(struct ibmvnic_adapter *adapter, 1942 struct ibmvnic_tx_pool *tx_pool) 1943 { 1944 struct ibmvnic_tx_buff *tx_buff; 1945 u64 tx_entries; 1946 int i; 1947 1948 if (!tx_pool || !tx_pool->tx_buff) 1949 return; 1950 1951 tx_entries = tx_pool->num_buffers; 1952 1953 for (i = 0; i < tx_entries; i++) { 1954 tx_buff = &tx_pool->tx_buff[i]; 1955 if (tx_buff && tx_buff->skb) { 1956 dev_kfree_skb_any(tx_buff->skb); 1957 tx_buff->skb = NULL; 1958 } 1959 } 1960 } 1961 1962 static void clean_tx_pools(struct ibmvnic_adapter *adapter) 1963 { 1964 int tx_scrqs; 1965 int i; 1966 1967 if (!adapter->tx_pool || !adapter->tso_pool) 1968 return; 1969 1970 tx_scrqs = adapter->num_active_tx_pools; 1971 1972 /* Free any remaining skbs in the tx buffer pools */ 1973 for (i = 0; i < tx_scrqs; i++) { 1974 netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i); 1975 clean_one_tx_pool(adapter, &adapter->tx_pool[i]); 1976 clean_one_tx_pool(adapter, &adapter->tso_pool[i]); 1977 } 1978 } 1979 1980 static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter) 1981 { 1982 struct net_device *netdev = adapter->netdev; 1983 int i; 1984 1985 if (adapter->tx_scrq) { 1986 for (i = 0; i < adapter->req_tx_queues; i++) 1987 if (adapter->tx_scrq[i]->irq) { 1988 netdev_dbg(netdev, 1989 "Disabling tx_scrq[%d] irq\n", i); 1990 disable_scrq_irq(adapter, adapter->tx_scrq[i]); 1991 disable_irq(adapter->tx_scrq[i]->irq); 1992 } 1993 } 1994 1995 if (adapter->rx_scrq) { 1996 for (i = 0; i < adapter->req_rx_queues; i++) { 1997 if (adapter->rx_scrq[i]->irq) { 1998 netdev_dbg(netdev, 1999 "Disabling rx_scrq[%d] irq\n", i); 2000 disable_scrq_irq(adapter, adapter->rx_scrq[i]); 2001 disable_irq(adapter->rx_scrq[i]->irq); 2002 } 2003 } 2004 } 2005 } 2006 2007 static void ibmvnic_cleanup(struct net_device *netdev) 2008 { 2009 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 2010 2011 /* ensure that transmissions are stopped if called by do_reset */ 2012 2013 adapter->tx_queues_active = false; 2014 2015 /* Ensure complete_tx() and ibmvnic_xmit() see ->tx_queues_active 2016 * update so they don't restart a queue after we stop it below. 
2017 */ 2018 synchronize_rcu(); 2019 2020 if (test_bit(0, &adapter->resetting)) 2021 netif_tx_disable(netdev); 2022 else 2023 netif_tx_stop_all_queues(netdev); 2024 2025 ibmvnic_napi_disable(adapter); 2026 ibmvnic_disable_irqs(adapter); 2027 } 2028 2029 static int __ibmvnic_close(struct net_device *netdev) 2030 { 2031 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 2032 int rc = 0; 2033 2034 adapter->state = VNIC_CLOSING; 2035 rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN); 2036 adapter->state = VNIC_CLOSED; 2037 return rc; 2038 } 2039 2040 static int ibmvnic_close(struct net_device *netdev) 2041 { 2042 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 2043 int rc; 2044 2045 netdev_dbg(netdev, "[S:%s FOP:%d FRR:%d] Closing\n", 2046 adapter_state_to_string(adapter->state), 2047 adapter->failover_pending, 2048 adapter->force_reset_recovery); 2049 2050 /* If device failover is pending, just set device state and return. 2051 * Device operation will be handled by reset routine. 2052 */ 2053 if (adapter->failover_pending) { 2054 adapter->state = VNIC_CLOSED; 2055 return 0; 2056 } 2057 2058 rc = __ibmvnic_close(netdev); 2059 ibmvnic_cleanup(netdev); 2060 clean_rx_pools(adapter); 2061 clean_tx_pools(adapter); 2062 2063 return rc; 2064 } 2065 2066 /** 2067 * build_hdr_data - creates L2/L3/L4 header data buffer 2068 * @hdr_field: bitfield determining needed headers 2069 * @skb: socket buffer 2070 * @hdr_len: array of header lengths 2071 * @hdr_data: buffer to write the header to 2072 * 2073 * Reads hdr_field to determine which headers are needed by firmware. 2074 * Builds a buffer containing these headers. Saves individual header 2075 * lengths and total buffer length to be used to build descriptors. 2076 */ 2077 static int build_hdr_data(u8 hdr_field, struct sk_buff *skb, 2078 int *hdr_len, u8 *hdr_data) 2079 { 2080 int len = 0; 2081 u8 *hdr; 2082 2083 if (skb_vlan_tagged(skb) && !skb_vlan_tag_present(skb)) 2084 hdr_len[0] = sizeof(struct vlan_ethhdr); 2085 else 2086 hdr_len[0] = sizeof(struct ethhdr); 2087 2088 if (skb->protocol == htons(ETH_P_IP)) { 2089 hdr_len[1] = ip_hdr(skb)->ihl * 4; 2090 if (ip_hdr(skb)->protocol == IPPROTO_TCP) 2091 hdr_len[2] = tcp_hdrlen(skb); 2092 else if (ip_hdr(skb)->protocol == IPPROTO_UDP) 2093 hdr_len[2] = sizeof(struct udphdr); 2094 } else if (skb->protocol == htons(ETH_P_IPV6)) { 2095 hdr_len[1] = sizeof(struct ipv6hdr); 2096 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) 2097 hdr_len[2] = tcp_hdrlen(skb); 2098 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP) 2099 hdr_len[2] = sizeof(struct udphdr); 2100 } else if (skb->protocol == htons(ETH_P_ARP)) { 2101 hdr_len[1] = arp_hdr_len(skb->dev); 2102 hdr_len[2] = 0; 2103 } 2104 2105 memset(hdr_data, 0, 120); 2106 if ((hdr_field >> 6) & 1) { 2107 hdr = skb_mac_header(skb); 2108 memcpy(hdr_data, hdr, hdr_len[0]); 2109 len += hdr_len[0]; 2110 } 2111 2112 if ((hdr_field >> 5) & 1) { 2113 hdr = skb_network_header(skb); 2114 memcpy(hdr_data + len, hdr, hdr_len[1]); 2115 len += hdr_len[1]; 2116 } 2117 2118 if ((hdr_field >> 4) & 1) { 2119 hdr = skb_transport_header(skb); 2120 memcpy(hdr_data + len, hdr, hdr_len[2]); 2121 len += hdr_len[2]; 2122 } 2123 return len; 2124 } 2125 2126 /** 2127 * create_hdr_descs - create header and header extension descriptors 2128 * @hdr_field: bitfield determining needed headers 2129 * @hdr_data: buffer containing header data 2130 * @len: length of data buffer 2131 * @hdr_len: array of individual header lengths 2132 * @scrq_arr: descriptor array 2133 * 2134 * Creates header and, 
if needed, header extension descriptors and 2135 * places them in a descriptor array, scrq_arr 2136 */ 2137 2138 static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, 2139 union sub_crq *scrq_arr) 2140 { 2141 union sub_crq hdr_desc; 2142 int tmp_len = len; 2143 int num_descs = 0; 2144 u8 *data, *cur; 2145 int tmp; 2146 2147 while (tmp_len > 0) { 2148 cur = hdr_data + len - tmp_len; 2149 2150 memset(&hdr_desc, 0, sizeof(hdr_desc)); 2151 if (cur != hdr_data) { 2152 data = hdr_desc.hdr_ext.data; 2153 tmp = tmp_len > 29 ? 29 : tmp_len; 2154 hdr_desc.hdr_ext.first = IBMVNIC_CRQ_CMD; 2155 hdr_desc.hdr_ext.type = IBMVNIC_HDR_EXT_DESC; 2156 hdr_desc.hdr_ext.len = tmp; 2157 } else { 2158 data = hdr_desc.hdr.data; 2159 tmp = tmp_len > 24 ? 24 : tmp_len; 2160 hdr_desc.hdr.first = IBMVNIC_CRQ_CMD; 2161 hdr_desc.hdr.type = IBMVNIC_HDR_DESC; 2162 hdr_desc.hdr.len = tmp; 2163 hdr_desc.hdr.l2_len = (u8)hdr_len[0]; 2164 hdr_desc.hdr.l3_len = cpu_to_be16((u16)hdr_len[1]); 2165 hdr_desc.hdr.l4_len = (u8)hdr_len[2]; 2166 hdr_desc.hdr.flag = hdr_field << 1; 2167 } 2168 memcpy(data, cur, tmp); 2169 tmp_len -= tmp; 2170 *scrq_arr = hdr_desc; 2171 scrq_arr++; 2172 num_descs++; 2173 } 2174 2175 return num_descs; 2176 } 2177 2178 /** 2179 * build_hdr_descs_arr - build a header descriptor array 2180 * @skb: tx socket buffer 2181 * @indir_arr: indirect array 2182 * @num_entries: number of descriptors to be sent 2183 * @hdr_field: bit field determining which headers will be sent 2184 * 2185 * This function will build a TX descriptor array with applicable 2186 * L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect. 2187 */ 2188 2189 static void build_hdr_descs_arr(struct sk_buff *skb, 2190 union sub_crq *indir_arr, 2191 int *num_entries, u8 hdr_field) 2192 { 2193 int hdr_len[3] = {0, 0, 0}; 2194 u8 hdr_data[140] = {0}; 2195 int tot_len; 2196 2197 tot_len = build_hdr_data(hdr_field, skb, hdr_len, 2198 hdr_data); 2199 *num_entries += create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len, 2200 indir_arr + 1); 2201 } 2202 2203 static int ibmvnic_xmit_workarounds(struct sk_buff *skb, 2204 struct net_device *netdev) 2205 { 2206 /* For some backing devices, mishandling of small packets 2207 * can result in a loss of connection or TX stall. 
Device 2208 * architects recommend that no packet should be smaller 2209 * than the minimum MTU value provided to the driver, so 2210 * pad any packets to that length 2211 */ 2212 if (skb->len < netdev->min_mtu) 2213 return skb_put_padto(skb, netdev->min_mtu); 2214 2215 return 0; 2216 } 2217 2218 static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter, 2219 struct ibmvnic_sub_crq_queue *tx_scrq) 2220 { 2221 struct ibmvnic_ind_xmit_queue *ind_bufp; 2222 struct ibmvnic_tx_buff *tx_buff; 2223 struct ibmvnic_tx_pool *tx_pool; 2224 union sub_crq tx_scrq_entry; 2225 int queue_num; 2226 int entries; 2227 int index; 2228 int i; 2229 2230 ind_bufp = &tx_scrq->ind_buf; 2231 entries = (u64)ind_bufp->index; 2232 queue_num = tx_scrq->pool_index; 2233 2234 for (i = entries - 1; i >= 0; --i) { 2235 tx_scrq_entry = ind_bufp->indir_arr[i]; 2236 if (tx_scrq_entry.v1.type != IBMVNIC_TX_DESC) 2237 continue; 2238 index = be32_to_cpu(tx_scrq_entry.v1.correlator); 2239 if (index & IBMVNIC_TSO_POOL_MASK) { 2240 tx_pool = &adapter->tso_pool[queue_num]; 2241 index &= ~IBMVNIC_TSO_POOL_MASK; 2242 } else { 2243 tx_pool = &adapter->tx_pool[queue_num]; 2244 } 2245 tx_pool->free_map[tx_pool->consumer_index] = index; 2246 tx_pool->consumer_index = tx_pool->consumer_index == 0 ? 2247 tx_pool->num_buffers - 1 : 2248 tx_pool->consumer_index - 1; 2249 tx_buff = &tx_pool->tx_buff[index]; 2250 adapter->netdev->stats.tx_packets--; 2251 adapter->netdev->stats.tx_bytes -= tx_buff->skb->len; 2252 adapter->tx_stats_buffers[queue_num].packets--; 2253 adapter->tx_stats_buffers[queue_num].bytes -= 2254 tx_buff->skb->len; 2255 dev_kfree_skb_any(tx_buff->skb); 2256 tx_buff->skb = NULL; 2257 adapter->netdev->stats.tx_dropped++; 2258 } 2259 2260 ind_bufp->index = 0; 2261 2262 if (atomic_sub_return(entries, &tx_scrq->used) <= 2263 (adapter->req_tx_entries_per_subcrq / 2) && 2264 __netif_subqueue_stopped(adapter->netdev, queue_num)) { 2265 rcu_read_lock(); 2266 2267 if (adapter->tx_queues_active) { 2268 netif_wake_subqueue(adapter->netdev, queue_num); 2269 netdev_dbg(adapter->netdev, "Started queue %d\n", 2270 queue_num); 2271 } 2272 2273 rcu_read_unlock(); 2274 } 2275 } 2276 2277 static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter, 2278 struct ibmvnic_sub_crq_queue *tx_scrq) 2279 { 2280 struct ibmvnic_ind_xmit_queue *ind_bufp; 2281 u64 dma_addr; 2282 u64 entries; 2283 u64 handle; 2284 int rc; 2285 2286 ind_bufp = &tx_scrq->ind_buf; 2287 dma_addr = (u64)ind_bufp->indir_dma; 2288 entries = (u64)ind_bufp->index; 2289 handle = tx_scrq->handle; 2290 2291 if (!entries) 2292 return 0; 2293 rc = send_subcrq_indirect(adapter, handle, dma_addr, entries); 2294 if (rc) 2295 ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq); 2296 else 2297 ind_bufp->index = 0; 2298 return 0; 2299 } 2300 2301 static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) 2302 { 2303 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 2304 int queue_num = skb_get_queue_mapping(skb); 2305 u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; 2306 struct device *dev = &adapter->vdev->dev; 2307 struct ibmvnic_ind_xmit_queue *ind_bufp; 2308 struct ibmvnic_tx_buff *tx_buff = NULL; 2309 struct ibmvnic_sub_crq_queue *tx_scrq; 2310 struct ibmvnic_long_term_buff *ltb; 2311 struct ibmvnic_tx_pool *tx_pool; 2312 unsigned int tx_send_failed = 0; 2313 netdev_tx_t ret = NETDEV_TX_OK; 2314 unsigned int tx_map_failed = 0; 2315 union sub_crq indir_arr[16]; 2316 unsigned int tx_dropped = 0; 2317 unsigned int tx_packets = 0; 2318 unsigned int tx_bytes = 
0; 2319 dma_addr_t data_dma_addr; 2320 struct netdev_queue *txq; 2321 unsigned long lpar_rc; 2322 union sub_crq tx_crq; 2323 unsigned int offset; 2324 int num_entries = 1; 2325 unsigned char *dst; 2326 int bufidx = 0; 2327 u8 proto = 0; 2328 2329 /* If a reset is in progress, drop the packet since 2330 * the scrqs may get torn down. Otherwise use the 2331 * rcu to ensure reset waits for us to complete. 2332 */ 2333 rcu_read_lock(); 2334 if (!adapter->tx_queues_active) { 2335 dev_kfree_skb_any(skb); 2336 2337 tx_send_failed++; 2338 tx_dropped++; 2339 ret = NETDEV_TX_OK; 2340 goto out; 2341 } 2342 2343 tx_scrq = adapter->tx_scrq[queue_num]; 2344 txq = netdev_get_tx_queue(netdev, queue_num); 2345 ind_bufp = &tx_scrq->ind_buf; 2346 2347 if (ibmvnic_xmit_workarounds(skb, netdev)) { 2348 tx_dropped++; 2349 tx_send_failed++; 2350 ret = NETDEV_TX_OK; 2351 ibmvnic_tx_scrq_flush(adapter, tx_scrq); 2352 goto out; 2353 } 2354 2355 if (skb_is_gso(skb)) 2356 tx_pool = &adapter->tso_pool[queue_num]; 2357 else 2358 tx_pool = &adapter->tx_pool[queue_num]; 2359 2360 bufidx = tx_pool->free_map[tx_pool->consumer_index]; 2361 2362 if (bufidx == IBMVNIC_INVALID_MAP) { 2363 dev_kfree_skb_any(skb); 2364 tx_send_failed++; 2365 tx_dropped++; 2366 ibmvnic_tx_scrq_flush(adapter, tx_scrq); 2367 ret = NETDEV_TX_OK; 2368 goto out; 2369 } 2370 2371 tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP; 2372 2373 map_txpool_buf_to_ltb(tx_pool, bufidx, &ltb, &offset); 2374 2375 dst = ltb->buff + offset; 2376 memset(dst, 0, tx_pool->buf_size); 2377 data_dma_addr = ltb->addr + offset; 2378 2379 if (skb_shinfo(skb)->nr_frags) { 2380 int cur, i; 2381 2382 /* Copy the head */ 2383 skb_copy_from_linear_data(skb, dst, skb_headlen(skb)); 2384 cur = skb_headlen(skb); 2385 2386 /* Copy the frags */ 2387 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 2388 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 2389 2390 memcpy(dst + cur, skb_frag_address(frag), 2391 skb_frag_size(frag)); 2392 cur += skb_frag_size(frag); 2393 } 2394 } else { 2395 skb_copy_from_linear_data(skb, dst, skb->len); 2396 } 2397 2398 /* post changes to long_term_buff *dst before VIOS accessing it */ 2399 dma_wmb(); 2400 2401 tx_pool->consumer_index = 2402 (tx_pool->consumer_index + 1) % tx_pool->num_buffers; 2403 2404 tx_buff = &tx_pool->tx_buff[bufidx]; 2405 tx_buff->skb = skb; 2406 tx_buff->index = bufidx; 2407 tx_buff->pool_index = queue_num; 2408 2409 memset(&tx_crq, 0, sizeof(tx_crq)); 2410 tx_crq.v1.first = IBMVNIC_CRQ_CMD; 2411 tx_crq.v1.type = IBMVNIC_TX_DESC; 2412 tx_crq.v1.n_crq_elem = 1; 2413 tx_crq.v1.n_sge = 1; 2414 tx_crq.v1.flags1 = IBMVNIC_TX_COMP_NEEDED; 2415 2416 if (skb_is_gso(skb)) 2417 tx_crq.v1.correlator = 2418 cpu_to_be32(bufidx | IBMVNIC_TSO_POOL_MASK); 2419 else 2420 tx_crq.v1.correlator = cpu_to_be32(bufidx); 2421 tx_crq.v1.dma_reg = cpu_to_be16(ltb->map_id); 2422 tx_crq.v1.sge_len = cpu_to_be32(skb->len); 2423 tx_crq.v1.ioba = cpu_to_be64(data_dma_addr); 2424 2425 if (adapter->vlan_header_insertion && skb_vlan_tag_present(skb)) { 2426 tx_crq.v1.flags2 |= IBMVNIC_TX_VLAN_INSERT; 2427 tx_crq.v1.vlan_id = cpu_to_be16(skb->vlan_tci); 2428 } 2429 2430 if (skb->protocol == htons(ETH_P_IP)) { 2431 tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV4; 2432 proto = ip_hdr(skb)->protocol; 2433 } else if (skb->protocol == htons(ETH_P_IPV6)) { 2434 tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV6; 2435 proto = ipv6_hdr(skb)->nexthdr; 2436 } 2437 2438 if (proto == IPPROTO_TCP) 2439 tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_TCP; 2440 else if (proto ==
IPPROTO_UDP) 2441 tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_UDP; 2442 2443 if (skb->ip_summed == CHECKSUM_PARTIAL) { 2444 tx_crq.v1.flags1 |= IBMVNIC_TX_CHKSUM_OFFLOAD; 2445 hdrs += 2; 2446 } 2447 if (skb_is_gso(skb)) { 2448 tx_crq.v1.flags1 |= IBMVNIC_TX_LSO; 2449 tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); 2450 hdrs += 2; 2451 } 2452 2453 if ((*hdrs >> 7) & 1) 2454 build_hdr_descs_arr(skb, indir_arr, &num_entries, *hdrs); 2455 2456 tx_crq.v1.n_crq_elem = num_entries; 2457 tx_buff->num_entries = num_entries; 2458 /* flush buffer if current entry can not fit */ 2459 if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) { 2460 lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); 2461 if (lpar_rc != H_SUCCESS) 2462 goto tx_flush_err; 2463 } 2464 2465 indir_arr[0] = tx_crq; 2466 memcpy(&ind_bufp->indir_arr[ind_bufp->index], &indir_arr[0], 2467 num_entries * sizeof(struct ibmvnic_generic_scrq)); 2468 ind_bufp->index += num_entries; 2469 if (__netdev_tx_sent_queue(txq, skb->len, 2470 netdev_xmit_more() && 2471 ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) { 2472 lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq); 2473 if (lpar_rc != H_SUCCESS) 2474 goto tx_err; 2475 } 2476 2477 if (atomic_add_return(num_entries, &tx_scrq->used) 2478 >= adapter->req_tx_entries_per_subcrq) { 2479 netdev_dbg(netdev, "Stopping queue %d\n", queue_num); 2480 netif_stop_subqueue(netdev, queue_num); 2481 } 2482 2483 tx_packets++; 2484 tx_bytes += skb->len; 2485 txq_trans_cond_update(txq); 2486 ret = NETDEV_TX_OK; 2487 goto out; 2488 2489 tx_flush_err: 2490 dev_kfree_skb_any(skb); 2491 tx_buff->skb = NULL; 2492 tx_pool->consumer_index = tx_pool->consumer_index == 0 ? 2493 tx_pool->num_buffers - 1 : 2494 tx_pool->consumer_index - 1; 2495 tx_dropped++; 2496 tx_err: 2497 if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER) 2498 dev_err_ratelimited(dev, "tx: send failed\n"); 2499 2500 if (lpar_rc == H_CLOSED || adapter->failover_pending) { 2501 /* Disable TX and report carrier off if queue is closed 2502 * or pending failover. 2503 * Firmware guarantees that a signal will be sent to the 2504 * driver, triggering a reset or some other action. 
2505 */ 2506 netif_tx_stop_all_queues(netdev); 2507 netif_carrier_off(netdev); 2508 } 2509 out: 2510 rcu_read_unlock(); 2511 netdev->stats.tx_dropped += tx_dropped; 2512 netdev->stats.tx_bytes += tx_bytes; 2513 netdev->stats.tx_packets += tx_packets; 2514 adapter->tx_send_failed += tx_send_failed; 2515 adapter->tx_map_failed += tx_map_failed; 2516 adapter->tx_stats_buffers[queue_num].packets += tx_packets; 2517 adapter->tx_stats_buffers[queue_num].bytes += tx_bytes; 2518 adapter->tx_stats_buffers[queue_num].dropped_packets += tx_dropped; 2519 2520 return ret; 2521 } 2522 2523 static void ibmvnic_set_multi(struct net_device *netdev) 2524 { 2525 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 2526 struct netdev_hw_addr *ha; 2527 union ibmvnic_crq crq; 2528 2529 memset(&crq, 0, sizeof(crq)); 2530 crq.request_capability.first = IBMVNIC_CRQ_CMD; 2531 crq.request_capability.cmd = REQUEST_CAPABILITY; 2532 2533 if (netdev->flags & IFF_PROMISC) { 2534 if (!adapter->promisc_supported) 2535 return; 2536 } else { 2537 if (netdev->flags & IFF_ALLMULTI) { 2538 /* Accept all multicast */ 2539 memset(&crq, 0, sizeof(crq)); 2540 crq.multicast_ctrl.first = IBMVNIC_CRQ_CMD; 2541 crq.multicast_ctrl.cmd = MULTICAST_CTRL; 2542 crq.multicast_ctrl.flags = IBMVNIC_ENABLE_ALL; 2543 ibmvnic_send_crq(adapter, &crq); 2544 } else if (netdev_mc_empty(netdev)) { 2545 /* Reject all multicast */ 2546 memset(&crq, 0, sizeof(crq)); 2547 crq.multicast_ctrl.first = IBMVNIC_CRQ_CMD; 2548 crq.multicast_ctrl.cmd = MULTICAST_CTRL; 2549 crq.multicast_ctrl.flags = IBMVNIC_DISABLE_ALL; 2550 ibmvnic_send_crq(adapter, &crq); 2551 } else { 2552 /* Accept one or more multicast(s) */ 2553 netdev_for_each_mc_addr(ha, netdev) { 2554 memset(&crq, 0, sizeof(crq)); 2555 crq.multicast_ctrl.first = IBMVNIC_CRQ_CMD; 2556 crq.multicast_ctrl.cmd = MULTICAST_CTRL; 2557 crq.multicast_ctrl.flags = IBMVNIC_ENABLE_MC; 2558 ether_addr_copy(&crq.multicast_ctrl.mac_addr[0], 2559 ha->addr); 2560 ibmvnic_send_crq(adapter, &crq); 2561 } 2562 } 2563 } 2564 } 2565 2566 static int __ibmvnic_set_mac(struct net_device *netdev, u8 *dev_addr) 2567 { 2568 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 2569 union ibmvnic_crq crq; 2570 int rc; 2571 2572 if (!is_valid_ether_addr(dev_addr)) { 2573 rc = -EADDRNOTAVAIL; 2574 goto err; 2575 } 2576 2577 memset(&crq, 0, sizeof(crq)); 2578 crq.change_mac_addr.first = IBMVNIC_CRQ_CMD; 2579 crq.change_mac_addr.cmd = CHANGE_MAC_ADDR; 2580 ether_addr_copy(&crq.change_mac_addr.mac_addr[0], dev_addr); 2581 2582 mutex_lock(&adapter->fw_lock); 2583 adapter->fw_done_rc = 0; 2584 reinit_completion(&adapter->fw_done); 2585 2586 rc = ibmvnic_send_crq(adapter, &crq); 2587 if (rc) { 2588 rc = -EIO; 2589 mutex_unlock(&adapter->fw_lock); 2590 goto err; 2591 } 2592 2593 rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); 2594 /* netdev->dev_addr is changed in handle_change_mac_rsp function */ 2595 if (rc || adapter->fw_done_rc) { 2596 rc = -EIO; 2597 mutex_unlock(&adapter->fw_lock); 2598 goto err; 2599 } 2600 mutex_unlock(&adapter->fw_lock); 2601 return 0; 2602 err: 2603 ether_addr_copy(adapter->mac_addr, netdev->dev_addr); 2604 return rc; 2605 } 2606 2607 static int ibmvnic_set_mac(struct net_device *netdev, void *p) 2608 { 2609 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 2610 struct sockaddr *addr = p; 2611 int rc; 2612 2613 rc = 0; 2614 if (!is_valid_ether_addr(addr->sa_data)) 2615 return -EADDRNOTAVAIL; 2616 2617 ether_addr_copy(adapter->mac_addr, addr->sa_data); 2618 if (adapter->state != 
VNIC_PROBED) 2619 rc = __ibmvnic_set_mac(netdev, addr->sa_data); 2620 2621 return rc; 2622 } 2623 2624 static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason) 2625 { 2626 switch (reason) { 2627 case VNIC_RESET_FAILOVER: 2628 return "FAILOVER"; 2629 case VNIC_RESET_MOBILITY: 2630 return "MOBILITY"; 2631 case VNIC_RESET_FATAL: 2632 return "FATAL"; 2633 case VNIC_RESET_NON_FATAL: 2634 return "NON_FATAL"; 2635 case VNIC_RESET_TIMEOUT: 2636 return "TIMEOUT"; 2637 case VNIC_RESET_CHANGE_PARAM: 2638 return "CHANGE_PARAM"; 2639 case VNIC_RESET_PASSIVE_INIT: 2640 return "PASSIVE_INIT"; 2641 } 2642 return "UNKNOWN"; 2643 } 2644 2645 /* 2646 * Initialize the init_done completion and return code values. We 2647 * can get a transport event just after registering the CRQ and the 2648 * tasklet will use this to communicate the transport event. To ensure 2649 * we don't miss the notification/error, initialize these _before_ 2650 * regisering the CRQ. 2651 */ 2652 static inline void reinit_init_done(struct ibmvnic_adapter *adapter) 2653 { 2654 reinit_completion(&adapter->init_done); 2655 adapter->init_done_rc = 0; 2656 } 2657 2658 /* 2659 * do_reset returns zero if we are able to keep processing reset events, or 2660 * non-zero if we hit a fatal error and must halt. 2661 */ 2662 static int do_reset(struct ibmvnic_adapter *adapter, 2663 struct ibmvnic_rwi *rwi, u32 reset_state) 2664 { 2665 struct net_device *netdev = adapter->netdev; 2666 u64 old_num_rx_queues, old_num_tx_queues; 2667 u64 old_num_rx_slots, old_num_tx_slots; 2668 int rc; 2669 2670 netdev_dbg(adapter->netdev, 2671 "[S:%s FOP:%d] Reset reason: %s, reset_state: %s\n", 2672 adapter_state_to_string(adapter->state), 2673 adapter->failover_pending, 2674 reset_reason_to_string(rwi->reset_reason), 2675 adapter_state_to_string(reset_state)); 2676 2677 adapter->reset_reason = rwi->reset_reason; 2678 /* requestor of VNIC_RESET_CHANGE_PARAM already has the rtnl lock */ 2679 if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM)) 2680 rtnl_lock(); 2681 2682 /* Now that we have the rtnl lock, clear any pending failover. 2683 * This will ensure ibmvnic_open() has either completed or will 2684 * block until failover is complete. 2685 */ 2686 if (rwi->reset_reason == VNIC_RESET_FAILOVER) 2687 adapter->failover_pending = false; 2688 2689 /* read the state and check (again) after getting rtnl */ 2690 reset_state = adapter->state; 2691 2692 if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) { 2693 rc = -EBUSY; 2694 goto out; 2695 } 2696 2697 netif_carrier_off(netdev); 2698 2699 old_num_rx_queues = adapter->req_rx_queues; 2700 old_num_tx_queues = adapter->req_tx_queues; 2701 old_num_rx_slots = adapter->req_rx_add_entries_per_subcrq; 2702 old_num_tx_slots = adapter->req_tx_entries_per_subcrq; 2703 2704 ibmvnic_cleanup(netdev); 2705 2706 if (reset_state == VNIC_OPEN && 2707 adapter->reset_reason != VNIC_RESET_MOBILITY && 2708 adapter->reset_reason != VNIC_RESET_FAILOVER) { 2709 if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { 2710 rc = __ibmvnic_close(netdev); 2711 if (rc) 2712 goto out; 2713 } else { 2714 adapter->state = VNIC_CLOSING; 2715 2716 /* Release the RTNL lock before link state change and 2717 * re-acquire after the link state change to allow 2718 * linkwatch_event to grab the RTNL lock and run during 2719 * a reset. 
2720 */ 2721 rtnl_unlock(); 2722 rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN); 2723 rtnl_lock(); 2724 if (rc) 2725 goto out; 2726 2727 if (adapter->state == VNIC_OPEN) { 2728 /* When we dropped rtnl, ibmvnic_open() got 2729 * it and noticed that we are resetting and 2730 * set the adapter state to OPEN. Update our 2731 * new "target" state, and resume the reset 2732 * from VNIC_CLOSING state. 2733 */ 2734 netdev_dbg(netdev, 2735 "Open changed state from %s, updating.\n", 2736 adapter_state_to_string(reset_state)); 2737 reset_state = VNIC_OPEN; 2738 adapter->state = VNIC_CLOSING; 2739 } 2740 2741 if (adapter->state != VNIC_CLOSING) { 2742 /* If someone else changed the adapter state 2743 * when we dropped the rtnl, fail the reset 2744 */ 2745 rc = -EAGAIN; 2746 goto out; 2747 } 2748 adapter->state = VNIC_CLOSED; 2749 } 2750 } 2751 2752 if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { 2753 release_resources(adapter); 2754 release_sub_crqs(adapter, 1); 2755 release_crq_queue(adapter); 2756 } 2757 2758 if (adapter->reset_reason != VNIC_RESET_NON_FATAL) { 2759 /* remove the closed state so when we call open it appears 2760 * we are coming from the probed state. 2761 */ 2762 adapter->state = VNIC_PROBED; 2763 2764 reinit_init_done(adapter); 2765 2766 if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { 2767 rc = init_crq_queue(adapter); 2768 } else if (adapter->reset_reason == VNIC_RESET_MOBILITY) { 2769 rc = ibmvnic_reenable_crq_queue(adapter); 2770 release_sub_crqs(adapter, 1); 2771 } else { 2772 rc = ibmvnic_reset_crq(adapter); 2773 if (rc == H_CLOSED || rc == H_SUCCESS) { 2774 rc = vio_enable_interrupts(adapter->vdev); 2775 if (rc) 2776 netdev_err(adapter->netdev, 2777 "Reset failed to enable interrupts. rc=%d\n", 2778 rc); 2779 } 2780 } 2781 2782 if (rc) { 2783 netdev_err(adapter->netdev, 2784 "Reset couldn't initialize crq. rc=%d\n", rc); 2785 goto out; 2786 } 2787 2788 rc = ibmvnic_reset_init(adapter, true); 2789 if (rc) 2790 goto out; 2791 2792 /* If the adapter was in PROBE or DOWN state prior to the reset, 2793 * exit here. 
2794 */ 2795 if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN) { 2796 rc = 0; 2797 goto out; 2798 } 2799 2800 rc = ibmvnic_login(netdev); 2801 if (rc) 2802 goto out; 2803 2804 if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) { 2805 rc = init_resources(adapter); 2806 if (rc) 2807 goto out; 2808 } else if (adapter->req_rx_queues != old_num_rx_queues || 2809 adapter->req_tx_queues != old_num_tx_queues || 2810 adapter->req_rx_add_entries_per_subcrq != 2811 old_num_rx_slots || 2812 adapter->req_tx_entries_per_subcrq != 2813 old_num_tx_slots || 2814 !adapter->rx_pool || 2815 !adapter->tso_pool || 2816 !adapter->tx_pool) { 2817 release_napi(adapter); 2818 release_vpd_data(adapter); 2819 2820 rc = init_resources(adapter); 2821 if (rc) 2822 goto out; 2823 2824 } else { 2825 rc = init_tx_pools(netdev); 2826 if (rc) { 2827 netdev_dbg(netdev, 2828 "init tx pools failed (%d)\n", 2829 rc); 2830 goto out; 2831 } 2832 2833 rc = init_rx_pools(netdev); 2834 if (rc) { 2835 netdev_dbg(netdev, 2836 "init rx pools failed (%d)\n", 2837 rc); 2838 goto out; 2839 } 2840 } 2841 ibmvnic_disable_irqs(adapter); 2842 } 2843 adapter->state = VNIC_CLOSED; 2844 2845 if (reset_state == VNIC_CLOSED) { 2846 rc = 0; 2847 goto out; 2848 } 2849 2850 rc = __ibmvnic_open(netdev); 2851 if (rc) { 2852 rc = IBMVNIC_OPEN_FAILED; 2853 goto out; 2854 } 2855 2856 /* refresh device's multicast list */ 2857 ibmvnic_set_multi(netdev); 2858 2859 if (adapter->reset_reason == VNIC_RESET_FAILOVER || 2860 adapter->reset_reason == VNIC_RESET_MOBILITY) 2861 __netdev_notify_peers(netdev); 2862 2863 rc = 0; 2864 2865 out: 2866 /* restore the adapter state if reset failed */ 2867 if (rc) 2868 adapter->state = reset_state; 2869 /* requestor of VNIC_RESET_CHANGE_PARAM should still hold the rtnl lock */ 2870 if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM)) 2871 rtnl_unlock(); 2872 2873 netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Reset done, rc %d\n", 2874 adapter_state_to_string(adapter->state), 2875 adapter->failover_pending, rc); 2876 return rc; 2877 } 2878 2879 static int do_hard_reset(struct ibmvnic_adapter *adapter, 2880 struct ibmvnic_rwi *rwi, u32 reset_state) 2881 { 2882 struct net_device *netdev = adapter->netdev; 2883 int rc; 2884 2885 netdev_dbg(adapter->netdev, "Hard resetting driver (%s)\n", 2886 reset_reason_to_string(rwi->reset_reason)); 2887 2888 /* read the state and check (again) after getting rtnl */ 2889 reset_state = adapter->state; 2890 2891 if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) { 2892 rc = -EBUSY; 2893 goto out; 2894 } 2895 2896 netif_carrier_off(netdev); 2897 adapter->reset_reason = rwi->reset_reason; 2898 2899 ibmvnic_cleanup(netdev); 2900 release_resources(adapter); 2901 release_sub_crqs(adapter, 0); 2902 release_crq_queue(adapter); 2903 2904 /* remove the closed state so when we call open it appears 2905 * we are coming from the probed state. 2906 */ 2907 adapter->state = VNIC_PROBED; 2908 2909 reinit_init_done(adapter); 2910 2911 rc = init_crq_queue(adapter); 2912 if (rc) { 2913 netdev_err(adapter->netdev, 2914 "Couldn't initialize crq. rc=%d\n", rc); 2915 goto out; 2916 } 2917 2918 rc = ibmvnic_reset_init(adapter, false); 2919 if (rc) 2920 goto out; 2921 2922 /* If the adapter was in PROBE or DOWN state prior to the reset, 2923 * exit here. 
2924 */ 2925 if (reset_state == VNIC_PROBED || reset_state == VNIC_DOWN) 2926 goto out; 2927 2928 rc = ibmvnic_login(netdev); 2929 if (rc) 2930 goto out; 2931 2932 rc = init_resources(adapter); 2933 if (rc) 2934 goto out; 2935 2936 ibmvnic_disable_irqs(adapter); 2937 adapter->state = VNIC_CLOSED; 2938 2939 if (reset_state == VNIC_CLOSED) 2940 goto out; 2941 2942 rc = __ibmvnic_open(netdev); 2943 if (rc) { 2944 rc = IBMVNIC_OPEN_FAILED; 2945 goto out; 2946 } 2947 2948 __netdev_notify_peers(netdev); 2949 out: 2950 /* restore adapter state if reset failed */ 2951 if (rc) 2952 adapter->state = reset_state; 2953 netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Hard reset done, rc %d\n", 2954 adapter_state_to_string(adapter->state), 2955 adapter->failover_pending, rc); 2956 return rc; 2957 } 2958 2959 static struct ibmvnic_rwi *get_next_rwi(struct ibmvnic_adapter *adapter) 2960 { 2961 struct ibmvnic_rwi *rwi; 2962 unsigned long flags; 2963 2964 spin_lock_irqsave(&adapter->rwi_lock, flags); 2965 2966 if (!list_empty(&adapter->rwi_list)) { 2967 rwi = list_first_entry(&adapter->rwi_list, struct ibmvnic_rwi, 2968 list); 2969 list_del(&rwi->list); 2970 } else { 2971 rwi = NULL; 2972 } 2973 2974 spin_unlock_irqrestore(&adapter->rwi_lock, flags); 2975 return rwi; 2976 } 2977 2978 /** 2979 * do_passive_init - complete probing when partner device is detected. 2980 * @adapter: ibmvnic_adapter struct 2981 * 2982 * If the ibmvnic device does not have a partner device to communicate with at boot 2983 * and that partner device comes online at a later time, this function is called 2984 * to complete the initialization process of ibmvnic device. 2985 * Caller is expected to hold rtnl_lock(). 2986 * 2987 * Returns non-zero if sub-CRQs are not initialized properly leaving the device 2988 * in the down state. 2989 * Returns 0 upon success and the device is in PROBED state. 2990 */ 2991 2992 static int do_passive_init(struct ibmvnic_adapter *adapter) 2993 { 2994 unsigned long timeout = msecs_to_jiffies(30000); 2995 struct net_device *netdev = adapter->netdev; 2996 struct device *dev = &adapter->vdev->dev; 2997 int rc; 2998 2999 netdev_dbg(netdev, "Partner device found, probing.\n"); 3000 3001 adapter->state = VNIC_PROBING; 3002 reinit_completion(&adapter->init_done); 3003 adapter->init_done_rc = 0; 3004 adapter->crq.active = true; 3005 3006 rc = send_crq_init_complete(adapter); 3007 if (rc) 3008 goto out; 3009 3010 rc = send_version_xchg(adapter); 3011 if (rc) 3012 netdev_dbg(adapter->netdev, "send_version_xchg failed, rc=%d\n", rc); 3013 3014 if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { 3015 dev_err(dev, "Initialization sequence timed out\n"); 3016 rc = -ETIMEDOUT; 3017 goto out; 3018 } 3019 3020 rc = init_sub_crqs(adapter); 3021 if (rc) { 3022 dev_err(dev, "Initialization of sub crqs failed, rc=%d\n", rc); 3023 goto out; 3024 } 3025 3026 rc = init_sub_crq_irqs(adapter); 3027 if (rc) { 3028 dev_err(dev, "Failed to initialize sub crq irqs\n, rc=%d", rc); 3029 goto init_failed; 3030 } 3031 3032 netdev->mtu = adapter->req_mtu - ETH_HLEN; 3033 netdev->min_mtu = adapter->min_mtu - ETH_HLEN; 3034 netdev->max_mtu = adapter->max_mtu - ETH_HLEN; 3035 3036 adapter->state = VNIC_PROBED; 3037 netdev_dbg(netdev, "Probed successfully. 
Waiting for signal from partner device.\n"); 3038 3039 return 0; 3040 3041 init_failed: 3042 release_sub_crqs(adapter, 1); 3043 out: 3044 adapter->state = VNIC_DOWN; 3045 return rc; 3046 } 3047 3048 static void __ibmvnic_reset(struct work_struct *work) 3049 { 3050 struct ibmvnic_adapter *adapter; 3051 unsigned int timeout = 5000; 3052 struct ibmvnic_rwi *tmprwi; 3053 bool saved_state = false; 3054 struct ibmvnic_rwi *rwi; 3055 unsigned long flags; 3056 struct device *dev; 3057 bool need_reset; 3058 int num_fails = 0; 3059 u32 reset_state; 3060 int rc = 0; 3061 3062 adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset); 3063 dev = &adapter->vdev->dev; 3064 3065 /* Wait for ibmvnic_probe() to complete. If probe is taking too long 3066 * or if another reset is in progress, defer work for now. If probe 3067 * eventually fails it will flush and terminate our work. 3068 * 3069 * Three possibilities here: 3070 * 1. Adapter being removed - just return 3071 * 2. Timed out on probe or another reset in progress - delay the work 3072 * 3. Completed probe - perform any resets in queue 3073 */ 3074 if (adapter->state == VNIC_PROBING && 3075 !wait_for_completion_timeout(&adapter->probe_done, timeout)) { 3076 dev_err(dev, "Reset thread timed out on probe\n"); 3077 queue_delayed_work(system_long_wq, 3078 &adapter->ibmvnic_delayed_reset, 3079 IBMVNIC_RESET_DELAY); 3080 return; 3081 } 3082 3083 /* adapter is done with probe (i.e. state is never VNIC_PROBING now) */ 3084 if (adapter->state == VNIC_REMOVING) 3085 return; 3086 3087 /* ->rwi_list is stable now (no one else is removing entries) */ 3088 3089 /* ibmvnic_probe() may have purged the reset queue after we were 3090 * scheduled to process a reset so there may be no resets to process. 3091 * Before setting the ->resetting bit though, we have to make sure 3092 * that there is in fact a reset to process. Otherwise we may race 3093 * with ibmvnic_open() and end up leaving the vnic down: 3094 * 3095 * __ibmvnic_reset() ibmvnic_open() 3096 * ----------------- -------------- 3097 * 3098 * set ->resetting bit 3099 * find ->resetting bit is set 3100 * set ->state to VNIC_OPEN (i.e. 3101 * assume reset will open device) 3102 * return 3103 * find reset queue empty 3104 * return 3105 * 3106 * Neither performed vnic login/open and vnic stays down 3107 * 3108 * If we hold the lock and conditionally set the bit, either we 3109 * or ibmvnic_open() will complete the open.
3110 */ 3111 need_reset = false; 3112 spin_lock(&adapter->rwi_lock); 3113 if (!list_empty(&adapter->rwi_list)) { 3114 if (test_and_set_bit_lock(0, &adapter->resetting)) { 3115 queue_delayed_work(system_long_wq, 3116 &adapter->ibmvnic_delayed_reset, 3117 IBMVNIC_RESET_DELAY); 3118 } else { 3119 need_reset = true; 3120 } 3121 } 3122 spin_unlock(&adapter->rwi_lock); 3123 3124 if (!need_reset) 3125 return; 3126 3127 rwi = get_next_rwi(adapter); 3128 while (rwi) { 3129 spin_lock_irqsave(&adapter->state_lock, flags); 3130 3131 if (adapter->state == VNIC_REMOVING || 3132 adapter->state == VNIC_REMOVED) { 3133 spin_unlock_irqrestore(&adapter->state_lock, flags); 3134 kfree(rwi); 3135 rc = EBUSY; 3136 break; 3137 } 3138 3139 if (!saved_state) { 3140 reset_state = adapter->state; 3141 saved_state = true; 3142 } 3143 spin_unlock_irqrestore(&adapter->state_lock, flags); 3144 3145 if (rwi->reset_reason == VNIC_RESET_PASSIVE_INIT) { 3146 rtnl_lock(); 3147 rc = do_passive_init(adapter); 3148 rtnl_unlock(); 3149 if (!rc) 3150 netif_carrier_on(adapter->netdev); 3151 } else if (adapter->force_reset_recovery) { 3152 /* Since we are doing a hard reset now, clear the 3153 * failover_pending flag so we don't ignore any 3154 * future MOBILITY or other resets. 3155 */ 3156 adapter->failover_pending = false; 3157 3158 /* Transport event occurred during previous reset */ 3159 if (adapter->wait_for_reset) { 3160 /* Previous was CHANGE_PARAM; caller locked */ 3161 adapter->force_reset_recovery = false; 3162 rc = do_hard_reset(adapter, rwi, reset_state); 3163 } else { 3164 rtnl_lock(); 3165 adapter->force_reset_recovery = false; 3166 rc = do_hard_reset(adapter, rwi, reset_state); 3167 rtnl_unlock(); 3168 } 3169 if (rc) 3170 num_fails++; 3171 else 3172 num_fails = 0; 3173 3174 /* If auto-priority-failover is enabled we can get 3175 * back to back failovers during resets, resulting 3176 * in at least two failed resets (from high-priority 3177 * backing device to low-priority one and then back) 3178 * If resets continue to fail beyond that, give the 3179 * adapter some time to settle down before retrying. 3180 */ 3181 if (num_fails >= 3) { 3182 netdev_dbg(adapter->netdev, 3183 "[S:%s] Hard reset failed %d times, waiting 60 secs\n", 3184 adapter_state_to_string(adapter->state), 3185 num_fails); 3186 set_current_state(TASK_UNINTERRUPTIBLE); 3187 schedule_timeout(60 * HZ); 3188 } 3189 } else { 3190 rc = do_reset(adapter, rwi, reset_state); 3191 } 3192 tmprwi = rwi; 3193 adapter->last_reset_time = jiffies; 3194 3195 if (rc) 3196 netdev_dbg(adapter->netdev, "Reset failed, rc=%d\n", rc); 3197 3198 rwi = get_next_rwi(adapter); 3199 3200 /* 3201 * If there are no resets queued and the previous reset failed, 3202 * the adapter would be in an undefined state. So retry the 3203 * previous reset as a hard reset. 3204 * 3205 * Else, free the previous rwi and, if there is another reset 3206 * queued, process the new reset even if previous reset failed 3207 * (the previous reset could have failed because of a fail 3208 * over for instance, so process the fail over). 
3209 */ 3210 if (!rwi && rc) 3211 rwi = tmprwi; 3212 else 3213 kfree(tmprwi); 3214 3215 if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER || 3216 rwi->reset_reason == VNIC_RESET_MOBILITY || rc)) 3217 adapter->force_reset_recovery = true; 3218 } 3219 3220 if (adapter->wait_for_reset) { 3221 adapter->reset_done_rc = rc; 3222 complete(&adapter->reset_done); 3223 } 3224 3225 clear_bit_unlock(0, &adapter->resetting); 3226 3227 netdev_dbg(adapter->netdev, 3228 "[S:%s FRR:%d WFR:%d] Done processing resets\n", 3229 adapter_state_to_string(adapter->state), 3230 adapter->force_reset_recovery, 3231 adapter->wait_for_reset); 3232 } 3233 3234 static void __ibmvnic_delayed_reset(struct work_struct *work) 3235 { 3236 struct ibmvnic_adapter *adapter; 3237 3238 adapter = container_of(work, struct ibmvnic_adapter, 3239 ibmvnic_delayed_reset.work); 3240 __ibmvnic_reset(&adapter->ibmvnic_reset); 3241 } 3242 3243 static void flush_reset_queue(struct ibmvnic_adapter *adapter) 3244 { 3245 struct list_head *entry, *tmp_entry; 3246 3247 if (!list_empty(&adapter->rwi_list)) { 3248 list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) { 3249 list_del(entry); 3250 kfree(list_entry(entry, struct ibmvnic_rwi, list)); 3251 } 3252 } 3253 } 3254 3255 static int ibmvnic_reset(struct ibmvnic_adapter *adapter, 3256 enum ibmvnic_reset_reason reason) 3257 { 3258 struct net_device *netdev = adapter->netdev; 3259 struct ibmvnic_rwi *rwi, *tmp; 3260 unsigned long flags; 3261 int ret; 3262 3263 spin_lock_irqsave(&adapter->rwi_lock, flags); 3264 3265 /* If failover is pending don't schedule any other reset. 3266 * Instead let the failover complete. If there is already a 3267 * failover reset scheduled, we will detect and drop the 3268 * duplicate reset when walking the ->rwi_list below.
3269 */ 3270 if (adapter->state == VNIC_REMOVING || 3271 adapter->state == VNIC_REMOVED || 3272 (adapter->failover_pending && reason != VNIC_RESET_FAILOVER)) { 3273 ret = EBUSY; 3274 netdev_dbg(netdev, "Adapter removing or pending failover, skipping reset\n"); 3275 goto err; 3276 } 3277 3278 list_for_each_entry(tmp, &adapter->rwi_list, list) { 3279 if (tmp->reset_reason == reason) { 3280 netdev_dbg(netdev, "Skipping matching reset, reason=%s\n", 3281 reset_reason_to_string(reason)); 3282 ret = EBUSY; 3283 goto err; 3284 } 3285 } 3286 3287 rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC); 3288 if (!rwi) { 3289 ret = ENOMEM; 3290 goto err; 3291 } 3292 /* if we just received a transport event, 3293 * flush reset queue and process this reset 3294 */ 3295 if (adapter->force_reset_recovery) 3296 flush_reset_queue(adapter); 3297 3298 rwi->reset_reason = reason; 3299 list_add_tail(&rwi->list, &adapter->rwi_list); 3300 netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n", 3301 reset_reason_to_string(reason)); 3302 queue_work(system_long_wq, &adapter->ibmvnic_reset); 3303 3304 ret = 0; 3305 err: 3306 /* ibmvnic_close() below can block, so drop the lock first */ 3307 spin_unlock_irqrestore(&adapter->rwi_lock, flags); 3308 3309 if (ret == ENOMEM) 3310 ibmvnic_close(netdev); 3311 3312 return -ret; 3313 } 3314 3315 static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue) 3316 { 3317 struct ibmvnic_adapter *adapter = netdev_priv(dev); 3318 3319 if (test_bit(0, &adapter->resetting)) { 3320 netdev_err(adapter->netdev, 3321 "Adapter is resetting, skip timeout reset\n"); 3322 return; 3323 } 3324 /* No queuing up reset until at least 5 seconds (default watchdog val) 3325 * after last reset 3326 */ 3327 if (time_before(jiffies, (adapter->last_reset_time + dev->watchdog_timeo))) { 3328 netdev_dbg(dev, "Not yet time to tx timeout.\n"); 3329 return; 3330 } 3331 ibmvnic_reset(adapter, VNIC_RESET_TIMEOUT); 3332 } 3333 3334 static void remove_buff_from_pool(struct ibmvnic_adapter *adapter, 3335 struct ibmvnic_rx_buff *rx_buff) 3336 { 3337 struct ibmvnic_rx_pool *pool = &adapter->rx_pool[rx_buff->pool_index]; 3338 3339 rx_buff->skb = NULL; 3340 3341 pool->free_map[pool->next_alloc] = (int)(rx_buff - pool->rx_buff); 3342 pool->next_alloc = (pool->next_alloc + 1) % pool->size; 3343 3344 atomic_dec(&pool->available); 3345 } 3346 3347 static int ibmvnic_poll(struct napi_struct *napi, int budget) 3348 { 3349 struct ibmvnic_sub_crq_queue *rx_scrq; 3350 struct ibmvnic_adapter *adapter; 3351 struct net_device *netdev; 3352 int frames_processed; 3353 int scrq_num; 3354 3355 netdev = napi->dev; 3356 adapter = netdev_priv(netdev); 3357 scrq_num = (int)(napi - adapter->napi); 3358 frames_processed = 0; 3359 rx_scrq = adapter->rx_scrq[scrq_num]; 3360 3361 restart_poll: 3362 while (frames_processed < budget) { 3363 struct sk_buff *skb; 3364 struct ibmvnic_rx_buff *rx_buff; 3365 union sub_crq *next; 3366 u32 length; 3367 u16 offset; 3368 u8 flags = 0; 3369 3370 if (unlikely(test_bit(0, &adapter->resetting) && 3371 adapter->reset_reason != VNIC_RESET_NON_FATAL)) { 3372 enable_scrq_irq(adapter, rx_scrq); 3373 napi_complete_done(napi, frames_processed); 3374 return frames_processed; 3375 } 3376 3377 if (!pending_scrq(adapter, rx_scrq)) 3378 break; 3379 next = ibmvnic_next_scrq(adapter, rx_scrq); 3380 rx_buff = (struct ibmvnic_rx_buff *) 3381 be64_to_cpu(next->rx_comp.correlator); 3382 /* do error checking */ 3383 if (next->rx_comp.rc) { 3384 netdev_dbg(netdev, "rx buffer returned with rc %x\n", 3385 
be16_to_cpu(next->rx_comp.rc)); 3386 /* free the entry */ 3387 next->rx_comp.first = 0; 3388 dev_kfree_skb_any(rx_buff->skb); 3389 remove_buff_from_pool(adapter, rx_buff); 3390 continue; 3391 } else if (!rx_buff->skb) { 3392 /* free the entry */ 3393 next->rx_comp.first = 0; 3394 remove_buff_from_pool(adapter, rx_buff); 3395 continue; 3396 } 3397 3398 length = be32_to_cpu(next->rx_comp.len); 3399 offset = be16_to_cpu(next->rx_comp.off_frame_data); 3400 flags = next->rx_comp.flags; 3401 skb = rx_buff->skb; 3402 /* load long_term_buff before copying to skb */ 3403 dma_rmb(); 3404 skb_copy_to_linear_data(skb, rx_buff->data + offset, 3405 length); 3406 3407 /* VLAN Header has been stripped by the system firmware and 3408 * needs to be inserted by the driver 3409 */ 3410 if (adapter->rx_vlan_header_insertion && 3411 (flags & IBMVNIC_VLAN_STRIPPED)) 3412 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), 3413 ntohs(next->rx_comp.vlan_tci)); 3414 3415 /* free the entry */ 3416 next->rx_comp.first = 0; 3417 remove_buff_from_pool(adapter, rx_buff); 3418 3419 skb_put(skb, length); 3420 skb->protocol = eth_type_trans(skb, netdev); 3421 skb_record_rx_queue(skb, scrq_num); 3422 3423 if (flags & IBMVNIC_IP_CHKSUM_GOOD && 3424 flags & IBMVNIC_TCP_UDP_CHKSUM_GOOD) { 3425 skb->ip_summed = CHECKSUM_UNNECESSARY; 3426 } 3427 3428 length = skb->len; 3429 napi_gro_receive(napi, skb); /* send it up */ 3430 netdev->stats.rx_packets++; 3431 netdev->stats.rx_bytes += length; 3432 adapter->rx_stats_buffers[scrq_num].packets++; 3433 adapter->rx_stats_buffers[scrq_num].bytes += length; 3434 frames_processed++; 3435 } 3436 3437 if (adapter->state != VNIC_CLOSING && 3438 ((atomic_read(&adapter->rx_pool[scrq_num].available) < 3439 adapter->req_rx_add_entries_per_subcrq / 2) || 3440 frames_processed < budget)) 3441 replenish_rx_pool(adapter, &adapter->rx_pool[scrq_num]); 3442 if (frames_processed < budget) { 3443 if (napi_complete_done(napi, frames_processed)) { 3444 enable_scrq_irq(adapter, rx_scrq); 3445 if (pending_scrq(adapter, rx_scrq)) { 3446 if (napi_reschedule(napi)) { 3447 disable_scrq_irq(adapter, rx_scrq); 3448 goto restart_poll; 3449 } 3450 } 3451 } 3452 } 3453 return frames_processed; 3454 } 3455 3456 static int wait_for_reset(struct ibmvnic_adapter *adapter) 3457 { 3458 int rc, ret; 3459 3460 adapter->fallback.mtu = adapter->req_mtu; 3461 adapter->fallback.rx_queues = adapter->req_rx_queues; 3462 adapter->fallback.tx_queues = adapter->req_tx_queues; 3463 adapter->fallback.rx_entries = adapter->req_rx_add_entries_per_subcrq; 3464 adapter->fallback.tx_entries = adapter->req_tx_entries_per_subcrq; 3465 3466 reinit_completion(&adapter->reset_done); 3467 adapter->wait_for_reset = true; 3468 rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM); 3469 3470 if (rc) { 3471 ret = rc; 3472 goto out; 3473 } 3474 rc = ibmvnic_wait_for_completion(adapter, &adapter->reset_done, 60000); 3475 if (rc) { 3476 ret = -ENODEV; 3477 goto out; 3478 } 3479 3480 ret = 0; 3481 if (adapter->reset_done_rc) { 3482 ret = -EIO; 3483 adapter->desired.mtu = adapter->fallback.mtu; 3484 adapter->desired.rx_queues = adapter->fallback.rx_queues; 3485 adapter->desired.tx_queues = adapter->fallback.tx_queues; 3486 adapter->desired.rx_entries = adapter->fallback.rx_entries; 3487 adapter->desired.tx_entries = adapter->fallback.tx_entries; 3488 3489 reinit_completion(&adapter->reset_done); 3490 adapter->wait_for_reset = true; 3491 rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM); 3492 if (rc) { 3493 ret = rc; 3494 goto out; 3495 } 3496 rc = 
ibmvnic_wait_for_completion(adapter, &adapter->reset_done, 3497 60000); 3498 if (rc) { 3499 ret = -ENODEV; 3500 goto out; 3501 } 3502 } 3503 out: 3504 adapter->wait_for_reset = false; 3505 3506 return ret; 3507 } 3508 3509 static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu) 3510 { 3511 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 3512 3513 adapter->desired.mtu = new_mtu + ETH_HLEN; 3514 3515 return wait_for_reset(adapter); 3516 } 3517 3518 static netdev_features_t ibmvnic_features_check(struct sk_buff *skb, 3519 struct net_device *dev, 3520 netdev_features_t features) 3521 { 3522 /* Some backing hardware adapters can not 3523 * handle packets with a MSS less than 224 3524 * or with only one segment. 3525 */ 3526 if (skb_is_gso(skb)) { 3527 if (skb_shinfo(skb)->gso_size < 224 || 3528 skb_shinfo(skb)->gso_segs == 1) 3529 features &= ~NETIF_F_GSO_MASK; 3530 } 3531 3532 return features; 3533 } 3534 3535 static const struct net_device_ops ibmvnic_netdev_ops = { 3536 .ndo_open = ibmvnic_open, 3537 .ndo_stop = ibmvnic_close, 3538 .ndo_start_xmit = ibmvnic_xmit, 3539 .ndo_set_rx_mode = ibmvnic_set_multi, 3540 .ndo_set_mac_address = ibmvnic_set_mac, 3541 .ndo_validate_addr = eth_validate_addr, 3542 .ndo_tx_timeout = ibmvnic_tx_timeout, 3543 .ndo_change_mtu = ibmvnic_change_mtu, 3544 .ndo_features_check = ibmvnic_features_check, 3545 }; 3546 3547 /* ethtool functions */ 3548 3549 static int ibmvnic_get_link_ksettings(struct net_device *netdev, 3550 struct ethtool_link_ksettings *cmd) 3551 { 3552 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 3553 int rc; 3554 3555 rc = send_query_phys_parms(adapter); 3556 if (rc) { 3557 adapter->speed = SPEED_UNKNOWN; 3558 adapter->duplex = DUPLEX_UNKNOWN; 3559 } 3560 cmd->base.speed = adapter->speed; 3561 cmd->base.duplex = adapter->duplex; 3562 cmd->base.port = PORT_FIBRE; 3563 cmd->base.phy_address = 0; 3564 cmd->base.autoneg = AUTONEG_ENABLE; 3565 3566 return 0; 3567 } 3568 3569 static void ibmvnic_get_drvinfo(struct net_device *netdev, 3570 struct ethtool_drvinfo *info) 3571 { 3572 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 3573 3574 strscpy(info->driver, ibmvnic_driver_name, sizeof(info->driver)); 3575 strscpy(info->version, IBMVNIC_DRIVER_VERSION, sizeof(info->version)); 3576 strscpy(info->fw_version, adapter->fw_version, 3577 sizeof(info->fw_version)); 3578 } 3579 3580 static u32 ibmvnic_get_msglevel(struct net_device *netdev) 3581 { 3582 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 3583 3584 return adapter->msg_enable; 3585 } 3586 3587 static void ibmvnic_set_msglevel(struct net_device *netdev, u32 data) 3588 { 3589 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 3590 3591 adapter->msg_enable = data; 3592 } 3593 3594 static u32 ibmvnic_get_link(struct net_device *netdev) 3595 { 3596 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 3597 3598 /* Don't need to send a query because we request a logical link up at 3599 * init and then we wait for link state indications 3600 */ 3601 return adapter->logical_link_state; 3602 } 3603 3604 static void ibmvnic_get_ringparam(struct net_device *netdev, 3605 struct ethtool_ringparam *ring, 3606 struct kernel_ethtool_ringparam *kernel_ring, 3607 struct netlink_ext_ack *extack) 3608 { 3609 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 3610 3611 ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq; 3612 ring->tx_max_pending = adapter->max_tx_entries_per_subcrq; 3613 ring->rx_mini_max_pending = 0; 3614 ring->rx_jumbo_max_pending = 0; 3615 
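/* The *_max_pending values above are the limits reported by the VNIC
 * server during capability exchange; the *_pending values below are the
 * ring sizes currently in use (the negotiated req_* capabilities).
 */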
ring->rx_pending = adapter->req_rx_add_entries_per_subcrq; 3616 ring->tx_pending = adapter->req_tx_entries_per_subcrq; 3617 ring->rx_mini_pending = 0; 3618 ring->rx_jumbo_pending = 0; 3619 } 3620 3621 static int ibmvnic_set_ringparam(struct net_device *netdev, 3622 struct ethtool_ringparam *ring, 3623 struct kernel_ethtool_ringparam *kernel_ring, 3624 struct netlink_ext_ack *extack) 3625 { 3626 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 3627 3628 if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq || 3629 ring->tx_pending > adapter->max_tx_entries_per_subcrq) { 3630 netdev_err(netdev, "Invalid request.\n"); 3631 netdev_err(netdev, "Max tx buffers = %llu\n", 3632 adapter->max_rx_add_entries_per_subcrq); 3633 netdev_err(netdev, "Max rx buffers = %llu\n", 3634 adapter->max_tx_entries_per_subcrq); 3635 return -EINVAL; 3636 } 3637 3638 adapter->desired.rx_entries = ring->rx_pending; 3639 adapter->desired.tx_entries = ring->tx_pending; 3640 3641 return wait_for_reset(adapter); 3642 } 3643 3644 static void ibmvnic_get_channels(struct net_device *netdev, 3645 struct ethtool_channels *channels) 3646 { 3647 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 3648 3649 channels->max_rx = adapter->max_rx_queues; 3650 channels->max_tx = adapter->max_tx_queues; 3651 channels->max_other = 0; 3652 channels->max_combined = 0; 3653 channels->rx_count = adapter->req_rx_queues; 3654 channels->tx_count = adapter->req_tx_queues; 3655 channels->other_count = 0; 3656 channels->combined_count = 0; 3657 } 3658 3659 static int ibmvnic_set_channels(struct net_device *netdev, 3660 struct ethtool_channels *channels) 3661 { 3662 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 3663 3664 adapter->desired.rx_queues = channels->rx_count; 3665 adapter->desired.tx_queues = channels->tx_count; 3666 3667 return wait_for_reset(adapter); 3668 } 3669 3670 static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) 3671 { 3672 struct ibmvnic_adapter *adapter = netdev_priv(dev); 3673 int i; 3674 3675 if (stringset != ETH_SS_STATS) 3676 return; 3677 3678 for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN) 3679 memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN); 3680 3681 for (i = 0; i < adapter->req_tx_queues; i++) { 3682 snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i); 3683 data += ETH_GSTRING_LEN; 3684 3685 snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i); 3686 data += ETH_GSTRING_LEN; 3687 3688 snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i); 3689 data += ETH_GSTRING_LEN; 3690 } 3691 3692 for (i = 0; i < adapter->req_rx_queues; i++) { 3693 snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i); 3694 data += ETH_GSTRING_LEN; 3695 3696 snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i); 3697 data += ETH_GSTRING_LEN; 3698 3699 snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i); 3700 data += ETH_GSTRING_LEN; 3701 } 3702 } 3703 3704 static int ibmvnic_get_sset_count(struct net_device *dev, int sset) 3705 { 3706 struct ibmvnic_adapter *adapter = netdev_priv(dev); 3707 3708 switch (sset) { 3709 case ETH_SS_STATS: 3710 return ARRAY_SIZE(ibmvnic_stats) + 3711 adapter->req_tx_queues * NUM_TX_STATS + 3712 adapter->req_rx_queues * NUM_RX_STATS; 3713 default: 3714 return -EOPNOTSUPP; 3715 } 3716 } 3717 3718 static void ibmvnic_get_ethtool_stats(struct net_device *dev, 3719 struct ethtool_stats *stats, u64 *data) 3720 { 3721 struct ibmvnic_adapter *adapter = netdev_priv(dev); 3722 union ibmvnic_crq crq; 3723 int i, j; 3724 int rc; 3725 3726 memset(&crq, 0, 
sizeof(crq)); 3727 crq.request_statistics.first = IBMVNIC_CRQ_CMD; 3728 crq.request_statistics.cmd = REQUEST_STATISTICS; 3729 crq.request_statistics.ioba = cpu_to_be32(adapter->stats_token); 3730 crq.request_statistics.len = 3731 cpu_to_be32(sizeof(struct ibmvnic_statistics)); 3732 3733 /* Wait for data to be written */ 3734 reinit_completion(&adapter->stats_done); 3735 rc = ibmvnic_send_crq(adapter, &crq); 3736 if (rc) 3737 return; 3738 rc = ibmvnic_wait_for_completion(adapter, &adapter->stats_done, 10000); 3739 if (rc) 3740 return; 3741 3742 for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++) 3743 data[i] = be64_to_cpu(IBMVNIC_GET_STAT 3744 (adapter, ibmvnic_stats[i].offset)); 3745 3746 for (j = 0; j < adapter->req_tx_queues; j++) { 3747 data[i] = adapter->tx_stats_buffers[j].packets; 3748 i++; 3749 data[i] = adapter->tx_stats_buffers[j].bytes; 3750 i++; 3751 data[i] = adapter->tx_stats_buffers[j].dropped_packets; 3752 i++; 3753 } 3754 3755 for (j = 0; j < adapter->req_rx_queues; j++) { 3756 data[i] = adapter->rx_stats_buffers[j].packets; 3757 i++; 3758 data[i] = adapter->rx_stats_buffers[j].bytes; 3759 i++; 3760 data[i] = adapter->rx_stats_buffers[j].interrupts; 3761 i++; 3762 } 3763 } 3764 3765 static const struct ethtool_ops ibmvnic_ethtool_ops = { 3766 .get_drvinfo = ibmvnic_get_drvinfo, 3767 .get_msglevel = ibmvnic_get_msglevel, 3768 .set_msglevel = ibmvnic_set_msglevel, 3769 .get_link = ibmvnic_get_link, 3770 .get_ringparam = ibmvnic_get_ringparam, 3771 .set_ringparam = ibmvnic_set_ringparam, 3772 .get_channels = ibmvnic_get_channels, 3773 .set_channels = ibmvnic_set_channels, 3774 .get_strings = ibmvnic_get_strings, 3775 .get_sset_count = ibmvnic_get_sset_count, 3776 .get_ethtool_stats = ibmvnic_get_ethtool_stats, 3777 .get_link_ksettings = ibmvnic_get_link_ksettings, 3778 }; 3779 3780 /* Routines for managing CRQs/sCRQs */ 3781 3782 static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter, 3783 struct ibmvnic_sub_crq_queue *scrq) 3784 { 3785 int rc; 3786 3787 if (!scrq) { 3788 netdev_dbg(adapter->netdev, "Invalid scrq reset.\n"); 3789 return -EINVAL; 3790 } 3791 3792 if (scrq->irq) { 3793 free_irq(scrq->irq, scrq); 3794 irq_dispose_mapping(scrq->irq); 3795 scrq->irq = 0; 3796 } 3797 3798 if (scrq->msgs) { 3799 memset(scrq->msgs, 0, 4 * PAGE_SIZE); 3800 atomic_set(&scrq->used, 0); 3801 scrq->cur = 0; 3802 scrq->ind_buf.index = 0; 3803 } else { 3804 netdev_dbg(adapter->netdev, "Invalid scrq reset\n"); 3805 return -EINVAL; 3806 } 3807 3808 rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token, 3809 4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq); 3810 return rc; 3811 } 3812 3813 static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter) 3814 { 3815 int i, rc; 3816 3817 if (!adapter->tx_scrq || !adapter->rx_scrq) 3818 return -EINVAL; 3819 3820 ibmvnic_clean_affinity(adapter); 3821 3822 for (i = 0; i < adapter->req_tx_queues; i++) { 3823 netdev_dbg(adapter->netdev, "Re-setting tx_scrq[%d]\n", i); 3824 rc = reset_one_sub_crq_queue(adapter, adapter->tx_scrq[i]); 3825 if (rc) 3826 return rc; 3827 } 3828 3829 for (i = 0; i < adapter->req_rx_queues; i++) { 3830 netdev_dbg(adapter->netdev, "Re-setting rx_scrq[%d]\n", i); 3831 rc = reset_one_sub_crq_queue(adapter, adapter->rx_scrq[i]); 3832 if (rc) 3833 return rc; 3834 } 3835 3836 return rc; 3837 } 3838 3839 static void release_sub_crq_queue(struct ibmvnic_adapter *adapter, 3840 struct ibmvnic_sub_crq_queue *scrq, 3841 bool do_h_free) 3842 { 3843 struct device *dev = &adapter->vdev->dev; 3844 long rc; 3845 3846 
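/* Tear down in the reverse order of init_sub_crq_queue(): optionally
 * H_FREE_SUB_CRQ the queue with the hypervisor, then free the indirect
 * descriptor buffer, unmap and free the message pages, and free the
 * affinity mask.
 */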
	netdev_dbg(adapter->netdev, "Releasing sub-CRQ\n");

	if (do_h_free) {
		/* Close the sub-crqs */
		do {
			rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
						adapter->vdev->unit_address,
						scrq->crq_num);
		} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));

		if (rc) {
			netdev_err(adapter->netdev,
				   "Failed to release sub-CRQ %16lx, rc = %ld\n",
				   scrq->crq_num, rc);
		}
	}

	dma_free_coherent(dev,
			  IBMVNIC_IND_ARR_SZ,
			  scrq->ind_buf.indir_arr,
			  scrq->ind_buf.indir_dma);

	dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
			 DMA_BIDIRECTIONAL);
	free_pages((unsigned long)scrq->msgs, 2);
	free_cpumask_var(scrq->affinity_mask);
	kfree(scrq);
}

static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
							*adapter)
{
	struct device *dev = &adapter->vdev->dev;
	struct ibmvnic_sub_crq_queue *scrq;
	int rc;

	scrq = kzalloc(sizeof(*scrq), GFP_KERNEL);
	if (!scrq)
		return NULL;

	scrq->msgs =
		(union sub_crq *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 2);
	if (!scrq->msgs) {
		dev_warn(dev, "Couldn't allocate crq queue messages page\n");
		goto zero_page_failed;
	}
	if (!zalloc_cpumask_var(&scrq->affinity_mask, GFP_KERNEL))
		goto cpumask_alloc_failed;

	scrq->msg_token = dma_map_single(dev, scrq->msgs, 4 * PAGE_SIZE,
					 DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, scrq->msg_token)) {
		dev_warn(dev, "Couldn't map crq queue messages page\n");
		goto map_failed;
	}

	rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token,
			   4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq);

	if (rc == H_RESOURCE)
		rc = ibmvnic_reset_crq(adapter);

	if (rc == H_CLOSED) {
		dev_warn(dev, "Partner adapter not ready, waiting.\n");
	} else if (rc) {
		dev_warn(dev, "Error %d registering sub-crq\n", rc);
		goto reg_failed;
	}

	scrq->adapter = adapter;
	scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
	scrq->ind_buf.index = 0;

	scrq->ind_buf.indir_arr =
		dma_alloc_coherent(dev,
				   IBMVNIC_IND_ARR_SZ,
				   &scrq->ind_buf.indir_dma,
				   GFP_KERNEL);

	if (!scrq->ind_buf.indir_arr)
		goto indir_failed;

	spin_lock_init(&scrq->lock);

	netdev_dbg(adapter->netdev,
		   "sub-crq initialized, num %lx, hw_irq=%lx, irq=%x\n",
		   scrq->crq_num, scrq->hw_irq, scrq->irq);

	return scrq;

indir_failed:
	do {
		rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
					adapter->vdev->unit_address,
					scrq->crq_num);
	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
reg_failed:
	dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
			 DMA_BIDIRECTIONAL);
map_failed:
	free_cpumask_var(scrq->affinity_mask);
cpumask_alloc_failed:
	free_pages((unsigned long)scrq->msgs, 2);
zero_page_failed:
	kfree(scrq);

	return NULL;
}

static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
{
	int i;

	ibmvnic_clean_affinity(adapter);
	if (adapter->tx_scrq) {
		for (i = 0; i < adapter->num_active_tx_scrqs; i++) {
			if (!adapter->tx_scrq[i])
				continue;

			netdev_dbg(adapter->netdev, "Releasing tx_scrq[%d]\n",
				   i);
			ibmvnic_tx_scrq_clean_buffer(adapter, adapter->tx_scrq[i]);
			if (adapter->tx_scrq[i]->irq) {
				free_irq(adapter->tx_scrq[i]->irq,
					 adapter->tx_scrq[i]);
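				/* also dispose of the IRQ mapping created in
				 * init_sub_crq_irqs()
				 */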
irq_dispose_mapping(adapter->tx_scrq[i]->irq); 3972 adapter->tx_scrq[i]->irq = 0; 3973 } 3974 3975 release_sub_crq_queue(adapter, adapter->tx_scrq[i], 3976 do_h_free); 3977 } 3978 3979 kfree(adapter->tx_scrq); 3980 adapter->tx_scrq = NULL; 3981 adapter->num_active_tx_scrqs = 0; 3982 } 3983 3984 if (adapter->rx_scrq) { 3985 for (i = 0; i < adapter->num_active_rx_scrqs; i++) { 3986 if (!adapter->rx_scrq[i]) 3987 continue; 3988 3989 netdev_dbg(adapter->netdev, "Releasing rx_scrq[%d]\n", 3990 i); 3991 if (adapter->rx_scrq[i]->irq) { 3992 free_irq(adapter->rx_scrq[i]->irq, 3993 adapter->rx_scrq[i]); 3994 irq_dispose_mapping(adapter->rx_scrq[i]->irq); 3995 adapter->rx_scrq[i]->irq = 0; 3996 } 3997 3998 release_sub_crq_queue(adapter, adapter->rx_scrq[i], 3999 do_h_free); 4000 } 4001 4002 kfree(adapter->rx_scrq); 4003 adapter->rx_scrq = NULL; 4004 adapter->num_active_rx_scrqs = 0; 4005 } 4006 } 4007 4008 static int disable_scrq_irq(struct ibmvnic_adapter *adapter, 4009 struct ibmvnic_sub_crq_queue *scrq) 4010 { 4011 struct device *dev = &adapter->vdev->dev; 4012 unsigned long rc; 4013 4014 rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, 4015 H_DISABLE_VIO_INTERRUPT, scrq->hw_irq, 0, 0); 4016 if (rc) 4017 dev_err(dev, "Couldn't disable scrq irq 0x%lx. rc=%ld\n", 4018 scrq->hw_irq, rc); 4019 return rc; 4020 } 4021 4022 /* We can not use the IRQ chip EOI handler because that has the 4023 * unintended effect of changing the interrupt priority. 4024 */ 4025 static void ibmvnic_xics_eoi(struct device *dev, struct ibmvnic_sub_crq_queue *scrq) 4026 { 4027 u64 val = 0xff000000 | scrq->hw_irq; 4028 unsigned long rc; 4029 4030 rc = plpar_hcall_norets(H_EOI, val); 4031 if (rc) 4032 dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", val, rc); 4033 } 4034 4035 /* Due to a firmware bug, the hypervisor can send an interrupt to a 4036 * transmit or receive queue just prior to a partition migration. 4037 * Force an EOI after migration. 4038 */ 4039 static void ibmvnic_clear_pending_interrupt(struct device *dev, 4040 struct ibmvnic_sub_crq_queue *scrq) 4041 { 4042 if (!xive_enabled()) 4043 ibmvnic_xics_eoi(dev, scrq); 4044 } 4045 4046 static int enable_scrq_irq(struct ibmvnic_adapter *adapter, 4047 struct ibmvnic_sub_crq_queue *scrq) 4048 { 4049 struct device *dev = &adapter->vdev->dev; 4050 unsigned long rc; 4051 4052 if (scrq->hw_irq > 0x100000000ULL) { 4053 dev_err(dev, "bad hw_irq = %lx\n", scrq->hw_irq); 4054 return 1; 4055 } 4056 4057 if (test_bit(0, &adapter->resetting) && 4058 adapter->reset_reason == VNIC_RESET_MOBILITY) { 4059 ibmvnic_clear_pending_interrupt(dev, scrq); 4060 } 4061 4062 rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, 4063 H_ENABLE_VIO_INTERRUPT, scrq->hw_irq, 0, 0); 4064 if (rc) 4065 dev_err(dev, "Couldn't enable scrq irq 0x%lx. 
rc=%ld\n", 4066 scrq->hw_irq, rc); 4067 return rc; 4068 } 4069 4070 static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, 4071 struct ibmvnic_sub_crq_queue *scrq) 4072 { 4073 struct device *dev = &adapter->vdev->dev; 4074 struct ibmvnic_tx_pool *tx_pool; 4075 struct ibmvnic_tx_buff *txbuff; 4076 struct netdev_queue *txq; 4077 union sub_crq *next; 4078 int index; 4079 int i; 4080 4081 restart_loop: 4082 while (pending_scrq(adapter, scrq)) { 4083 unsigned int pool = scrq->pool_index; 4084 int num_entries = 0; 4085 int total_bytes = 0; 4086 int num_packets = 0; 4087 4088 next = ibmvnic_next_scrq(adapter, scrq); 4089 for (i = 0; i < next->tx_comp.num_comps; i++) { 4090 index = be32_to_cpu(next->tx_comp.correlators[i]); 4091 if (index & IBMVNIC_TSO_POOL_MASK) { 4092 tx_pool = &adapter->tso_pool[pool]; 4093 index &= ~IBMVNIC_TSO_POOL_MASK; 4094 } else { 4095 tx_pool = &adapter->tx_pool[pool]; 4096 } 4097 4098 txbuff = &tx_pool->tx_buff[index]; 4099 num_packets++; 4100 num_entries += txbuff->num_entries; 4101 if (txbuff->skb) { 4102 total_bytes += txbuff->skb->len; 4103 if (next->tx_comp.rcs[i]) { 4104 dev_err(dev, "tx error %x\n", 4105 next->tx_comp.rcs[i]); 4106 dev_kfree_skb_irq(txbuff->skb); 4107 } else { 4108 dev_consume_skb_irq(txbuff->skb); 4109 } 4110 txbuff->skb = NULL; 4111 } else { 4112 netdev_warn(adapter->netdev, 4113 "TX completion received with NULL socket buffer\n"); 4114 } 4115 tx_pool->free_map[tx_pool->producer_index] = index; 4116 tx_pool->producer_index = 4117 (tx_pool->producer_index + 1) % 4118 tx_pool->num_buffers; 4119 } 4120 /* remove tx_comp scrq*/ 4121 next->tx_comp.first = 0; 4122 4123 txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index); 4124 netdev_tx_completed_queue(txq, num_packets, total_bytes); 4125 4126 if (atomic_sub_return(num_entries, &scrq->used) <= 4127 (adapter->req_tx_entries_per_subcrq / 2) && 4128 __netif_subqueue_stopped(adapter->netdev, 4129 scrq->pool_index)) { 4130 rcu_read_lock(); 4131 if (adapter->tx_queues_active) { 4132 netif_wake_subqueue(adapter->netdev, 4133 scrq->pool_index); 4134 netdev_dbg(adapter->netdev, 4135 "Started queue %d\n", 4136 scrq->pool_index); 4137 } 4138 rcu_read_unlock(); 4139 } 4140 } 4141 4142 enable_scrq_irq(adapter, scrq); 4143 4144 if (pending_scrq(adapter, scrq)) { 4145 disable_scrq_irq(adapter, scrq); 4146 goto restart_loop; 4147 } 4148 4149 return 0; 4150 } 4151 4152 static irqreturn_t ibmvnic_interrupt_tx(int irq, void *instance) 4153 { 4154 struct ibmvnic_sub_crq_queue *scrq = instance; 4155 struct ibmvnic_adapter *adapter = scrq->adapter; 4156 4157 disable_scrq_irq(adapter, scrq); 4158 ibmvnic_complete_tx(adapter, scrq); 4159 4160 return IRQ_HANDLED; 4161 } 4162 4163 static irqreturn_t ibmvnic_interrupt_rx(int irq, void *instance) 4164 { 4165 struct ibmvnic_sub_crq_queue *scrq = instance; 4166 struct ibmvnic_adapter *adapter = scrq->adapter; 4167 4168 /* When booting a kdump kernel we can hit pending interrupts 4169 * prior to completing driver initialization. 
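	 * Treat such an early interrupt as spurious (IRQ_NONE) rather than
	 * scheduling NAPI on queues that are not yet set up.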
4170 */ 4171 if (unlikely(adapter->state != VNIC_OPEN)) 4172 return IRQ_NONE; 4173 4174 adapter->rx_stats_buffers[scrq->scrq_num].interrupts++; 4175 4176 if (napi_schedule_prep(&adapter->napi[scrq->scrq_num])) { 4177 disable_scrq_irq(adapter, scrq); 4178 __napi_schedule(&adapter->napi[scrq->scrq_num]); 4179 } 4180 4181 return IRQ_HANDLED; 4182 } 4183 4184 static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter) 4185 { 4186 struct device *dev = &adapter->vdev->dev; 4187 struct ibmvnic_sub_crq_queue *scrq; 4188 int i = 0, j = 0; 4189 int rc = 0; 4190 4191 for (i = 0; i < adapter->req_tx_queues; i++) { 4192 netdev_dbg(adapter->netdev, "Initializing tx_scrq[%d] irq\n", 4193 i); 4194 scrq = adapter->tx_scrq[i]; 4195 scrq->irq = irq_create_mapping(NULL, scrq->hw_irq); 4196 4197 if (!scrq->irq) { 4198 rc = -EINVAL; 4199 dev_err(dev, "Error mapping irq\n"); 4200 goto req_tx_irq_failed; 4201 } 4202 4203 snprintf(scrq->name, sizeof(scrq->name), "ibmvnic-%x-tx%d", 4204 adapter->vdev->unit_address, i); 4205 rc = request_irq(scrq->irq, ibmvnic_interrupt_tx, 4206 0, scrq->name, scrq); 4207 4208 if (rc) { 4209 dev_err(dev, "Couldn't register tx irq 0x%x. rc=%d\n", 4210 scrq->irq, rc); 4211 irq_dispose_mapping(scrq->irq); 4212 goto req_tx_irq_failed; 4213 } 4214 } 4215 4216 for (i = 0; i < adapter->req_rx_queues; i++) { 4217 netdev_dbg(adapter->netdev, "Initializing rx_scrq[%d] irq\n", 4218 i); 4219 scrq = adapter->rx_scrq[i]; 4220 scrq->irq = irq_create_mapping(NULL, scrq->hw_irq); 4221 if (!scrq->irq) { 4222 rc = -EINVAL; 4223 dev_err(dev, "Error mapping irq\n"); 4224 goto req_rx_irq_failed; 4225 } 4226 snprintf(scrq->name, sizeof(scrq->name), "ibmvnic-%x-rx%d", 4227 adapter->vdev->unit_address, i); 4228 rc = request_irq(scrq->irq, ibmvnic_interrupt_rx, 4229 0, scrq->name, scrq); 4230 if (rc) { 4231 dev_err(dev, "Couldn't register rx irq 0x%x. 
rc=%d\n", 4232 scrq->irq, rc); 4233 irq_dispose_mapping(scrq->irq); 4234 goto req_rx_irq_failed; 4235 } 4236 } 4237 4238 cpus_read_lock(); 4239 ibmvnic_set_affinity(adapter); 4240 cpus_read_unlock(); 4241 4242 return rc; 4243 4244 req_rx_irq_failed: 4245 for (j = 0; j < i; j++) { 4246 free_irq(adapter->rx_scrq[j]->irq, adapter->rx_scrq[j]); 4247 irq_dispose_mapping(adapter->rx_scrq[j]->irq); 4248 } 4249 i = adapter->req_tx_queues; 4250 req_tx_irq_failed: 4251 for (j = 0; j < i; j++) { 4252 free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]); 4253 irq_dispose_mapping(adapter->tx_scrq[j]->irq); 4254 } 4255 release_sub_crqs(adapter, 1); 4256 return rc; 4257 } 4258 4259 static int init_sub_crqs(struct ibmvnic_adapter *adapter) 4260 { 4261 struct device *dev = &adapter->vdev->dev; 4262 struct ibmvnic_sub_crq_queue **allqueues; 4263 int registered_queues = 0; 4264 int total_queues; 4265 int more = 0; 4266 int i; 4267 4268 total_queues = adapter->req_tx_queues + adapter->req_rx_queues; 4269 4270 allqueues = kcalloc(total_queues, sizeof(*allqueues), GFP_KERNEL); 4271 if (!allqueues) 4272 return -ENOMEM; 4273 4274 for (i = 0; i < total_queues; i++) { 4275 allqueues[i] = init_sub_crq_queue(adapter); 4276 if (!allqueues[i]) { 4277 dev_warn(dev, "Couldn't allocate all sub-crqs\n"); 4278 break; 4279 } 4280 registered_queues++; 4281 } 4282 4283 /* Make sure we were able to register the minimum number of queues */ 4284 if (registered_queues < 4285 adapter->min_tx_queues + adapter->min_rx_queues) { 4286 dev_err(dev, "Fatal: Couldn't init min number of sub-crqs\n"); 4287 goto tx_failed; 4288 } 4289 4290 /* Distribute the failed allocated queues*/ 4291 for (i = 0; i < total_queues - registered_queues + more ; i++) { 4292 netdev_dbg(adapter->netdev, "Reducing number of queues\n"); 4293 switch (i % 3) { 4294 case 0: 4295 if (adapter->req_rx_queues > adapter->min_rx_queues) 4296 adapter->req_rx_queues--; 4297 else 4298 more++; 4299 break; 4300 case 1: 4301 if (adapter->req_tx_queues > adapter->min_tx_queues) 4302 adapter->req_tx_queues--; 4303 else 4304 more++; 4305 break; 4306 } 4307 } 4308 4309 adapter->tx_scrq = kcalloc(adapter->req_tx_queues, 4310 sizeof(*adapter->tx_scrq), GFP_KERNEL); 4311 if (!adapter->tx_scrq) 4312 goto tx_failed; 4313 4314 for (i = 0; i < adapter->req_tx_queues; i++) { 4315 adapter->tx_scrq[i] = allqueues[i]; 4316 adapter->tx_scrq[i]->pool_index = i; 4317 adapter->num_active_tx_scrqs++; 4318 } 4319 4320 adapter->rx_scrq = kcalloc(adapter->req_rx_queues, 4321 sizeof(*adapter->rx_scrq), GFP_KERNEL); 4322 if (!adapter->rx_scrq) 4323 goto rx_failed; 4324 4325 for (i = 0; i < adapter->req_rx_queues; i++) { 4326 adapter->rx_scrq[i] = allqueues[i + adapter->req_tx_queues]; 4327 adapter->rx_scrq[i]->scrq_num = i; 4328 adapter->num_active_rx_scrqs++; 4329 } 4330 4331 kfree(allqueues); 4332 return 0; 4333 4334 rx_failed: 4335 kfree(adapter->tx_scrq); 4336 adapter->tx_scrq = NULL; 4337 tx_failed: 4338 for (i = 0; i < registered_queues; i++) 4339 release_sub_crq_queue(adapter, allqueues[i], 1); 4340 kfree(allqueues); 4341 return -ENOMEM; 4342 } 4343 4344 static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) 4345 { 4346 struct device *dev = &adapter->vdev->dev; 4347 union ibmvnic_crq crq; 4348 int max_entries; 4349 int cap_reqs; 4350 4351 /* We send out 6 or 7 REQUEST_CAPABILITY CRQs below (depending on 4352 * the PROMISC flag). Initialize this count upfront. 
When the tasklet 4353 * receives a response to all of these, it will send the next protocol 4354 * message (QUERY_IP_OFFLOAD). 4355 */ 4356 if (!(adapter->netdev->flags & IFF_PROMISC) || 4357 adapter->promisc_supported) 4358 cap_reqs = 7; 4359 else 4360 cap_reqs = 6; 4361 4362 if (!retry) { 4363 /* Sub-CRQ entries are 32 byte long */ 4364 int entries_page = 4 * PAGE_SIZE / (sizeof(u64) * 4); 4365 4366 atomic_set(&adapter->running_cap_crqs, cap_reqs); 4367 4368 if (adapter->min_tx_entries_per_subcrq > entries_page || 4369 adapter->min_rx_add_entries_per_subcrq > entries_page) { 4370 dev_err(dev, "Fatal, invalid entries per sub-crq\n"); 4371 return; 4372 } 4373 4374 if (adapter->desired.mtu) 4375 adapter->req_mtu = adapter->desired.mtu; 4376 else 4377 adapter->req_mtu = adapter->netdev->mtu + ETH_HLEN; 4378 4379 if (!adapter->desired.tx_entries) 4380 adapter->desired.tx_entries = 4381 adapter->max_tx_entries_per_subcrq; 4382 if (!adapter->desired.rx_entries) 4383 adapter->desired.rx_entries = 4384 adapter->max_rx_add_entries_per_subcrq; 4385 4386 max_entries = IBMVNIC_LTB_SET_SIZE / 4387 (adapter->req_mtu + IBMVNIC_BUFFER_HLEN); 4388 4389 if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * 4390 adapter->desired.tx_entries > IBMVNIC_LTB_SET_SIZE) { 4391 adapter->desired.tx_entries = max_entries; 4392 } 4393 4394 if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * 4395 adapter->desired.rx_entries > IBMVNIC_LTB_SET_SIZE) { 4396 adapter->desired.rx_entries = max_entries; 4397 } 4398 4399 if (adapter->desired.tx_entries) 4400 adapter->req_tx_entries_per_subcrq = 4401 adapter->desired.tx_entries; 4402 else 4403 adapter->req_tx_entries_per_subcrq = 4404 adapter->max_tx_entries_per_subcrq; 4405 4406 if (adapter->desired.rx_entries) 4407 adapter->req_rx_add_entries_per_subcrq = 4408 adapter->desired.rx_entries; 4409 else 4410 adapter->req_rx_add_entries_per_subcrq = 4411 adapter->max_rx_add_entries_per_subcrq; 4412 4413 if (adapter->desired.tx_queues) 4414 adapter->req_tx_queues = 4415 adapter->desired.tx_queues; 4416 else 4417 adapter->req_tx_queues = 4418 adapter->opt_tx_comp_sub_queues; 4419 4420 if (adapter->desired.rx_queues) 4421 adapter->req_rx_queues = 4422 adapter->desired.rx_queues; 4423 else 4424 adapter->req_rx_queues = 4425 adapter->opt_rx_comp_queues; 4426 4427 adapter->req_rx_add_queues = adapter->max_rx_add_queues; 4428 } else { 4429 atomic_add(cap_reqs, &adapter->running_cap_crqs); 4430 } 4431 memset(&crq, 0, sizeof(crq)); 4432 crq.request_capability.first = IBMVNIC_CRQ_CMD; 4433 crq.request_capability.cmd = REQUEST_CAPABILITY; 4434 4435 crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES); 4436 crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues); 4437 cap_reqs--; 4438 ibmvnic_send_crq(adapter, &crq); 4439 4440 crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES); 4441 crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues); 4442 cap_reqs--; 4443 ibmvnic_send_crq(adapter, &crq); 4444 4445 crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES); 4446 crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues); 4447 cap_reqs--; 4448 ibmvnic_send_crq(adapter, &crq); 4449 4450 crq.request_capability.capability = 4451 cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ); 4452 crq.request_capability.number = 4453 cpu_to_be64(adapter->req_tx_entries_per_subcrq); 4454 cap_reqs--; 4455 ibmvnic_send_crq(adapter, &crq); 4456 4457 crq.request_capability.capability = 4458 cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ); 4459 crq.request_capability.number 
= 4460 cpu_to_be64(adapter->req_rx_add_entries_per_subcrq); 4461 cap_reqs--; 4462 ibmvnic_send_crq(adapter, &crq); 4463 4464 crq.request_capability.capability = cpu_to_be16(REQ_MTU); 4465 crq.request_capability.number = cpu_to_be64(adapter->req_mtu); 4466 cap_reqs--; 4467 ibmvnic_send_crq(adapter, &crq); 4468 4469 if (adapter->netdev->flags & IFF_PROMISC) { 4470 if (adapter->promisc_supported) { 4471 crq.request_capability.capability = 4472 cpu_to_be16(PROMISC_REQUESTED); 4473 crq.request_capability.number = cpu_to_be64(1); 4474 cap_reqs--; 4475 ibmvnic_send_crq(adapter, &crq); 4476 } 4477 } else { 4478 crq.request_capability.capability = 4479 cpu_to_be16(PROMISC_REQUESTED); 4480 crq.request_capability.number = cpu_to_be64(0); 4481 cap_reqs--; 4482 ibmvnic_send_crq(adapter, &crq); 4483 } 4484 4485 /* Keep at end to catch any discrepancy between expected and actual 4486 * CRQs sent. 4487 */ 4488 WARN_ON(cap_reqs != 0); 4489 } 4490 4491 static int pending_scrq(struct ibmvnic_adapter *adapter, 4492 struct ibmvnic_sub_crq_queue *scrq) 4493 { 4494 union sub_crq *entry = &scrq->msgs[scrq->cur]; 4495 int rc; 4496 4497 rc = !!(entry->generic.first & IBMVNIC_CRQ_CMD_RSP); 4498 4499 /* Ensure that the SCRQ valid flag is loaded prior to loading the 4500 * contents of the SCRQ descriptor 4501 */ 4502 dma_rmb(); 4503 4504 return rc; 4505 } 4506 4507 static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter, 4508 struct ibmvnic_sub_crq_queue *scrq) 4509 { 4510 union sub_crq *entry; 4511 unsigned long flags; 4512 4513 spin_lock_irqsave(&scrq->lock, flags); 4514 entry = &scrq->msgs[scrq->cur]; 4515 if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP) { 4516 if (++scrq->cur == scrq->size) 4517 scrq->cur = 0; 4518 } else { 4519 entry = NULL; 4520 } 4521 spin_unlock_irqrestore(&scrq->lock, flags); 4522 4523 /* Ensure that the SCRQ valid flag is loaded prior to loading the 4524 * contents of the SCRQ descriptor 4525 */ 4526 dma_rmb(); 4527 4528 return entry; 4529 } 4530 4531 static union ibmvnic_crq *ibmvnic_next_crq(struct ibmvnic_adapter *adapter) 4532 { 4533 struct ibmvnic_crq_queue *queue = &adapter->crq; 4534 union ibmvnic_crq *crq; 4535 4536 crq = &queue->msgs[queue->cur]; 4537 if (crq->generic.first & IBMVNIC_CRQ_CMD_RSP) { 4538 if (++queue->cur == queue->size) 4539 queue->cur = 0; 4540 } else { 4541 crq = NULL; 4542 } 4543 4544 return crq; 4545 } 4546 4547 static void print_subcrq_error(struct device *dev, int rc, const char *func) 4548 { 4549 switch (rc) { 4550 case H_PARAMETER: 4551 dev_warn_ratelimited(dev, 4552 "%s failed: Send request is malformed or adapter failover pending. (rc=%d)\n", 4553 func, rc); 4554 break; 4555 case H_CLOSED: 4556 dev_warn_ratelimited(dev, 4557 "%s failed: Backing queue closed. Adapter is down or failover pending. 
(rc=%d)\n", 4558 func, rc); 4559 break; 4560 default: 4561 dev_err_ratelimited(dev, "%s failed: (rc=%d)\n", func, rc); 4562 break; 4563 } 4564 } 4565 4566 static int send_subcrq_indirect(struct ibmvnic_adapter *adapter, 4567 u64 remote_handle, u64 ioba, u64 num_entries) 4568 { 4569 unsigned int ua = adapter->vdev->unit_address; 4570 struct device *dev = &adapter->vdev->dev; 4571 int rc; 4572 4573 /* Make sure the hypervisor sees the complete request */ 4574 dma_wmb(); 4575 rc = plpar_hcall_norets(H_SEND_SUB_CRQ_INDIRECT, ua, 4576 cpu_to_be64(remote_handle), 4577 ioba, num_entries); 4578 4579 if (rc) 4580 print_subcrq_error(dev, rc, __func__); 4581 4582 return rc; 4583 } 4584 4585 static int ibmvnic_send_crq(struct ibmvnic_adapter *adapter, 4586 union ibmvnic_crq *crq) 4587 { 4588 unsigned int ua = adapter->vdev->unit_address; 4589 struct device *dev = &adapter->vdev->dev; 4590 u64 *u64_crq = (u64 *)crq; 4591 int rc; 4592 4593 netdev_dbg(adapter->netdev, "Sending CRQ: %016lx %016lx\n", 4594 (unsigned long)cpu_to_be64(u64_crq[0]), 4595 (unsigned long)cpu_to_be64(u64_crq[1])); 4596 4597 if (!adapter->crq.active && 4598 crq->generic.first != IBMVNIC_CRQ_INIT_CMD) { 4599 dev_warn(dev, "Invalid request detected while CRQ is inactive, possible device state change during reset\n"); 4600 return -EINVAL; 4601 } 4602 4603 /* Make sure the hypervisor sees the complete request */ 4604 dma_wmb(); 4605 4606 rc = plpar_hcall_norets(H_SEND_CRQ, ua, 4607 cpu_to_be64(u64_crq[0]), 4608 cpu_to_be64(u64_crq[1])); 4609 4610 if (rc) { 4611 if (rc == H_CLOSED) { 4612 dev_warn(dev, "CRQ Queue closed\n"); 4613 /* do not reset, report the fail, wait for passive init from server */ 4614 } 4615 4616 dev_warn(dev, "Send error (rc=%d)\n", rc); 4617 } 4618 4619 return rc; 4620 } 4621 4622 static int ibmvnic_send_crq_init(struct ibmvnic_adapter *adapter) 4623 { 4624 struct device *dev = &adapter->vdev->dev; 4625 union ibmvnic_crq crq; 4626 int retries = 100; 4627 int rc; 4628 4629 memset(&crq, 0, sizeof(crq)); 4630 crq.generic.first = IBMVNIC_CRQ_INIT_CMD; 4631 crq.generic.cmd = IBMVNIC_CRQ_INIT; 4632 netdev_dbg(adapter->netdev, "Sending CRQ init\n"); 4633 4634 do { 4635 rc = ibmvnic_send_crq(adapter, &crq); 4636 if (rc != H_CLOSED) 4637 break; 4638 retries--; 4639 msleep(50); 4640 4641 } while (retries > 0); 4642 4643 if (rc) { 4644 dev_err(dev, "Failed to send init request, rc = %d\n", rc); 4645 return rc; 4646 } 4647 4648 return 0; 4649 } 4650 4651 struct vnic_login_client_data { 4652 u8 type; 4653 __be16 len; 4654 char name[]; 4655 } __packed; 4656 4657 static int vnic_client_data_len(struct ibmvnic_adapter *adapter) 4658 { 4659 int len; 4660 4661 /* Calculate the amount of buffer space needed for the 4662 * vnic client data in the login buffer. There are four entries, 4663 * OS name, LPAR name, device name, and a null last entry. 
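	 * Each populated entry is a struct vnic_login_client_data header
	 * (type byte plus big-endian length) followed by its NUL-terminated
	 * string, hence the "+ 1" added to each strlen() below.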
4664 */ 4665 len = 4 * sizeof(struct vnic_login_client_data); 4666 len += 6; /* "Linux" plus NULL */ 4667 len += strlen(utsname()->nodename) + 1; 4668 len += strlen(adapter->netdev->name) + 1; 4669 4670 return len; 4671 } 4672 4673 static void vnic_add_client_data(struct ibmvnic_adapter *adapter, 4674 struct vnic_login_client_data *vlcd) 4675 { 4676 const char *os_name = "Linux"; 4677 int len; 4678 4679 /* Type 1 - LPAR OS */ 4680 vlcd->type = 1; 4681 len = strlen(os_name) + 1; 4682 vlcd->len = cpu_to_be16(len); 4683 strscpy(vlcd->name, os_name, len); 4684 vlcd = (struct vnic_login_client_data *)(vlcd->name + len); 4685 4686 /* Type 2 - LPAR name */ 4687 vlcd->type = 2; 4688 len = strlen(utsname()->nodename) + 1; 4689 vlcd->len = cpu_to_be16(len); 4690 strscpy(vlcd->name, utsname()->nodename, len); 4691 vlcd = (struct vnic_login_client_data *)(vlcd->name + len); 4692 4693 /* Type 3 - device name */ 4694 vlcd->type = 3; 4695 len = strlen(adapter->netdev->name) + 1; 4696 vlcd->len = cpu_to_be16(len); 4697 strscpy(vlcd->name, adapter->netdev->name, len); 4698 } 4699 4700 static int send_login(struct ibmvnic_adapter *adapter) 4701 { 4702 struct ibmvnic_login_rsp_buffer *login_rsp_buffer; 4703 struct ibmvnic_login_buffer *login_buffer; 4704 struct device *dev = &adapter->vdev->dev; 4705 struct vnic_login_client_data *vlcd; 4706 dma_addr_t rsp_buffer_token; 4707 dma_addr_t buffer_token; 4708 size_t rsp_buffer_size; 4709 union ibmvnic_crq crq; 4710 int client_data_len; 4711 size_t buffer_size; 4712 __be64 *tx_list_p; 4713 __be64 *rx_list_p; 4714 int rc; 4715 int i; 4716 4717 if (!adapter->tx_scrq || !adapter->rx_scrq) { 4718 netdev_err(adapter->netdev, 4719 "RX or TX queues are not allocated, device login failed\n"); 4720 return -ENOMEM; 4721 } 4722 4723 release_login_buffer(adapter); 4724 release_login_rsp_buffer(adapter); 4725 4726 client_data_len = vnic_client_data_len(adapter); 4727 4728 buffer_size = 4729 sizeof(struct ibmvnic_login_buffer) + 4730 sizeof(u64) * (adapter->req_tx_queues + adapter->req_rx_queues) + 4731 client_data_len; 4732 4733 login_buffer = kzalloc(buffer_size, GFP_ATOMIC); 4734 if (!login_buffer) 4735 goto buf_alloc_failed; 4736 4737 buffer_token = dma_map_single(dev, login_buffer, buffer_size, 4738 DMA_TO_DEVICE); 4739 if (dma_mapping_error(dev, buffer_token)) { 4740 dev_err(dev, "Couldn't map login buffer\n"); 4741 goto buf_map_failed; 4742 } 4743 4744 rsp_buffer_size = sizeof(struct ibmvnic_login_rsp_buffer) + 4745 sizeof(u64) * adapter->req_tx_queues + 4746 sizeof(u64) * adapter->req_rx_queues + 4747 sizeof(u64) * adapter->req_rx_queues + 4748 sizeof(u8) * IBMVNIC_TX_DESC_VERSIONS; 4749 4750 login_rsp_buffer = kmalloc(rsp_buffer_size, GFP_ATOMIC); 4751 if (!login_rsp_buffer) 4752 goto buf_rsp_alloc_failed; 4753 4754 rsp_buffer_token = dma_map_single(dev, login_rsp_buffer, 4755 rsp_buffer_size, DMA_FROM_DEVICE); 4756 if (dma_mapping_error(dev, rsp_buffer_token)) { 4757 dev_err(dev, "Couldn't map login rsp buffer\n"); 4758 goto buf_rsp_map_failed; 4759 } 4760 4761 adapter->login_buf = login_buffer; 4762 adapter->login_buf_token = buffer_token; 4763 adapter->login_buf_sz = buffer_size; 4764 adapter->login_rsp_buf = login_rsp_buffer; 4765 adapter->login_rsp_buf_token = rsp_buffer_token; 4766 adapter->login_rsp_buf_sz = rsp_buffer_size; 4767 4768 login_buffer->len = cpu_to_be32(buffer_size); 4769 login_buffer->version = cpu_to_be32(INITIAL_VERSION_LB); 4770 login_buffer->num_txcomp_subcrqs = cpu_to_be32(adapter->req_tx_queues); 4771 login_buffer->off_txcomp_subcrqs = 4772 
cpu_to_be32(sizeof(struct ibmvnic_login_buffer)); 4773 login_buffer->num_rxcomp_subcrqs = cpu_to_be32(adapter->req_rx_queues); 4774 login_buffer->off_rxcomp_subcrqs = 4775 cpu_to_be32(sizeof(struct ibmvnic_login_buffer) + 4776 sizeof(u64) * adapter->req_tx_queues); 4777 login_buffer->login_rsp_ioba = cpu_to_be32(rsp_buffer_token); 4778 login_buffer->login_rsp_len = cpu_to_be32(rsp_buffer_size); 4779 4780 tx_list_p = (__be64 *)((char *)login_buffer + 4781 sizeof(struct ibmvnic_login_buffer)); 4782 rx_list_p = (__be64 *)((char *)login_buffer + 4783 sizeof(struct ibmvnic_login_buffer) + 4784 sizeof(u64) * adapter->req_tx_queues); 4785 4786 for (i = 0; i < adapter->req_tx_queues; i++) { 4787 if (adapter->tx_scrq[i]) { 4788 tx_list_p[i] = 4789 cpu_to_be64(adapter->tx_scrq[i]->crq_num); 4790 } 4791 } 4792 4793 for (i = 0; i < adapter->req_rx_queues; i++) { 4794 if (adapter->rx_scrq[i]) { 4795 rx_list_p[i] = 4796 cpu_to_be64(adapter->rx_scrq[i]->crq_num); 4797 } 4798 } 4799 4800 /* Insert vNIC login client data */ 4801 vlcd = (struct vnic_login_client_data *) 4802 ((char *)rx_list_p + (sizeof(u64) * adapter->req_rx_queues)); 4803 login_buffer->client_data_offset = 4804 cpu_to_be32((char *)vlcd - (char *)login_buffer); 4805 login_buffer->client_data_len = cpu_to_be32(client_data_len); 4806 4807 vnic_add_client_data(adapter, vlcd); 4808 4809 netdev_dbg(adapter->netdev, "Login Buffer:\n"); 4810 for (i = 0; i < (adapter->login_buf_sz - 1) / 8 + 1; i++) { 4811 netdev_dbg(adapter->netdev, "%016lx\n", 4812 ((unsigned long *)(adapter->login_buf))[i]); 4813 } 4814 4815 memset(&crq, 0, sizeof(crq)); 4816 crq.login.first = IBMVNIC_CRQ_CMD; 4817 crq.login.cmd = LOGIN; 4818 crq.login.ioba = cpu_to_be32(buffer_token); 4819 crq.login.len = cpu_to_be32(buffer_size); 4820 4821 adapter->login_pending = true; 4822 rc = ibmvnic_send_crq(adapter, &crq); 4823 if (rc) { 4824 adapter->login_pending = false; 4825 netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc); 4826 goto buf_rsp_map_failed; 4827 } 4828 4829 return 0; 4830 4831 buf_rsp_map_failed: 4832 kfree(login_rsp_buffer); 4833 adapter->login_rsp_buf = NULL; 4834 buf_rsp_alloc_failed: 4835 dma_unmap_single(dev, buffer_token, buffer_size, DMA_TO_DEVICE); 4836 buf_map_failed: 4837 kfree(login_buffer); 4838 adapter->login_buf = NULL; 4839 buf_alloc_failed: 4840 return -ENOMEM; 4841 } 4842 4843 static int send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr, 4844 u32 len, u8 map_id) 4845 { 4846 union ibmvnic_crq crq; 4847 4848 memset(&crq, 0, sizeof(crq)); 4849 crq.request_map.first = IBMVNIC_CRQ_CMD; 4850 crq.request_map.cmd = REQUEST_MAP; 4851 crq.request_map.map_id = map_id; 4852 crq.request_map.ioba = cpu_to_be32(addr); 4853 crq.request_map.len = cpu_to_be32(len); 4854 return ibmvnic_send_crq(adapter, &crq); 4855 } 4856 4857 static int send_request_unmap(struct ibmvnic_adapter *adapter, u8 map_id) 4858 { 4859 union ibmvnic_crq crq; 4860 4861 memset(&crq, 0, sizeof(crq)); 4862 crq.request_unmap.first = IBMVNIC_CRQ_CMD; 4863 crq.request_unmap.cmd = REQUEST_UNMAP; 4864 crq.request_unmap.map_id = map_id; 4865 return ibmvnic_send_crq(adapter, &crq); 4866 } 4867 4868 static void send_query_map(struct ibmvnic_adapter *adapter) 4869 { 4870 union ibmvnic_crq crq; 4871 4872 memset(&crq, 0, sizeof(crq)); 4873 crq.query_map.first = IBMVNIC_CRQ_CMD; 4874 crq.query_map.cmd = QUERY_MAP; 4875 ibmvnic_send_crq(adapter, &crq); 4876 } 4877 4878 /* Send a series of CRQs requesting various capabilities of the VNIC server */ 4879 static void 
send_query_cap(struct ibmvnic_adapter *adapter) 4880 { 4881 union ibmvnic_crq crq; 4882 int cap_reqs; 4883 4884 /* We send out 25 QUERY_CAPABILITY CRQs below. Initialize this count 4885 * upfront. When the tasklet receives a response to all of these, it 4886 * can send out the next protocol messaage (REQUEST_CAPABILITY). 4887 */ 4888 cap_reqs = 25; 4889 4890 atomic_set(&adapter->running_cap_crqs, cap_reqs); 4891 4892 memset(&crq, 0, sizeof(crq)); 4893 crq.query_capability.first = IBMVNIC_CRQ_CMD; 4894 crq.query_capability.cmd = QUERY_CAPABILITY; 4895 4896 crq.query_capability.capability = cpu_to_be16(MIN_TX_QUEUES); 4897 ibmvnic_send_crq(adapter, &crq); 4898 cap_reqs--; 4899 4900 crq.query_capability.capability = cpu_to_be16(MIN_RX_QUEUES); 4901 ibmvnic_send_crq(adapter, &crq); 4902 cap_reqs--; 4903 4904 crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_QUEUES); 4905 ibmvnic_send_crq(adapter, &crq); 4906 cap_reqs--; 4907 4908 crq.query_capability.capability = cpu_to_be16(MAX_TX_QUEUES); 4909 ibmvnic_send_crq(adapter, &crq); 4910 cap_reqs--; 4911 4912 crq.query_capability.capability = cpu_to_be16(MAX_RX_QUEUES); 4913 ibmvnic_send_crq(adapter, &crq); 4914 cap_reqs--; 4915 4916 crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_QUEUES); 4917 ibmvnic_send_crq(adapter, &crq); 4918 cap_reqs--; 4919 4920 crq.query_capability.capability = 4921 cpu_to_be16(MIN_TX_ENTRIES_PER_SUBCRQ); 4922 ibmvnic_send_crq(adapter, &crq); 4923 cap_reqs--; 4924 4925 crq.query_capability.capability = 4926 cpu_to_be16(MIN_RX_ADD_ENTRIES_PER_SUBCRQ); 4927 ibmvnic_send_crq(adapter, &crq); 4928 cap_reqs--; 4929 4930 crq.query_capability.capability = 4931 cpu_to_be16(MAX_TX_ENTRIES_PER_SUBCRQ); 4932 ibmvnic_send_crq(adapter, &crq); 4933 cap_reqs--; 4934 4935 crq.query_capability.capability = 4936 cpu_to_be16(MAX_RX_ADD_ENTRIES_PER_SUBCRQ); 4937 ibmvnic_send_crq(adapter, &crq); 4938 cap_reqs--; 4939 4940 crq.query_capability.capability = cpu_to_be16(TCP_IP_OFFLOAD); 4941 ibmvnic_send_crq(adapter, &crq); 4942 cap_reqs--; 4943 4944 crq.query_capability.capability = cpu_to_be16(PROMISC_SUPPORTED); 4945 ibmvnic_send_crq(adapter, &crq); 4946 cap_reqs--; 4947 4948 crq.query_capability.capability = cpu_to_be16(MIN_MTU); 4949 ibmvnic_send_crq(adapter, &crq); 4950 cap_reqs--; 4951 4952 crq.query_capability.capability = cpu_to_be16(MAX_MTU); 4953 ibmvnic_send_crq(adapter, &crq); 4954 cap_reqs--; 4955 4956 crq.query_capability.capability = cpu_to_be16(MAX_MULTICAST_FILTERS); 4957 ibmvnic_send_crq(adapter, &crq); 4958 cap_reqs--; 4959 4960 crq.query_capability.capability = cpu_to_be16(VLAN_HEADER_INSERTION); 4961 ibmvnic_send_crq(adapter, &crq); 4962 cap_reqs--; 4963 4964 crq.query_capability.capability = cpu_to_be16(RX_VLAN_HEADER_INSERTION); 4965 ibmvnic_send_crq(adapter, &crq); 4966 cap_reqs--; 4967 4968 crq.query_capability.capability = cpu_to_be16(MAX_TX_SG_ENTRIES); 4969 ibmvnic_send_crq(adapter, &crq); 4970 cap_reqs--; 4971 4972 crq.query_capability.capability = cpu_to_be16(RX_SG_SUPPORTED); 4973 ibmvnic_send_crq(adapter, &crq); 4974 cap_reqs--; 4975 4976 crq.query_capability.capability = cpu_to_be16(OPT_TX_COMP_SUB_QUEUES); 4977 ibmvnic_send_crq(adapter, &crq); 4978 cap_reqs--; 4979 4980 crq.query_capability.capability = cpu_to_be16(OPT_RX_COMP_QUEUES); 4981 ibmvnic_send_crq(adapter, &crq); 4982 cap_reqs--; 4983 4984 crq.query_capability.capability = 4985 cpu_to_be16(OPT_RX_BUFADD_Q_PER_RX_COMP_Q); 4986 ibmvnic_send_crq(adapter, &crq); 4987 cap_reqs--; 4988 4989 crq.query_capability.capability = 4990 
cpu_to_be16(OPT_TX_ENTRIES_PER_SUBCRQ); 4991 ibmvnic_send_crq(adapter, &crq); 4992 cap_reqs--; 4993 4994 crq.query_capability.capability = 4995 cpu_to_be16(OPT_RXBA_ENTRIES_PER_SUBCRQ); 4996 ibmvnic_send_crq(adapter, &crq); 4997 cap_reqs--; 4998 4999 crq.query_capability.capability = cpu_to_be16(TX_RX_DESC_REQ); 5000 5001 ibmvnic_send_crq(adapter, &crq); 5002 cap_reqs--; 5003 5004 /* Keep at end to catch any discrepancy between expected and actual 5005 * CRQs sent. 5006 */ 5007 WARN_ON(cap_reqs != 0); 5008 } 5009 5010 static void send_query_ip_offload(struct ibmvnic_adapter *adapter) 5011 { 5012 int buf_sz = sizeof(struct ibmvnic_query_ip_offload_buffer); 5013 struct device *dev = &adapter->vdev->dev; 5014 union ibmvnic_crq crq; 5015 5016 adapter->ip_offload_tok = 5017 dma_map_single(dev, 5018 &adapter->ip_offload_buf, 5019 buf_sz, 5020 DMA_FROM_DEVICE); 5021 5022 if (dma_mapping_error(dev, adapter->ip_offload_tok)) { 5023 if (!firmware_has_feature(FW_FEATURE_CMO)) 5024 dev_err(dev, "Couldn't map offload buffer\n"); 5025 return; 5026 } 5027 5028 memset(&crq, 0, sizeof(crq)); 5029 crq.query_ip_offload.first = IBMVNIC_CRQ_CMD; 5030 crq.query_ip_offload.cmd = QUERY_IP_OFFLOAD; 5031 crq.query_ip_offload.len = cpu_to_be32(buf_sz); 5032 crq.query_ip_offload.ioba = 5033 cpu_to_be32(adapter->ip_offload_tok); 5034 5035 ibmvnic_send_crq(adapter, &crq); 5036 } 5037 5038 static void send_control_ip_offload(struct ibmvnic_adapter *adapter) 5039 { 5040 struct ibmvnic_control_ip_offload_buffer *ctrl_buf = &adapter->ip_offload_ctrl; 5041 struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; 5042 struct device *dev = &adapter->vdev->dev; 5043 netdev_features_t old_hw_features = 0; 5044 union ibmvnic_crq crq; 5045 5046 adapter->ip_offload_ctrl_tok = 5047 dma_map_single(dev, 5048 ctrl_buf, 5049 sizeof(adapter->ip_offload_ctrl), 5050 DMA_TO_DEVICE); 5051 5052 if (dma_mapping_error(dev, adapter->ip_offload_ctrl_tok)) { 5053 dev_err(dev, "Couldn't map ip offload control buffer\n"); 5054 return; 5055 } 5056 5057 ctrl_buf->len = cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); 5058 ctrl_buf->version = cpu_to_be32(INITIAL_VERSION_IOB); 5059 ctrl_buf->ipv4_chksum = buf->ipv4_chksum; 5060 ctrl_buf->ipv6_chksum = buf->ipv6_chksum; 5061 ctrl_buf->tcp_ipv4_chksum = buf->tcp_ipv4_chksum; 5062 ctrl_buf->udp_ipv4_chksum = buf->udp_ipv4_chksum; 5063 ctrl_buf->tcp_ipv6_chksum = buf->tcp_ipv6_chksum; 5064 ctrl_buf->udp_ipv6_chksum = buf->udp_ipv6_chksum; 5065 ctrl_buf->large_tx_ipv4 = buf->large_tx_ipv4; 5066 ctrl_buf->large_tx_ipv6 = buf->large_tx_ipv6; 5067 5068 /* large_rx disabled for now, additional features needed */ 5069 ctrl_buf->large_rx_ipv4 = 0; 5070 ctrl_buf->large_rx_ipv6 = 0; 5071 5072 if (adapter->state != VNIC_PROBING) { 5073 old_hw_features = adapter->netdev->hw_features; 5074 adapter->netdev->hw_features = 0; 5075 } 5076 5077 adapter->netdev->hw_features = NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO; 5078 5079 if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) 5080 adapter->netdev->hw_features |= NETIF_F_IP_CSUM; 5081 5082 if (buf->tcp_ipv6_chksum || buf->udp_ipv6_chksum) 5083 adapter->netdev->hw_features |= NETIF_F_IPV6_CSUM; 5084 5085 if ((adapter->netdev->features & 5086 (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) 5087 adapter->netdev->hw_features |= NETIF_F_RXCSUM; 5088 5089 if (buf->large_tx_ipv4) 5090 adapter->netdev->hw_features |= NETIF_F_TSO; 5091 if (buf->large_tx_ipv6) 5092 adapter->netdev->hw_features |= NETIF_F_TSO6; 5093 5094 if (adapter->state == VNIC_PROBING) { 5095 
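		/* Initial probe: simply adopt every offload the server just
		 * advertised.
		 */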
adapter->netdev->features |= adapter->netdev->hw_features; 5096 } else if (old_hw_features != adapter->netdev->hw_features) { 5097 netdev_features_t tmp = 0; 5098 5099 /* disable features no longer supported */ 5100 adapter->netdev->features &= adapter->netdev->hw_features; 5101 /* turn on features now supported if previously enabled */ 5102 tmp = (old_hw_features ^ adapter->netdev->hw_features) & 5103 adapter->netdev->hw_features; 5104 adapter->netdev->features |= 5105 tmp & adapter->netdev->wanted_features; 5106 } 5107 5108 memset(&crq, 0, sizeof(crq)); 5109 crq.control_ip_offload.first = IBMVNIC_CRQ_CMD; 5110 crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD; 5111 crq.control_ip_offload.len = 5112 cpu_to_be32(sizeof(adapter->ip_offload_ctrl)); 5113 crq.control_ip_offload.ioba = cpu_to_be32(adapter->ip_offload_ctrl_tok); 5114 ibmvnic_send_crq(adapter, &crq); 5115 } 5116 5117 static void handle_vpd_size_rsp(union ibmvnic_crq *crq, 5118 struct ibmvnic_adapter *adapter) 5119 { 5120 struct device *dev = &adapter->vdev->dev; 5121 5122 if (crq->get_vpd_size_rsp.rc.code) { 5123 dev_err(dev, "Error retrieving VPD size, rc=%x\n", 5124 crq->get_vpd_size_rsp.rc.code); 5125 complete(&adapter->fw_done); 5126 return; 5127 } 5128 5129 adapter->vpd->len = be64_to_cpu(crq->get_vpd_size_rsp.len); 5130 complete(&adapter->fw_done); 5131 } 5132 5133 static void handle_vpd_rsp(union ibmvnic_crq *crq, 5134 struct ibmvnic_adapter *adapter) 5135 { 5136 struct device *dev = &adapter->vdev->dev; 5137 unsigned char *substr = NULL; 5138 u8 fw_level_len = 0; 5139 5140 memset(adapter->fw_version, 0, 32); 5141 5142 dma_unmap_single(dev, adapter->vpd->dma_addr, adapter->vpd->len, 5143 DMA_FROM_DEVICE); 5144 5145 if (crq->get_vpd_rsp.rc.code) { 5146 dev_err(dev, "Error retrieving VPD from device, rc=%x\n", 5147 crq->get_vpd_rsp.rc.code); 5148 goto complete; 5149 } 5150 5151 /* get the position of the firmware version info 5152 * located after the ASCII 'RM' substring in the buffer 5153 */ 5154 substr = strnstr(adapter->vpd->buff, "RM", adapter->vpd->len); 5155 if (!substr) { 5156 dev_info(dev, "Warning - No FW level has been provided in the VPD buffer by the VIOS Server\n"); 5157 goto complete; 5158 } 5159 5160 /* get length of firmware level ASCII substring */ 5161 if ((substr + 2) < (adapter->vpd->buff + adapter->vpd->len)) { 5162 fw_level_len = *(substr + 2); 5163 } else { 5164 dev_info(dev, "Length of FW substr extrapolated VDP buff\n"); 5165 goto complete; 5166 } 5167 5168 /* copy firmware version string from vpd into adapter */ 5169 if ((substr + 3 + fw_level_len) < 5170 (adapter->vpd->buff + adapter->vpd->len)) { 5171 strncpy((char *)adapter->fw_version, substr + 3, fw_level_len); 5172 } else { 5173 dev_info(dev, "FW substr extrapolated VPD buff\n"); 5174 } 5175 5176 complete: 5177 if (adapter->fw_version[0] == '\0') 5178 strscpy((char *)adapter->fw_version, "N/A", sizeof(adapter->fw_version)); 5179 complete(&adapter->fw_done); 5180 } 5181 5182 static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) 5183 { 5184 struct device *dev = &adapter->vdev->dev; 5185 struct ibmvnic_query_ip_offload_buffer *buf = &adapter->ip_offload_buf; 5186 int i; 5187 5188 dma_unmap_single(dev, adapter->ip_offload_tok, 5189 sizeof(adapter->ip_offload_buf), DMA_FROM_DEVICE); 5190 5191 netdev_dbg(adapter->netdev, "Query IP Offload Buffer:\n"); 5192 for (i = 0; i < (sizeof(adapter->ip_offload_buf) - 1) / 8 + 1; i++) 5193 netdev_dbg(adapter->netdev, "%016lx\n", 5194 ((unsigned long *)(buf))[i]); 5195 5196 
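	/* Log each offload capability reported by the server before
	 * building the CONTROL_IP_OFFLOAD request from it.
	 */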
netdev_dbg(adapter->netdev, "ipv4_chksum = %d\n", buf->ipv4_chksum); 5197 netdev_dbg(adapter->netdev, "ipv6_chksum = %d\n", buf->ipv6_chksum); 5198 netdev_dbg(adapter->netdev, "tcp_ipv4_chksum = %d\n", 5199 buf->tcp_ipv4_chksum); 5200 netdev_dbg(adapter->netdev, "tcp_ipv6_chksum = %d\n", 5201 buf->tcp_ipv6_chksum); 5202 netdev_dbg(adapter->netdev, "udp_ipv4_chksum = %d\n", 5203 buf->udp_ipv4_chksum); 5204 netdev_dbg(adapter->netdev, "udp_ipv6_chksum = %d\n", 5205 buf->udp_ipv6_chksum); 5206 netdev_dbg(adapter->netdev, "large_tx_ipv4 = %d\n", 5207 buf->large_tx_ipv4); 5208 netdev_dbg(adapter->netdev, "large_tx_ipv6 = %d\n", 5209 buf->large_tx_ipv6); 5210 netdev_dbg(adapter->netdev, "large_rx_ipv4 = %d\n", 5211 buf->large_rx_ipv4); 5212 netdev_dbg(adapter->netdev, "large_rx_ipv6 = %d\n", 5213 buf->large_rx_ipv6); 5214 netdev_dbg(adapter->netdev, "max_ipv4_hdr_sz = %d\n", 5215 buf->max_ipv4_header_size); 5216 netdev_dbg(adapter->netdev, "max_ipv6_hdr_sz = %d\n", 5217 buf->max_ipv6_header_size); 5218 netdev_dbg(adapter->netdev, "max_tcp_hdr_size = %d\n", 5219 buf->max_tcp_header_size); 5220 netdev_dbg(adapter->netdev, "max_udp_hdr_size = %d\n", 5221 buf->max_udp_header_size); 5222 netdev_dbg(adapter->netdev, "max_large_tx_size = %d\n", 5223 buf->max_large_tx_size); 5224 netdev_dbg(adapter->netdev, "max_large_rx_size = %d\n", 5225 buf->max_large_rx_size); 5226 netdev_dbg(adapter->netdev, "ipv6_ext_hdr = %d\n", 5227 buf->ipv6_extension_header); 5228 netdev_dbg(adapter->netdev, "tcp_pseudosum_req = %d\n", 5229 buf->tcp_pseudosum_req); 5230 netdev_dbg(adapter->netdev, "num_ipv6_ext_hd = %d\n", 5231 buf->num_ipv6_ext_headers); 5232 netdev_dbg(adapter->netdev, "off_ipv6_ext_hd = %d\n", 5233 buf->off_ipv6_ext_headers); 5234 5235 send_control_ip_offload(adapter); 5236 } 5237 5238 static const char *ibmvnic_fw_err_cause(u16 cause) 5239 { 5240 switch (cause) { 5241 case ADAPTER_PROBLEM: 5242 return "adapter problem"; 5243 case BUS_PROBLEM: 5244 return "bus problem"; 5245 case FW_PROBLEM: 5246 return "firmware problem"; 5247 case DD_PROBLEM: 5248 return "device driver problem"; 5249 case EEH_RECOVERY: 5250 return "EEH recovery"; 5251 case FW_UPDATED: 5252 return "firmware updated"; 5253 case LOW_MEMORY: 5254 return "low Memory"; 5255 default: 5256 return "unknown"; 5257 } 5258 } 5259 5260 static void handle_error_indication(union ibmvnic_crq *crq, 5261 struct ibmvnic_adapter *adapter) 5262 { 5263 struct device *dev = &adapter->vdev->dev; 5264 u16 cause; 5265 5266 cause = be16_to_cpu(crq->error_indication.error_cause); 5267 5268 dev_warn_ratelimited(dev, 5269 "Firmware reports %serror, cause: %s. Starting recovery...\n", 5270 crq->error_indication.flags 5271 & IBMVNIC_FATAL_ERROR ? "FATAL " : "", 5272 ibmvnic_fw_err_cause(cause)); 5273 5274 if (crq->error_indication.flags & IBMVNIC_FATAL_ERROR) 5275 ibmvnic_reset(adapter, VNIC_RESET_FATAL); 5276 else 5277 ibmvnic_reset(adapter, VNIC_RESET_NON_FATAL); 5278 } 5279 5280 static int handle_change_mac_rsp(union ibmvnic_crq *crq, 5281 struct ibmvnic_adapter *adapter) 5282 { 5283 struct net_device *netdev = adapter->netdev; 5284 struct device *dev = &adapter->vdev->dev; 5285 long rc; 5286 5287 rc = crq->change_mac_addr_rsp.rc.code; 5288 if (rc) { 5289 dev_err(dev, "Error %ld in CHANGE_MAC_ADDR_RSP\n", rc); 5290 goto out; 5291 } 5292 /* crq->change_mac_addr.mac_addr is the requested one 5293 * crq->change_mac_addr_rsp.mac_addr is the returned valid one. 
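	 * Install the returned address so the netdev always reflects what
	 * the server actually accepted.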
5294 */ 5295 eth_hw_addr_set(netdev, &crq->change_mac_addr_rsp.mac_addr[0]); 5296 ether_addr_copy(adapter->mac_addr, 5297 &crq->change_mac_addr_rsp.mac_addr[0]); 5298 out: 5299 complete(&adapter->fw_done); 5300 return rc; 5301 } 5302 5303 static void handle_request_cap_rsp(union ibmvnic_crq *crq, 5304 struct ibmvnic_adapter *adapter) 5305 { 5306 struct device *dev = &adapter->vdev->dev; 5307 u64 *req_value; 5308 char *name; 5309 5310 atomic_dec(&adapter->running_cap_crqs); 5311 netdev_dbg(adapter->netdev, "Outstanding request-caps: %d\n", 5312 atomic_read(&adapter->running_cap_crqs)); 5313 switch (be16_to_cpu(crq->request_capability_rsp.capability)) { 5314 case REQ_TX_QUEUES: 5315 req_value = &adapter->req_tx_queues; 5316 name = "tx"; 5317 break; 5318 case REQ_RX_QUEUES: 5319 req_value = &adapter->req_rx_queues; 5320 name = "rx"; 5321 break; 5322 case REQ_RX_ADD_QUEUES: 5323 req_value = &adapter->req_rx_add_queues; 5324 name = "rx_add"; 5325 break; 5326 case REQ_TX_ENTRIES_PER_SUBCRQ: 5327 req_value = &adapter->req_tx_entries_per_subcrq; 5328 name = "tx_entries_per_subcrq"; 5329 break; 5330 case REQ_RX_ADD_ENTRIES_PER_SUBCRQ: 5331 req_value = &adapter->req_rx_add_entries_per_subcrq; 5332 name = "rx_add_entries_per_subcrq"; 5333 break; 5334 case REQ_MTU: 5335 req_value = &adapter->req_mtu; 5336 name = "mtu"; 5337 break; 5338 case PROMISC_REQUESTED: 5339 req_value = &adapter->promisc; 5340 name = "promisc"; 5341 break; 5342 default: 5343 dev_err(dev, "Got invalid cap request rsp %d\n", 5344 crq->request_capability.capability); 5345 return; 5346 } 5347 5348 switch (crq->request_capability_rsp.rc.code) { 5349 case SUCCESS: 5350 break; 5351 case PARTIALSUCCESS: 5352 dev_info(dev, "req=%lld, rsp=%ld in %s queue, retrying.\n", 5353 *req_value, 5354 (long)be64_to_cpu(crq->request_capability_rsp.number), 5355 name); 5356 5357 if (be16_to_cpu(crq->request_capability_rsp.capability) == 5358 REQ_MTU) { 5359 pr_err("mtu of %llu is not supported. Reverting.\n", 5360 *req_value); 5361 *req_value = adapter->fallback.mtu; 5362 } else { 5363 *req_value = 5364 be64_to_cpu(crq->request_capability_rsp.number); 5365 } 5366 5367 send_request_cap(adapter, 1); 5368 return; 5369 default: 5370 dev_err(dev, "Error %d in request cap rsp\n", 5371 crq->request_capability_rsp.rc.code); 5372 return; 5373 } 5374 5375 /* Done receiving requested capabilities, query IP offload support */ 5376 if (atomic_read(&adapter->running_cap_crqs) == 0) 5377 send_query_ip_offload(adapter); 5378 } 5379 5380 static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, 5381 struct ibmvnic_adapter *adapter) 5382 { 5383 struct device *dev = &adapter->vdev->dev; 5384 struct net_device *netdev = adapter->netdev; 5385 struct ibmvnic_login_rsp_buffer *login_rsp = adapter->login_rsp_buf; 5386 struct ibmvnic_login_buffer *login = adapter->login_buf; 5387 u64 *tx_handle_array; 5388 u64 *rx_handle_array; 5389 int num_tx_pools; 5390 int num_rx_pools; 5391 u64 *size_array; 5392 int i; 5393 5394 /* CHECK: Test/set of login_pending does not need to be atomic 5395 * because only ibmvnic_tasklet tests/clears this. 
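	 * (It is set in send_login() just before the LOGIN CRQ is issued.)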
5396 */ 5397 if (!adapter->login_pending) { 5398 netdev_warn(netdev, "Ignoring unexpected login response\n"); 5399 return 0; 5400 } 5401 adapter->login_pending = false; 5402 5403 dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz, 5404 DMA_TO_DEVICE); 5405 dma_unmap_single(dev, adapter->login_rsp_buf_token, 5406 adapter->login_rsp_buf_sz, DMA_FROM_DEVICE); 5407 5408 /* If the number of queues requested can't be allocated by the 5409 * server, the login response will return with code 1. We will need 5410 * to resend the login buffer with fewer queues requested. 5411 */ 5412 if (login_rsp_crq->generic.rc.code) { 5413 adapter->init_done_rc = login_rsp_crq->generic.rc.code; 5414 complete(&adapter->init_done); 5415 return 0; 5416 } 5417 5418 if (adapter->failover_pending) { 5419 adapter->init_done_rc = -EAGAIN; 5420 netdev_dbg(netdev, "Failover pending, ignoring login response\n"); 5421 complete(&adapter->init_done); 5422 /* login response buffer will be released on reset */ 5423 return 0; 5424 } 5425 5426 netdev->mtu = adapter->req_mtu - ETH_HLEN; 5427 5428 netdev_dbg(adapter->netdev, "Login Response Buffer:\n"); 5429 for (i = 0; i < (adapter->login_rsp_buf_sz - 1) / 8 + 1; i++) { 5430 netdev_dbg(adapter->netdev, "%016lx\n", 5431 ((unsigned long *)(adapter->login_rsp_buf))[i]); 5432 } 5433 5434 /* Sanity checks */ 5435 if (login->num_txcomp_subcrqs != login_rsp->num_txsubm_subcrqs || 5436 (be32_to_cpu(login->num_rxcomp_subcrqs) * 5437 adapter->req_rx_add_queues != 5438 be32_to_cpu(login_rsp->num_rxadd_subcrqs))) { 5439 dev_err(dev, "FATAL: Inconsistent login and login rsp\n"); 5440 ibmvnic_reset(adapter, VNIC_RESET_FATAL); 5441 return -EIO; 5442 } 5443 size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + 5444 be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); 5445 /* variable buffer sizes are not supported, so just read the 5446 * first entry. 
5447 */ 5448 adapter->cur_rx_buf_sz = be64_to_cpu(size_array[0]); 5449 5450 num_tx_pools = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); 5451 num_rx_pools = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs); 5452 5453 tx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + 5454 be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs)); 5455 rx_handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + 5456 be32_to_cpu(adapter->login_rsp_buf->off_rxadd_subcrqs)); 5457 5458 for (i = 0; i < num_tx_pools; i++) 5459 adapter->tx_scrq[i]->handle = tx_handle_array[i]; 5460 5461 for (i = 0; i < num_rx_pools; i++) 5462 adapter->rx_scrq[i]->handle = rx_handle_array[i]; 5463 5464 adapter->num_active_tx_scrqs = num_tx_pools; 5465 adapter->num_active_rx_scrqs = num_rx_pools; 5466 release_login_rsp_buffer(adapter); 5467 release_login_buffer(adapter); 5468 complete(&adapter->init_done); 5469 5470 return 0; 5471 } 5472 5473 static void handle_request_unmap_rsp(union ibmvnic_crq *crq, 5474 struct ibmvnic_adapter *adapter) 5475 { 5476 struct device *dev = &adapter->vdev->dev; 5477 long rc; 5478 5479 rc = crq->request_unmap_rsp.rc.code; 5480 if (rc) 5481 dev_err(dev, "Error %ld in REQUEST_UNMAP_RSP\n", rc); 5482 } 5483 5484 static void handle_query_map_rsp(union ibmvnic_crq *crq, 5485 struct ibmvnic_adapter *adapter) 5486 { 5487 struct net_device *netdev = adapter->netdev; 5488 struct device *dev = &adapter->vdev->dev; 5489 long rc; 5490 5491 rc = crq->query_map_rsp.rc.code; 5492 if (rc) { 5493 dev_err(dev, "Error %ld in QUERY_MAP_RSP\n", rc); 5494 return; 5495 } 5496 netdev_dbg(netdev, "page_size = %d\ntot_pages = %u\nfree_pages = %u\n", 5497 crq->query_map_rsp.page_size, 5498 __be32_to_cpu(crq->query_map_rsp.tot_pages), 5499 __be32_to_cpu(crq->query_map_rsp.free_pages)); 5500 } 5501 5502 static void handle_query_cap_rsp(union ibmvnic_crq *crq, 5503 struct ibmvnic_adapter *adapter) 5504 { 5505 struct net_device *netdev = adapter->netdev; 5506 struct device *dev = &adapter->vdev->dev; 5507 long rc; 5508 5509 atomic_dec(&adapter->running_cap_crqs); 5510 netdev_dbg(netdev, "Outstanding queries: %d\n", 5511 atomic_read(&adapter->running_cap_crqs)); 5512 rc = crq->query_capability.rc.code; 5513 if (rc) { 5514 dev_err(dev, "Error %ld in QUERY_CAP_RSP\n", rc); 5515 goto out; 5516 } 5517 5518 switch (be16_to_cpu(crq->query_capability.capability)) { 5519 case MIN_TX_QUEUES: 5520 adapter->min_tx_queues = 5521 be64_to_cpu(crq->query_capability.number); 5522 netdev_dbg(netdev, "min_tx_queues = %lld\n", 5523 adapter->min_tx_queues); 5524 break; 5525 case MIN_RX_QUEUES: 5526 adapter->min_rx_queues = 5527 be64_to_cpu(crq->query_capability.number); 5528 netdev_dbg(netdev, "min_rx_queues = %lld\n", 5529 adapter->min_rx_queues); 5530 break; 5531 case MIN_RX_ADD_QUEUES: 5532 adapter->min_rx_add_queues = 5533 be64_to_cpu(crq->query_capability.number); 5534 netdev_dbg(netdev, "min_rx_add_queues = %lld\n", 5535 adapter->min_rx_add_queues); 5536 break; 5537 case MAX_TX_QUEUES: 5538 adapter->max_tx_queues = 5539 be64_to_cpu(crq->query_capability.number); 5540 netdev_dbg(netdev, "max_tx_queues = %lld\n", 5541 adapter->max_tx_queues); 5542 break; 5543 case MAX_RX_QUEUES: 5544 adapter->max_rx_queues = 5545 be64_to_cpu(crq->query_capability.number); 5546 netdev_dbg(netdev, "max_rx_queues = %lld\n", 5547 adapter->max_rx_queues); 5548 break; 5549 case MAX_RX_ADD_QUEUES: 5550 adapter->max_rx_add_queues = 5551 be64_to_cpu(crq->query_capability.number); 5552 netdev_dbg(netdev, "max_rx_add_queues = %lld\n", 5553 
adapter->max_rx_add_queues); 5554 break; 5555 case MIN_TX_ENTRIES_PER_SUBCRQ: 5556 adapter->min_tx_entries_per_subcrq = 5557 be64_to_cpu(crq->query_capability.number); 5558 netdev_dbg(netdev, "min_tx_entries_per_subcrq = %lld\n", 5559 adapter->min_tx_entries_per_subcrq); 5560 break; 5561 case MIN_RX_ADD_ENTRIES_PER_SUBCRQ: 5562 adapter->min_rx_add_entries_per_subcrq = 5563 be64_to_cpu(crq->query_capability.number); 5564 netdev_dbg(netdev, "min_rx_add_entrs_per_subcrq = %lld\n", 5565 adapter->min_rx_add_entries_per_subcrq); 5566 break; 5567 case MAX_TX_ENTRIES_PER_SUBCRQ: 5568 adapter->max_tx_entries_per_subcrq = 5569 be64_to_cpu(crq->query_capability.number); 5570 netdev_dbg(netdev, "max_tx_entries_per_subcrq = %lld\n", 5571 adapter->max_tx_entries_per_subcrq); 5572 break; 5573 case MAX_RX_ADD_ENTRIES_PER_SUBCRQ: 5574 adapter->max_rx_add_entries_per_subcrq = 5575 be64_to_cpu(crq->query_capability.number); 5576 netdev_dbg(netdev, "max_rx_add_entrs_per_subcrq = %lld\n", 5577 adapter->max_rx_add_entries_per_subcrq); 5578 break; 5579 case TCP_IP_OFFLOAD: 5580 adapter->tcp_ip_offload = 5581 be64_to_cpu(crq->query_capability.number); 5582 netdev_dbg(netdev, "tcp_ip_offload = %lld\n", 5583 adapter->tcp_ip_offload); 5584 break; 5585 case PROMISC_SUPPORTED: 5586 adapter->promisc_supported = 5587 be64_to_cpu(crq->query_capability.number); 5588 netdev_dbg(netdev, "promisc_supported = %lld\n", 5589 adapter->promisc_supported); 5590 break; 5591 case MIN_MTU: 5592 adapter->min_mtu = be64_to_cpu(crq->query_capability.number); 5593 netdev->min_mtu = adapter->min_mtu - ETH_HLEN; 5594 netdev_dbg(netdev, "min_mtu = %lld\n", adapter->min_mtu); 5595 break; 5596 case MAX_MTU: 5597 adapter->max_mtu = be64_to_cpu(crq->query_capability.number); 5598 netdev->max_mtu = adapter->max_mtu - ETH_HLEN; 5599 netdev_dbg(netdev, "max_mtu = %lld\n", adapter->max_mtu); 5600 break; 5601 case MAX_MULTICAST_FILTERS: 5602 adapter->max_multicast_filters = 5603 be64_to_cpu(crq->query_capability.number); 5604 netdev_dbg(netdev, "max_multicast_filters = %lld\n", 5605 adapter->max_multicast_filters); 5606 break; 5607 case VLAN_HEADER_INSERTION: 5608 adapter->vlan_header_insertion = 5609 be64_to_cpu(crq->query_capability.number); 5610 if (adapter->vlan_header_insertion) 5611 netdev->features |= NETIF_F_HW_VLAN_STAG_TX; 5612 netdev_dbg(netdev, "vlan_header_insertion = %lld\n", 5613 adapter->vlan_header_insertion); 5614 break; 5615 case RX_VLAN_HEADER_INSERTION: 5616 adapter->rx_vlan_header_insertion = 5617 be64_to_cpu(crq->query_capability.number); 5618 netdev_dbg(netdev, "rx_vlan_header_insertion = %lld\n", 5619 adapter->rx_vlan_header_insertion); 5620 break; 5621 case MAX_TX_SG_ENTRIES: 5622 adapter->max_tx_sg_entries = 5623 be64_to_cpu(crq->query_capability.number); 5624 netdev_dbg(netdev, "max_tx_sg_entries = %lld\n", 5625 adapter->max_tx_sg_entries); 5626 break; 5627 case RX_SG_SUPPORTED: 5628 adapter->rx_sg_supported = 5629 be64_to_cpu(crq->query_capability.number); 5630 netdev_dbg(netdev, "rx_sg_supported = %lld\n", 5631 adapter->rx_sg_supported); 5632 break; 5633 case OPT_TX_COMP_SUB_QUEUES: 5634 adapter->opt_tx_comp_sub_queues = 5635 be64_to_cpu(crq->query_capability.number); 5636 netdev_dbg(netdev, "opt_tx_comp_sub_queues = %lld\n", 5637 adapter->opt_tx_comp_sub_queues); 5638 break; 5639 case OPT_RX_COMP_QUEUES: 5640 adapter->opt_rx_comp_queues = 5641 be64_to_cpu(crq->query_capability.number); 5642 netdev_dbg(netdev, "opt_rx_comp_queues = %lld\n", 5643 adapter->opt_rx_comp_queues); 5644 break; 5645 case 
OPT_RX_BUFADD_Q_PER_RX_COMP_Q: 5646 adapter->opt_rx_bufadd_q_per_rx_comp_q = 5647 be64_to_cpu(crq->query_capability.number); 5648 netdev_dbg(netdev, "opt_rx_bufadd_q_per_rx_comp_q = %lld\n", 5649 adapter->opt_rx_bufadd_q_per_rx_comp_q); 5650 break; 5651 case OPT_TX_ENTRIES_PER_SUBCRQ: 5652 adapter->opt_tx_entries_per_subcrq = 5653 be64_to_cpu(crq->query_capability.number); 5654 netdev_dbg(netdev, "opt_tx_entries_per_subcrq = %lld\n", 5655 adapter->opt_tx_entries_per_subcrq); 5656 break; 5657 case OPT_RXBA_ENTRIES_PER_SUBCRQ: 5658 adapter->opt_rxba_entries_per_subcrq = 5659 be64_to_cpu(crq->query_capability.number); 5660 netdev_dbg(netdev, "opt_rxba_entries_per_subcrq = %lld\n", 5661 adapter->opt_rxba_entries_per_subcrq); 5662 break; 5663 case TX_RX_DESC_REQ: 5664 adapter->tx_rx_desc_req = crq->query_capability.number; 5665 netdev_dbg(netdev, "tx_rx_desc_req = %llx\n", 5666 adapter->tx_rx_desc_req); 5667 break; 5668 5669 default: 5670 netdev_err(netdev, "Got invalid cap rsp %d\n", 5671 crq->query_capability.capability); 5672 } 5673 5674 out: 5675 if (atomic_read(&adapter->running_cap_crqs) == 0) 5676 send_request_cap(adapter, 0); 5677 } 5678 5679 static int send_query_phys_parms(struct ibmvnic_adapter *adapter) 5680 { 5681 union ibmvnic_crq crq; 5682 int rc; 5683 5684 memset(&crq, 0, sizeof(crq)); 5685 crq.query_phys_parms.first = IBMVNIC_CRQ_CMD; 5686 crq.query_phys_parms.cmd = QUERY_PHYS_PARMS; 5687 5688 mutex_lock(&adapter->fw_lock); 5689 adapter->fw_done_rc = 0; 5690 reinit_completion(&adapter->fw_done); 5691 5692 rc = ibmvnic_send_crq(adapter, &crq); 5693 if (rc) { 5694 mutex_unlock(&adapter->fw_lock); 5695 return rc; 5696 } 5697 5698 rc = ibmvnic_wait_for_completion(adapter, &adapter->fw_done, 10000); 5699 if (rc) { 5700 mutex_unlock(&adapter->fw_lock); 5701 return rc; 5702 } 5703 5704 mutex_unlock(&adapter->fw_lock); 5705 return adapter->fw_done_rc ? 
-EIO : 0; 5706 } 5707 5708 static int handle_query_phys_parms_rsp(union ibmvnic_crq *crq, 5709 struct ibmvnic_adapter *adapter) 5710 { 5711 struct net_device *netdev = adapter->netdev; 5712 int rc; 5713 __be32 rspeed = cpu_to_be32(crq->query_phys_parms_rsp.speed); 5714 5715 rc = crq->query_phys_parms_rsp.rc.code; 5716 if (rc) { 5717 netdev_err(netdev, "Error %d in QUERY_PHYS_PARMS\n", rc); 5718 return rc; 5719 } 5720 switch (rspeed) { 5721 case IBMVNIC_10MBPS: 5722 adapter->speed = SPEED_10; 5723 break; 5724 case IBMVNIC_100MBPS: 5725 adapter->speed = SPEED_100; 5726 break; 5727 case IBMVNIC_1GBPS: 5728 adapter->speed = SPEED_1000; 5729 break; 5730 case IBMVNIC_10GBPS: 5731 adapter->speed = SPEED_10000; 5732 break; 5733 case IBMVNIC_25GBPS: 5734 adapter->speed = SPEED_25000; 5735 break; 5736 case IBMVNIC_40GBPS: 5737 adapter->speed = SPEED_40000; 5738 break; 5739 case IBMVNIC_50GBPS: 5740 adapter->speed = SPEED_50000; 5741 break; 5742 case IBMVNIC_100GBPS: 5743 adapter->speed = SPEED_100000; 5744 break; 5745 case IBMVNIC_200GBPS: 5746 adapter->speed = SPEED_200000; 5747 break; 5748 default: 5749 if (netif_carrier_ok(netdev)) 5750 netdev_warn(netdev, "Unknown speed 0x%08x\n", rspeed); 5751 adapter->speed = SPEED_UNKNOWN; 5752 } 5753 if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_FULL_DUPLEX) 5754 adapter->duplex = DUPLEX_FULL; 5755 else if (crq->query_phys_parms_rsp.flags1 & IBMVNIC_HALF_DUPLEX) 5756 adapter->duplex = DUPLEX_HALF; 5757 else 5758 adapter->duplex = DUPLEX_UNKNOWN; 5759 5760 return rc; 5761 } 5762 5763 static void ibmvnic_handle_crq(union ibmvnic_crq *crq, 5764 struct ibmvnic_adapter *adapter) 5765 { 5766 struct ibmvnic_generic_crq *gen_crq = &crq->generic; 5767 struct net_device *netdev = adapter->netdev; 5768 struct device *dev = &adapter->vdev->dev; 5769 u64 *u64_crq = (u64 *)crq; 5770 long rc; 5771 5772 netdev_dbg(netdev, "Handling CRQ: %016lx %016lx\n", 5773 (unsigned long)cpu_to_be64(u64_crq[0]), 5774 (unsigned long)cpu_to_be64(u64_crq[1])); 5775 switch (gen_crq->first) { 5776 case IBMVNIC_CRQ_INIT_RSP: 5777 switch (gen_crq->cmd) { 5778 case IBMVNIC_CRQ_INIT: 5779 dev_info(dev, "Partner initialized\n"); 5780 adapter->from_passive_init = true; 5781 /* Discard any stale login responses from prev reset. 5782 * CHECK: should we clear even on INIT_COMPLETE? 5783 */ 5784 adapter->login_pending = false; 5785 5786 if (adapter->state == VNIC_DOWN) 5787 rc = ibmvnic_reset(adapter, VNIC_RESET_PASSIVE_INIT); 5788 else 5789 rc = ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); 5790 5791 if (rc && rc != -EBUSY) { 5792 /* We were unable to schedule the failover 5793 * reset either because the adapter was still 5794 * probing (eg: during kexec) or we could not 5795 * allocate memory. Clear the failover_pending 5796 * flag since no one else will. We ignore 5797 * EBUSY because it means either FAILOVER reset 5798 * is already scheduled or the adapter is 5799 * being removed. 
5800 */ 5801 netdev_err(netdev, 5802 "Error %ld scheduling failover reset\n", 5803 rc); 5804 adapter->failover_pending = false; 5805 } 5806 5807 if (!completion_done(&adapter->init_done)) { 5808 if (!adapter->init_done_rc) 5809 adapter->init_done_rc = -EAGAIN; 5810 complete(&adapter->init_done); 5811 } 5812 5813 break; 5814 case IBMVNIC_CRQ_INIT_COMPLETE: 5815 dev_info(dev, "Partner initialization complete\n"); 5816 adapter->crq.active = true; 5817 send_version_xchg(adapter); 5818 break; 5819 default: 5820 dev_err(dev, "Unknown crq cmd: %d\n", gen_crq->cmd); 5821 } 5822 return; 5823 case IBMVNIC_CRQ_XPORT_EVENT: 5824 netif_carrier_off(netdev); 5825 adapter->crq.active = false; 5826 /* terminate any thread waiting for a response 5827 * from the device 5828 */ 5829 if (!completion_done(&adapter->fw_done)) { 5830 adapter->fw_done_rc = -EIO; 5831 complete(&adapter->fw_done); 5832 } 5833 5834 /* if we got here during crq-init, retry crq-init */ 5835 if (!completion_done(&adapter->init_done)) { 5836 adapter->init_done_rc = -EAGAIN; 5837 complete(&adapter->init_done); 5838 } 5839 5840 if (!completion_done(&adapter->stats_done)) 5841 complete(&adapter->stats_done); 5842 if (test_bit(0, &adapter->resetting)) 5843 adapter->force_reset_recovery = true; 5844 if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) { 5845 dev_info(dev, "Migrated, re-enabling adapter\n"); 5846 ibmvnic_reset(adapter, VNIC_RESET_MOBILITY); 5847 } else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) { 5848 dev_info(dev, "Backing device failover detected\n"); 5849 adapter->failover_pending = true; 5850 } else { 5851 /* The adapter lost the connection */ 5852 dev_err(dev, "Virtual Adapter failed (rc=%d)\n", 5853 gen_crq->cmd); 5854 ibmvnic_reset(adapter, VNIC_RESET_FATAL); 5855 } 5856 return; 5857 case IBMVNIC_CRQ_CMD_RSP: 5858 break; 5859 default: 5860 dev_err(dev, "Got an invalid msg type 0x%02x\n", 5861 gen_crq->first); 5862 return; 5863 } 5864 5865 switch (gen_crq->cmd) { 5866 case VERSION_EXCHANGE_RSP: 5867 rc = crq->version_exchange_rsp.rc.code; 5868 if (rc) { 5869 dev_err(dev, "Error %ld in VERSION_EXCHG_RSP\n", rc); 5870 break; 5871 } 5872 ibmvnic_version = 5873 be16_to_cpu(crq->version_exchange_rsp.version); 5874 dev_info(dev, "Partner protocol version is %d\n", 5875 ibmvnic_version); 5876 send_query_cap(adapter); 5877 break; 5878 case QUERY_CAPABILITY_RSP: 5879 handle_query_cap_rsp(crq, adapter); 5880 break; 5881 case QUERY_MAP_RSP: 5882 handle_query_map_rsp(crq, adapter); 5883 break; 5884 case REQUEST_MAP_RSP: 5885 adapter->fw_done_rc = crq->request_map_rsp.rc.code; 5886 complete(&adapter->fw_done); 5887 break; 5888 case REQUEST_UNMAP_RSP: 5889 handle_request_unmap_rsp(crq, adapter); 5890 break; 5891 case REQUEST_CAPABILITY_RSP: 5892 handle_request_cap_rsp(crq, adapter); 5893 break; 5894 case LOGIN_RSP: 5895 netdev_dbg(netdev, "Got Login Response\n"); 5896 handle_login_rsp(crq, adapter); 5897 break; 5898 case LOGICAL_LINK_STATE_RSP: 5899 netdev_dbg(netdev, 5900 "Got Logical Link State Response, state: %d rc: %d\n", 5901 crq->logical_link_state_rsp.link_state, 5902 crq->logical_link_state_rsp.rc.code); 5903 adapter->logical_link_state = 5904 crq->logical_link_state_rsp.link_state; 5905 adapter->init_done_rc = crq->logical_link_state_rsp.rc.code; 5906 complete(&adapter->init_done); 5907 break; 5908 case LINK_STATE_INDICATION: 5909 netdev_dbg(netdev, "Got Logical Link State Indication\n"); 5910 adapter->phys_link_state = 5911 crq->link_state_indication.phys_link_state; 5912 adapter->logical_link_state = 5913 
crq->link_state_indication.logical_link_state; 5914 if (adapter->phys_link_state && adapter->logical_link_state) 5915 netif_carrier_on(netdev); 5916 else 5917 netif_carrier_off(netdev); 5918 break; 5919 case CHANGE_MAC_ADDR_RSP: 5920 netdev_dbg(netdev, "Got MAC address change Response\n"); 5921 adapter->fw_done_rc = handle_change_mac_rsp(crq, adapter); 5922 break; 5923 case ERROR_INDICATION: 5924 netdev_dbg(netdev, "Got Error Indication\n"); 5925 handle_error_indication(crq, adapter); 5926 break; 5927 case REQUEST_STATISTICS_RSP: 5928 netdev_dbg(netdev, "Got Statistics Response\n"); 5929 complete(&adapter->stats_done); 5930 break; 5931 case QUERY_IP_OFFLOAD_RSP: 5932 netdev_dbg(netdev, "Got Query IP offload Response\n"); 5933 handle_query_ip_offload_rsp(adapter); 5934 break; 5935 case MULTICAST_CTRL_RSP: 5936 netdev_dbg(netdev, "Got multicast control Response\n"); 5937 break; 5938 case CONTROL_IP_OFFLOAD_RSP: 5939 netdev_dbg(netdev, "Got Control IP offload Response\n"); 5940 dma_unmap_single(dev, adapter->ip_offload_ctrl_tok, 5941 sizeof(adapter->ip_offload_ctrl), 5942 DMA_TO_DEVICE); 5943 complete(&adapter->init_done); 5944 break; 5945 case COLLECT_FW_TRACE_RSP: 5946 netdev_dbg(netdev, "Got Collect firmware trace Response\n"); 5947 complete(&adapter->fw_done); 5948 break; 5949 case GET_VPD_SIZE_RSP: 5950 handle_vpd_size_rsp(crq, adapter); 5951 break; 5952 case GET_VPD_RSP: 5953 handle_vpd_rsp(crq, adapter); 5954 break; 5955 case QUERY_PHYS_PARMS_RSP: 5956 adapter->fw_done_rc = handle_query_phys_parms_rsp(crq, adapter); 5957 complete(&adapter->fw_done); 5958 break; 5959 default: 5960 netdev_err(netdev, "Got an invalid cmd type 0x%02x\n", 5961 gen_crq->cmd); 5962 } 5963 } 5964 5965 static irqreturn_t ibmvnic_interrupt(int irq, void *instance) 5966 { 5967 struct ibmvnic_adapter *adapter = instance; 5968 5969 tasklet_schedule(&adapter->tasklet); 5970 return IRQ_HANDLED; 5971 } 5972 5973 static void ibmvnic_tasklet(struct tasklet_struct *t) 5974 { 5975 struct ibmvnic_adapter *adapter = from_tasklet(adapter, t, tasklet); 5976 struct ibmvnic_crq_queue *queue = &adapter->crq; 5977 union ibmvnic_crq *crq; 5978 unsigned long flags; 5979 5980 spin_lock_irqsave(&queue->lock, flags); 5981 5982 /* Pull all the valid messages off the CRQ */ 5983 while ((crq = ibmvnic_next_crq(adapter)) != NULL) { 5984 /* This barrier makes sure ibmvnic_next_crq()'s 5985 * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded 5986 * before ibmvnic_handle_crq()'s 5987 * switch(gen_crq->first) and switch(gen_crq->cmd). 
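* In other words, the barrier keeps the payload reads done by
* ibmvnic_handle_crq() from being reordered ahead of the valid-entry check
* that ibmvnic_next_crq() performed on the DMA-written CRQ descriptor.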
5988 */ 5989 dma_rmb(); 5990 ibmvnic_handle_crq(crq, adapter); 5991 crq->generic.first = 0; 5992 } 5993 5994 spin_unlock_irqrestore(&queue->lock, flags); 5995 } 5996 5997 static int ibmvnic_reenable_crq_queue(struct ibmvnic_adapter *adapter) 5998 { 5999 struct vio_dev *vdev = adapter->vdev; 6000 int rc; 6001 6002 do { 6003 rc = plpar_hcall_norets(H_ENABLE_CRQ, vdev->unit_address); 6004 } while (rc == H_IN_PROGRESS || rc == H_BUSY || H_IS_LONG_BUSY(rc)); 6005 6006 if (rc) 6007 dev_err(&vdev->dev, "Error enabling adapter (rc=%d)\n", rc); 6008 6009 return rc; 6010 } 6011 6012 static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter) 6013 { 6014 struct ibmvnic_crq_queue *crq = &adapter->crq; 6015 struct device *dev = &adapter->vdev->dev; 6016 struct vio_dev *vdev = adapter->vdev; 6017 int rc; 6018 6019 /* Close the CRQ */ 6020 do { 6021 rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address); 6022 } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); 6023 6024 /* Clean out the queue */ 6025 if (!crq->msgs) 6026 return -EINVAL; 6027 6028 memset(crq->msgs, 0, PAGE_SIZE); 6029 crq->cur = 0; 6030 crq->active = false; 6031 6032 /* And re-open it again */ 6033 rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address, 6034 crq->msg_token, PAGE_SIZE); 6035 6036 if (rc == H_CLOSED) 6037 /* Adapter is good, but other end is not ready */ 6038 dev_warn(dev, "Partner adapter not ready\n"); 6039 else if (rc != 0) 6040 dev_warn(dev, "Couldn't register crq (rc=%d)\n", rc); 6041 6042 return rc; 6043 } 6044 6045 static void release_crq_queue(struct ibmvnic_adapter *adapter) 6046 { 6047 struct ibmvnic_crq_queue *crq = &adapter->crq; 6048 struct vio_dev *vdev = adapter->vdev; 6049 long rc; 6050 6051 if (!crq->msgs) 6052 return; 6053 6054 netdev_dbg(adapter->netdev, "Releasing CRQ\n"); 6055 free_irq(vdev->irq, adapter); 6056 tasklet_kill(&adapter->tasklet); 6057 do { 6058 rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address); 6059 } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); 6060 6061 dma_unmap_single(&vdev->dev, crq->msg_token, PAGE_SIZE, 6062 DMA_BIDIRECTIONAL); 6063 free_page((unsigned long)crq->msgs); 6064 crq->msgs = NULL; 6065 crq->active = false; 6066 } 6067 6068 static int init_crq_queue(struct ibmvnic_adapter *adapter) 6069 { 6070 struct ibmvnic_crq_queue *crq = &adapter->crq; 6071 struct device *dev = &adapter->vdev->dev; 6072 struct vio_dev *vdev = adapter->vdev; 6073 int rc, retrc = -ENOMEM; 6074 6075 if (crq->msgs) 6076 return 0; 6077 6078 crq->msgs = (union ibmvnic_crq *)get_zeroed_page(GFP_KERNEL); 6079 /* Should we allocate more than one page? */ 6080 6081 if (!crq->msgs) 6082 return -ENOMEM; 6083 6084 crq->size = PAGE_SIZE / sizeof(*crq->msgs); 6085 crq->msg_token = dma_map_single(dev, crq->msgs, PAGE_SIZE, 6086 DMA_BIDIRECTIONAL); 6087 if (dma_mapping_error(dev, crq->msg_token)) 6088 goto map_failed; 6089 6090 rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address, 6091 crq->msg_token, PAGE_SIZE); 6092 6093 if (rc == H_RESOURCE) 6094 /* maybe kexecing and resource is busy. 
try a reset */ 6095 rc = ibmvnic_reset_crq(adapter); 6096 retrc = rc; 6097 6098 if (rc == H_CLOSED) { 6099 dev_warn(dev, "Partner adapter not ready\n"); 6100 } else if (rc) { 6101 dev_warn(dev, "Error %d opening adapter\n", rc); 6102 goto reg_crq_failed; 6103 } 6104 6105 retrc = 0; 6106 6107 tasklet_setup(&adapter->tasklet, (void *)ibmvnic_tasklet); 6108 6109 netdev_dbg(adapter->netdev, "registering irq 0x%x\n", vdev->irq); 6110 snprintf(crq->name, sizeof(crq->name), "ibmvnic-%x", 6111 adapter->vdev->unit_address); 6112 rc = request_irq(vdev->irq, ibmvnic_interrupt, 0, crq->name, adapter); 6113 if (rc) { 6114 dev_err(dev, "Couldn't register irq 0x%x. rc=%d\n", 6115 vdev->irq, rc); 6116 goto req_irq_failed; 6117 } 6118 6119 rc = vio_enable_interrupts(vdev); 6120 if (rc) { 6121 dev_err(dev, "Error %d enabling interrupts\n", rc); 6122 goto req_irq_failed; 6123 } 6124 6125 crq->cur = 0; 6126 spin_lock_init(&crq->lock); 6127 6128 /* process any CRQs that were queued before we enabled interrupts */ 6129 tasklet_schedule(&adapter->tasklet); 6130 6131 return retrc; 6132 6133 req_irq_failed: 6134 tasklet_kill(&adapter->tasklet); 6135 do { 6136 rc = plpar_hcall_norets(H_FREE_CRQ, vdev->unit_address); 6137 } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); 6138 reg_crq_failed: 6139 dma_unmap_single(dev, crq->msg_token, PAGE_SIZE, DMA_BIDIRECTIONAL); 6140 map_failed: 6141 free_page((unsigned long)crq->msgs); 6142 crq->msgs = NULL; 6143 return retrc; 6144 } 6145 6146 static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) 6147 { 6148 struct device *dev = &adapter->vdev->dev; 6149 unsigned long timeout = msecs_to_jiffies(20000); 6150 u64 old_num_rx_queues = adapter->req_rx_queues; 6151 u64 old_num_tx_queues = adapter->req_tx_queues; 6152 int rc; 6153 6154 adapter->from_passive_init = false; 6155 6156 rc = ibmvnic_send_crq_init(adapter); 6157 if (rc) { 6158 dev_err(dev, "Send crq init failed with error %d\n", rc); 6159 return rc; 6160 } 6161 6162 if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { 6163 dev_err(dev, "Initialization sequence timed out\n"); 6164 return -ETIMEDOUT; 6165 } 6166 6167 if (adapter->init_done_rc) { 6168 release_crq_queue(adapter); 6169 dev_err(dev, "CRQ-init failed, %d\n", adapter->init_done_rc); 6170 return adapter->init_done_rc; 6171 } 6172 6173 if (adapter->from_passive_init) { 6174 adapter->state = VNIC_OPEN; 6175 adapter->from_passive_init = false; 6176 dev_err(dev, "CRQ-init failed, passive-init\n"); 6177 return -EINVAL; 6178 } 6179 6180 if (reset && 6181 test_bit(0, &adapter->resetting) && !adapter->wait_for_reset && 6182 adapter->reset_reason != VNIC_RESET_MOBILITY) { 6183 if (adapter->req_rx_queues != old_num_rx_queues || 6184 adapter->req_tx_queues != old_num_tx_queues) { 6185 release_sub_crqs(adapter, 0); 6186 rc = init_sub_crqs(adapter); 6187 } else { 6188 /* no need to reinitialize completely, but we do 6189 * need to clean up transmits that were in flight 6190 * when we processed the reset. Failure to do so 6191 * will confound the upper layer, usually TCP, by 6192 * creating the illusion of transmits that are 6193 * awaiting completion. 
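* (clean_tx_pools() below frees any skbs still held in the tx pools so
* nothing is left looking like an in-flight transmit.)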
6194 */ 6195 clean_tx_pools(adapter); 6196 6197 rc = reset_sub_crq_queues(adapter); 6198 } 6199 } else { 6200 rc = init_sub_crqs(adapter); 6201 } 6202 6203 if (rc) { 6204 dev_err(dev, "Initialization of sub crqs failed\n"); 6205 release_crq_queue(adapter); 6206 return rc; 6207 } 6208 6209 rc = init_sub_crq_irqs(adapter); 6210 if (rc) { 6211 dev_err(dev, "Failed to initialize sub crq irqs\n"); 6212 release_crq_queue(adapter); 6213 } 6214 6215 return rc; 6216 } 6217 6218 static struct device_attribute dev_attr_failover; 6219 6220 static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) 6221 { 6222 struct ibmvnic_adapter *adapter; 6223 struct net_device *netdev; 6224 unsigned char *mac_addr_p; 6225 unsigned long flags; 6226 bool init_success; 6227 int rc; 6228 6229 dev_dbg(&dev->dev, "entering ibmvnic_probe for UA 0x%x\n", 6230 dev->unit_address); 6231 6232 mac_addr_p = (unsigned char *)vio_get_attribute(dev, 6233 VETH_MAC_ADDR, NULL); 6234 if (!mac_addr_p) { 6235 dev_err(&dev->dev, 6236 "(%s:%3.3d) ERROR: Can't find MAC_ADDR attribute\n", 6237 __FILE__, __LINE__); 6238 return 0; 6239 } 6240 6241 netdev = alloc_etherdev_mq(sizeof(struct ibmvnic_adapter), 6242 IBMVNIC_MAX_QUEUES); 6243 if (!netdev) 6244 return -ENOMEM; 6245 6246 adapter = netdev_priv(netdev); 6247 adapter->state = VNIC_PROBING; 6248 dev_set_drvdata(&dev->dev, netdev); 6249 adapter->vdev = dev; 6250 adapter->netdev = netdev; 6251 adapter->login_pending = false; 6252 memset(&adapter->map_ids, 0, sizeof(adapter->map_ids)); 6253 /* map_ids start at 1, so ensure map_id 0 is always "in-use" */ 6254 bitmap_set(adapter->map_ids, 0, 1); 6255 6256 ether_addr_copy(adapter->mac_addr, mac_addr_p); 6257 eth_hw_addr_set(netdev, adapter->mac_addr); 6258 netdev->irq = dev->irq; 6259 netdev->netdev_ops = &ibmvnic_netdev_ops; 6260 netdev->ethtool_ops = &ibmvnic_ethtool_ops; 6261 SET_NETDEV_DEV(netdev, &dev->dev); 6262 6263 INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset); 6264 INIT_DELAYED_WORK(&adapter->ibmvnic_delayed_reset, 6265 __ibmvnic_delayed_reset); 6266 INIT_LIST_HEAD(&adapter->rwi_list); 6267 spin_lock_init(&adapter->rwi_lock); 6268 spin_lock_init(&adapter->state_lock); 6269 mutex_init(&adapter->fw_lock); 6270 init_completion(&adapter->probe_done); 6271 init_completion(&adapter->init_done); 6272 init_completion(&adapter->fw_done); 6273 init_completion(&adapter->reset_done); 6274 init_completion(&adapter->stats_done); 6275 clear_bit(0, &adapter->resetting); 6276 adapter->prev_rx_buf_sz = 0; 6277 adapter->prev_mtu = 0; 6278 6279 init_success = false; 6280 do { 6281 reinit_init_done(adapter); 6282 6283 /* clear any failovers we got in the previous pass 6284 * since we are reinitializing the CRQ 6285 */ 6286 adapter->failover_pending = false; 6287 6288 /* If we had already initialized CRQ, we may have one or 6289 * more resets queued already. Discard those and release 6290 * the CRQ before initializing the CRQ again. 6291 */ 6292 release_crq_queue(adapter); 6293 6294 /* Since we are still in PROBING state, __ibmvnic_reset() 6295 * will not access the ->rwi_list and since we released CRQ, 6296 * we won't get _new_ transport events. But there may be an 6297 * ongoing ibmvnic_reset() call. So serialize access to 6298 * rwi_list. If we win the race, ibmvnic_reset() could add 6299 * a reset after we purged, but that's OK - we just may end 6300 * up with an extra reset (i.e. similar to having two or more 6301 * resets in the queue at once). 6302 * CHECK.
6303 */ 6304 spin_lock_irqsave(&adapter->rwi_lock, flags); 6305 flush_reset_queue(adapter); 6306 spin_unlock_irqrestore(&adapter->rwi_lock, flags); 6307 6308 rc = init_crq_queue(adapter); 6309 if (rc) { 6310 dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n", 6311 rc); 6312 goto ibmvnic_init_fail; 6313 } 6314 6315 rc = ibmvnic_reset_init(adapter, false); 6316 } while (rc == -EAGAIN); 6317 6318 /* We are ignoring the error from ibmvnic_reset_init() assuming that the 6319 * partner is not ready. CRQ is not active. When the partner becomes 6320 * ready, we will do the passive init reset. 6321 */ 6322 6323 if (!rc) 6324 init_success = true; 6325 6326 rc = init_stats_buffers(adapter); 6327 if (rc) 6328 goto ibmvnic_init_fail; 6329 6330 rc = init_stats_token(adapter); 6331 if (rc) 6332 goto ibmvnic_stats_fail; 6333 6334 rc = device_create_file(&dev->dev, &dev_attr_failover); 6335 if (rc) 6336 goto ibmvnic_dev_file_err; 6337 6338 netif_carrier_off(netdev); 6339 6340 if (init_success) { 6341 adapter->state = VNIC_PROBED; 6342 netdev->mtu = adapter->req_mtu - ETH_HLEN; 6343 netdev->min_mtu = adapter->min_mtu - ETH_HLEN; 6344 netdev->max_mtu = adapter->max_mtu - ETH_HLEN; 6345 } else { 6346 adapter->state = VNIC_DOWN; 6347 } 6348 6349 adapter->wait_for_reset = false; 6350 adapter->last_reset_time = jiffies; 6351 6352 rc = register_netdev(netdev); 6353 if (rc) { 6354 dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc); 6355 goto ibmvnic_register_fail; 6356 } 6357 dev_info(&dev->dev, "ibmvnic registered\n"); 6358 6359 rc = ibmvnic_cpu_notif_add(adapter); 6360 if (rc) { 6361 netdev_err(netdev, "Registering cpu notifier failed\n"); 6362 goto cpu_notif_add_failed; 6363 } 6364 6365 complete(&adapter->probe_done); 6366 6367 return 0; 6368 6369 cpu_notif_add_failed: 6370 unregister_netdev(netdev); 6371 6372 ibmvnic_register_fail: 6373 device_remove_file(&dev->dev, &dev_attr_failover); 6374 6375 ibmvnic_dev_file_err: 6376 release_stats_token(adapter); 6377 6378 ibmvnic_stats_fail: 6379 release_stats_buffers(adapter); 6380 6381 ibmvnic_init_fail: 6382 release_sub_crqs(adapter, 1); 6383 release_crq_queue(adapter); 6384 6385 /* cleanup worker thread after releasing CRQ so we don't get 6386 * transport events (i.e new work items for the worker thread). 6387 */ 6388 adapter->state = VNIC_REMOVING; 6389 complete(&adapter->probe_done); 6390 flush_work(&adapter->ibmvnic_reset); 6391 flush_delayed_work(&adapter->ibmvnic_delayed_reset); 6392 6393 flush_reset_queue(adapter); 6394 6395 mutex_destroy(&adapter->fw_lock); 6396 free_netdev(netdev); 6397 6398 return rc; 6399 } 6400 6401 static void ibmvnic_remove(struct vio_dev *dev) 6402 { 6403 struct net_device *netdev = dev_get_drvdata(&dev->dev); 6404 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 6405 unsigned long flags; 6406 6407 spin_lock_irqsave(&adapter->state_lock, flags); 6408 6409 /* If ibmvnic_reset() is scheduling a reset, wait for it to 6410 * finish. Then, set the state to REMOVING to prevent it from 6411 * scheduling any more work and to have reset functions ignore 6412 * any resets that have already been scheduled. Drop the lock 6413 * after setting state, so __ibmvnic_reset() which is called 6414 * from the flush_work() below, can make progress. 
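* Acquiring rwi_lock below (nested inside state_lock) is what provides that
* wait: an ibmvnic_reset() call already holding rwi_lock finishes its
* critical section before we switch the state to VNIC_REMOVING.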
6415 */ 6416 spin_lock(&adapter->rwi_lock); 6417 adapter->state = VNIC_REMOVING; 6418 spin_unlock(&adapter->rwi_lock); 6419 6420 spin_unlock_irqrestore(&adapter->state_lock, flags); 6421 6422 ibmvnic_cpu_notif_remove(adapter); 6423 6424 flush_work(&adapter->ibmvnic_reset); 6425 flush_delayed_work(&adapter->ibmvnic_delayed_reset); 6426 6427 rtnl_lock(); 6428 unregister_netdevice(netdev); 6429 6430 release_resources(adapter); 6431 release_rx_pools(adapter); 6432 release_tx_pools(adapter); 6433 release_sub_crqs(adapter, 1); 6434 release_crq_queue(adapter); 6435 6436 release_stats_token(adapter); 6437 release_stats_buffers(adapter); 6438 6439 adapter->state = VNIC_REMOVED; 6440 6441 rtnl_unlock(); 6442 mutex_destroy(&adapter->fw_lock); 6443 device_remove_file(&dev->dev, &dev_attr_failover); 6444 free_netdev(netdev); 6445 dev_set_drvdata(&dev->dev, NULL); 6446 } 6447 6448 static ssize_t failover_store(struct device *dev, struct device_attribute *attr, 6449 const char *buf, size_t count) 6450 { 6451 struct net_device *netdev = dev_get_drvdata(dev); 6452 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 6453 unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; 6454 __be64 session_token; 6455 long rc; 6456 6457 if (!sysfs_streq(buf, "1")) 6458 return -EINVAL; 6459 6460 rc = plpar_hcall(H_VIOCTL, retbuf, adapter->vdev->unit_address, 6461 H_GET_SESSION_TOKEN, 0, 0, 0); 6462 if (rc) { 6463 netdev_err(netdev, "Couldn't retrieve session token, rc %ld\n", 6464 rc); 6465 goto last_resort; 6466 } 6467 6468 session_token = (__be64)retbuf[0]; 6469 netdev_dbg(netdev, "Initiating client failover, session id %llx\n", 6470 be64_to_cpu(session_token)); 6471 rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, 6472 H_SESSION_ERR_DETECTED, session_token, 0, 0); 6473 if (rc) { 6474 netdev_err(netdev, 6475 "H_VIOCTL initiated failover failed, rc %ld\n", 6476 rc); 6477 goto last_resort; 6478 } 6479 6480 return count; 6481 6482 last_resort: 6483 netdev_dbg(netdev, "Trying to send CRQ_CMD, the last resort\n"); 6484 ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); 6485 6486 return count; 6487 } 6488 static DEVICE_ATTR_WO(failover); 6489 6490 static unsigned long ibmvnic_get_desired_dma(struct vio_dev *vdev) 6491 { 6492 struct net_device *netdev = dev_get_drvdata(&vdev->dev); 6493 struct ibmvnic_adapter *adapter; 6494 struct iommu_table *tbl; 6495 unsigned long ret = 0; 6496 int i; 6497 6498 tbl = get_iommu_table_base(&vdev->dev); 6499 6500 /* netdev inits at probe time along with the structures we need below*/ 6501 if (!netdev) 6502 return IOMMU_PAGE_ALIGN(IBMVNIC_IO_ENTITLEMENT_DEFAULT, tbl); 6503 6504 adapter = netdev_priv(netdev); 6505 6506 ret += PAGE_SIZE; /* the crq message queue */ 6507 ret += IOMMU_PAGE_ALIGN(sizeof(struct ibmvnic_statistics), tbl); 6508 6509 for (i = 0; i < adapter->req_tx_queues + adapter->req_rx_queues; i++) 6510 ret += 4 * PAGE_SIZE; /* the scrq message queue */ 6511 6512 for (i = 0; i < adapter->num_active_rx_pools; i++) 6513 ret += adapter->rx_pool[i].size * 6514 IOMMU_PAGE_ALIGN(adapter->rx_pool[i].buff_size, tbl); 6515 6516 return ret; 6517 } 6518 6519 static int ibmvnic_resume(struct device *dev) 6520 { 6521 struct net_device *netdev = dev_get_drvdata(dev); 6522 struct ibmvnic_adapter *adapter = netdev_priv(netdev); 6523 6524 if (adapter->state != VNIC_OPEN) 6525 return 0; 6526 6527 tasklet_schedule(&adapter->tasklet); 6528 6529 return 0; 6530 } 6531 6532 static const struct vio_device_id ibmvnic_device_table[] = { 6533 {"network", "IBM,vnic"}, 6534 {"", "" } 6535 }; 6536 
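/* Exporting the match table generates modalias entries so userspace can
 * autoload this module when the hypervisor presents an "IBM,vnic" device.
 */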
MODULE_DEVICE_TABLE(vio, ibmvnic_device_table); 6537 6538 static const struct dev_pm_ops ibmvnic_pm_ops = { 6539 .resume = ibmvnic_resume 6540 }; 6541 6542 static struct vio_driver ibmvnic_driver = { 6543 .id_table = ibmvnic_device_table, 6544 .probe = ibmvnic_probe, 6545 .remove = ibmvnic_remove, 6546 .get_desired_dma = ibmvnic_get_desired_dma, 6547 .name = ibmvnic_driver_name, 6548 .pm = &ibmvnic_pm_ops, 6549 }; 6550 6551 /* module functions */ 6552 static int __init ibmvnic_module_init(void) 6553 { 6554 int ret; 6555 6556 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/ibmvnic:online", 6557 ibmvnic_cpu_online, 6558 ibmvnic_cpu_down_prep); 6559 if (ret < 0) 6560 goto out; 6561 ibmvnic_online = ret; 6562 ret = cpuhp_setup_state_multi(CPUHP_IBMVNIC_DEAD, "net/ibmvnic:dead", 6563 NULL, ibmvnic_cpu_dead); 6564 if (ret) 6565 goto err_dead; 6566 6567 ret = vio_register_driver(&ibmvnic_driver); 6568 if (ret) 6569 goto err_vio_register; 6570 6571 pr_info("%s: %s %s\n", ibmvnic_driver_name, ibmvnic_driver_string, 6572 IBMVNIC_DRIVER_VERSION); 6573 6574 return 0; 6575 err_vio_register: 6576 cpuhp_remove_multi_state(CPUHP_IBMVNIC_DEAD); 6577 err_dead: 6578 cpuhp_remove_multi_state(ibmvnic_online); 6579 out: 6580 return ret; 6581 } 6582 6583 static void __exit ibmvnic_module_exit(void) 6584 { 6585 vio_unregister_driver(&ibmvnic_driver); 6586 cpuhp_remove_multi_state(CPUHP_IBMVNIC_DEAD); 6587 cpuhp_remove_multi_state(ibmvnic_online); 6588 } 6589 6590 module_init(ibmvnic_module_init); 6591 module_exit(ibmvnic_module_exit); 6592
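/*
 * Illustrative usage of the failover attribute created in ibmvnic_probe()
 * (the unit address below is a made-up example; the actual sysfs path
 * depends on the partition's device tree):
 *
 *   echo 1 > /sys/devices/vio/30000003/failover
 *
 * Any value other than "1" is rejected with -EINVAL by failover_store().
 */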