xref: /linux/drivers/net/ethernet/emulex/benet/be_main.c (revision 9ee0034b8f49aaaa7e7c2da8db1038915db99c19)
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17 
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27 
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32 
/* num_vfs module param is obsolete.
 * Use sysfs method to enable/disable VFs.
 */
static unsigned int num_vfs;
module_param(num_vfs, uint, S_IRUGO);
MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");

/* Size in bytes of each RX buffer fragment posted to the adapter.
 * Read-only (S_IRUGO); fixed for the lifetime of the module.
 */
static ushort rx_frag_size = 2048;
module_param(rx_frag_size, ushort, S_IRUGO);
MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43 
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 struct workqueue_struct *be_err_recovery_workq;
48 
/* PCI device IDs (BladeEngine and OneConnect families) claimed by this
 * driver; exported to userspace/module tools via MODULE_DEVICE_TABLE.
 */
static const struct pci_device_id be_dev_ids[] = {
	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
	{ 0 }	/* sentinel */
};
MODULE_DEVICE_TABLE(pci, be_dev_ids);
61 
62 /* Workqueue used by all functions for defering cmd calls to the adapter */
63 struct workqueue_struct *be_wq;
64 
/* UE Status Low CSR */
/* Block names for the UE (Unrecoverable Error) status-low register;
 * presumably indexed by bit position at the decode site — confirm there.
 * NOTE(review): trailing spaces on some entries are preserved as found;
 * verify whether they are significant to log formatting before
 * normalizing.
 */
static const char * const ue_status_low_desc[] = {
	"CEV",
	"CTX",
	"DBUF",
	"ERX",
	"Host",
	"MPU",
	"NDMA",
	"PTC ",
	"RDMA ",
	"RXF ",
	"RXIPS ",
	"RXULP0 ",
	"RXULP1 ",
	"RXULP2 ",
	"TIM ",
	"TPOST ",
	"TPRE ",
	"TXIPS ",
	"TXULP0 ",
	"TXULP1 ",
	"UC ",
	"WDMA ",
	"TXULP2 ",
	"HOST1 ",
	"P0_OB_LINK ",
	"P1_OB_LINK ",
	"HOST_GPIO ",
	"MBOX ",
	"ERX2 ",
	"SPARE ",
	"JTAG ",
	"MPU_INTPEND "
};
100 
/* UE Status High CSR */
/* Block names for the UE status-high register; presumably indexed by bit
 * position at the decode site — confirm there. The final "Unknown" entry
 * appears to be a catch-all for unnamed bits.
 */
static const char * const ue_status_hi_desc[] = {
	"LPCMEMHOST",
	"MGMT_MAC",
	"PCS0ONLINE",
	"MPU_IRAM",
	"PCS1ONLINE",
	"PCTL0",
	"PCTL1",
	"PMEM",
	"RR",
	"TXPB",
	"RXPP",
	"XAUI",
	"TXP",
	"ARM",
	"IPC",
	"HOST2",
	"HOST3",
	"HOST4",
	"HOST5",
	"HOST6",
	"HOST7",
	"ECRC",
	"Poison TLP",
	"NETC",
	"PERIPH",
	"LLTXULP",
	"D2P",
	"RCON",
	"LDMA",
	"LLTXP",
	"LLTXPB",
	"Unknown"
};
136 
/* Interface flags requested when creating a VF interface: untagged,
 * broadcast and multicast traffic, plus delivery of frames carrying
 * L3/L4 checksum errors.
 */
#define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
				 BE_IF_FLAGS_BROADCAST | \
				 BE_IF_FLAGS_MULTICAST | \
				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
141 
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144 	struct be_dma_mem *mem = &q->dma_mem;
145 
146 	if (mem->va) {
147 		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148 				  mem->dma);
149 		mem->va = NULL;
150 	}
151 }
152 
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154 			  u16 len, u16 entry_size)
155 {
156 	struct be_dma_mem *mem = &q->dma_mem;
157 
158 	memset(q, 0, sizeof(*q));
159 	q->len = len;
160 	q->entry_size = entry_size;
161 	mem->size = len * entry_size;
162 	mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163 				      GFP_KERNEL);
164 	if (!mem->va)
165 		return -ENOMEM;
166 	return 0;
167 }
168 
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171 	u32 reg, enabled;
172 
173 	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174 			      &reg);
175 	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176 
177 	if (!enabled && enable)
178 		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179 	else if (enabled && !enable)
180 		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181 	else
182 		return;
183 
184 	pci_write_config_dword(adapter->pdev,
185 			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187 
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190 	int status = 0;
191 
192 	/* On lancer interrupts can't be controlled via this register */
193 	if (lancer_chip(adapter))
194 		return;
195 
196 	if (be_check_error(adapter, BE_ERROR_EEH))
197 		return;
198 
199 	status = be_cmd_intr_set(adapter, enable);
200 	if (status)
201 		be_reg_intr_set(adapter, enable);
202 }
203 
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206 	u32 val = 0;
207 
208 	if (be_check_error(adapter, BE_ERROR_HW))
209 		return;
210 
211 	val |= qid & DB_RQ_RING_ID_MASK;
212 	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213 
214 	wmb();
215 	iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217 
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219 			  u16 posted)
220 {
221 	u32 val = 0;
222 
223 	if (be_check_error(adapter, BE_ERROR_HW))
224 		return;
225 
226 	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227 	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228 
229 	wmb();
230 	iowrite32(val, adapter->db + txo->db_offset);
231 }
232 
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234 			 bool arm, bool clear_int, u16 num_popped,
235 			 u32 eq_delay_mult_enc)
236 {
237 	u32 val = 0;
238 
239 	val |= qid & DB_EQ_RING_ID_MASK;
240 	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241 
242 	if (be_check_error(adapter, BE_ERROR_HW))
243 		return;
244 
245 	if (arm)
246 		val |= 1 << DB_EQ_REARM_SHIFT;
247 	if (clear_int)
248 		val |= 1 << DB_EQ_CLR_SHIFT;
249 	val |= 1 << DB_EQ_EVNT_SHIFT;
250 	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251 	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252 	iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254 
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257 	u32 val = 0;
258 
259 	val |= qid & DB_CQ_RING_ID_MASK;
260 	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261 			DB_CQ_RING_ID_EXT_MASK_SHIFT);
262 
263 	if (be_check_error(adapter, BE_ERROR_HW))
264 		return;
265 
266 	if (arm)
267 		val |= 1 << DB_CQ_REARM_SHIFT;
268 	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269 	iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271 
/* Program @mac as the device's primary MAC (tracked in pmac_id[0]).
 * If the address already appears in the driver's uc-list, reuse that
 * entry's pmac_id instead of issuing a new PMAC_ADD to the FW.
 * Returns 0 on success or the FW command's error code.
 */
static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
{
	int i;

	/* Check if mac has already been added as part of uc-list */
	for (i = 0; i < adapter->uc_macs; i++) {
		/* NOTE(review): "[i * ETH_ALEN]" assumes uc_list is a flat
		 * byte array of 6-byte addresses; if uc_list is an array of
		 * address structs this scales the index by the wrong stride.
		 * Verify against the uc_list declaration in be.h (upstream
		 * later reworked this access).
		 */
		if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN],
				     mac)) {
			/* mac already added, skip addition */
			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
			return 0;
		}
	}

	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
			       &adapter->pmac_id[0], 0);
}
289 
290 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
291 {
292 	int i;
293 
294 	/* Skip deletion if the programmed mac is
295 	 * being used in uc-list
296 	 */
297 	for (i = 0; i < adapter->uc_macs; i++) {
298 		if (adapter->pmac_id[i + 1] == pmac_id)
299 			return;
300 	}
301 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
302 }
303 
/* ndo_set_mac_address handler: program a new primary MAC address.
 * Returns 0 on success, -EADDRNOTAVAIL for an invalid address, -EPERM
 * when the FW did not actually activate the new MAC, or a FW error code.
 */
static int be_mac_addr_set(struct net_device *netdev, void *p)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	struct device *dev = &adapter->pdev->dev;
	struct sockaddr *addr = p;
	int status;
	u8 mac[ETH_ALEN];
	u32 old_pmac_id = adapter->pmac_id[0];

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	/* Proceed further only if, User provided MAC is different
	 * from active MAC
	 */
	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
		return 0;

	/* if device is not running, copy MAC to netdev->dev_addr */
	if (!netif_running(netdev))
		goto done;

	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
	 * privilege or if PF did not provision the new MAC address.
	 * On BE3, this cmd will always fail if the VF doesn't have the
	 * FILTMGMT privilege. This failure is OK, only if the PF programmed
	 * the MAC for the VF.
	 */
	mutex_lock(&adapter->rx_filter_lock);
	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
	if (!status) {

		/* Delete the old programmed MAC. This call may fail if the
		 * old MAC was already deleted by the PF driver.
		 */
		if (adapter->pmac_id[0] != old_pmac_id)
			be_dev_mac_del(adapter, old_pmac_id);
	}

	mutex_unlock(&adapter->rx_filter_lock);
	/* Decide if the new MAC is successfully activated only after
	 * querying the FW
	 */
	/* Note: falls through here even when PMAC_ADD failed above — the PF
	 * may have programmed the MAC on the VF's behalf, and the query
	 * below is the authoritative check.
	 */
	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
				       adapter->if_handle, true, 0);
	if (status)
		goto err;

	/* The MAC change did not happen, either due to lack of privilege
	 * or PF didn't pre-provision.
	 */
	if (!ether_addr_equal(addr->sa_data, mac)) {
		status = -EPERM;
		goto err;
	}
done:
	ether_addr_copy(adapter->dev_mac, addr->sa_data);
	ether_addr_copy(netdev->dev_addr, addr->sa_data);
	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
	return 0;
err:
	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
	return status;
}
368 
369 /* BE2 supports only v0 cmd */
370 static void *hw_stats_from_cmd(struct be_adapter *adapter)
371 {
372 	if (BE2_chip(adapter)) {
373 		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
374 
375 		return &cmd->hw_stats;
376 	} else if (BE3_chip(adapter)) {
377 		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
378 
379 		return &cmd->hw_stats;
380 	} else {
381 		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
382 
383 		return &cmd->hw_stats;
384 	}
385 }
386 
387 /* BE2 supports only v0 cmd */
388 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
389 {
390 	if (BE2_chip(adapter)) {
391 		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
392 
393 		return &hw_stats->erx;
394 	} else if (BE3_chip(adapter)) {
395 		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
396 
397 		return &hw_stats->erx;
398 	} else {
399 		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
400 
401 		return &hw_stats->erx;
402 	}
403 }
404 
405 static void populate_be_v0_stats(struct be_adapter *adapter)
406 {
407 	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
409 	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
410 	struct be_port_rxf_stats_v0 *port_stats =
411 					&rxf_stats->port[adapter->port_num];
412 	struct be_drv_stats *drvs = &adapter->drv_stats;
413 
414 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
415 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
416 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
417 	drvs->rx_control_frames = port_stats->rx_control_frames;
418 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
419 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
420 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
421 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
422 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
423 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
424 	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
425 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
426 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
427 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
428 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
429 	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
430 	drvs->rx_dropped_header_too_small =
431 		port_stats->rx_dropped_header_too_small;
432 	drvs->rx_address_filtered =
433 					port_stats->rx_address_filtered +
434 					port_stats->rx_vlan_filtered;
435 	drvs->rx_alignment_symbol_errors =
436 		port_stats->rx_alignment_symbol_errors;
437 
438 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
439 	drvs->tx_controlframes = port_stats->tx_controlframes;
440 
441 	if (adapter->port_num)
442 		drvs->jabber_events = rxf_stats->port1_jabber_events;
443 	else
444 		drvs->jabber_events = rxf_stats->port0_jabber_events;
445 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
446 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
447 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
448 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
449 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
450 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
451 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
452 }
453 
454 static void populate_be_v1_stats(struct be_adapter *adapter)
455 {
456 	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
457 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
458 	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
459 	struct be_port_rxf_stats_v1 *port_stats =
460 					&rxf_stats->port[adapter->port_num];
461 	struct be_drv_stats *drvs = &adapter->drv_stats;
462 
463 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
464 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
465 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
466 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
467 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
468 	drvs->rx_control_frames = port_stats->rx_control_frames;
469 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
470 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
471 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
472 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
473 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
474 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
475 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
476 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
477 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
478 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
479 	drvs->rx_dropped_header_too_small =
480 		port_stats->rx_dropped_header_too_small;
481 	drvs->rx_input_fifo_overflow_drop =
482 		port_stats->rx_input_fifo_overflow_drop;
483 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
484 	drvs->rx_alignment_symbol_errors =
485 		port_stats->rx_alignment_symbol_errors;
486 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
487 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
488 	drvs->tx_controlframes = port_stats->tx_controlframes;
489 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
490 	drvs->jabber_events = port_stats->jabber_events;
491 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
492 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
493 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
494 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
495 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
496 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
497 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
498 }
499 
500 static void populate_be_v2_stats(struct be_adapter *adapter)
501 {
502 	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
503 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
504 	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
505 	struct be_port_rxf_stats_v2 *port_stats =
506 					&rxf_stats->port[adapter->port_num];
507 	struct be_drv_stats *drvs = &adapter->drv_stats;
508 
509 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
510 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
511 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
512 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
513 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
514 	drvs->rx_control_frames = port_stats->rx_control_frames;
515 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
516 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
517 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
518 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
519 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
520 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
521 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
522 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
523 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
524 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
525 	drvs->rx_dropped_header_too_small =
526 		port_stats->rx_dropped_header_too_small;
527 	drvs->rx_input_fifo_overflow_drop =
528 		port_stats->rx_input_fifo_overflow_drop;
529 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
530 	drvs->rx_alignment_symbol_errors =
531 		port_stats->rx_alignment_symbol_errors;
532 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
533 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
534 	drvs->tx_controlframes = port_stats->tx_controlframes;
535 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
536 	drvs->jabber_events = port_stats->jabber_events;
537 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
538 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
539 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
540 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
541 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
542 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
543 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
544 	if (be_roce_supported(adapter)) {
545 		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
546 		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
547 		drvs->rx_roce_frames = port_stats->roce_frames_received;
548 		drvs->roce_drops_crc = port_stats->roce_drops_crc;
549 		drvs->roce_drops_payload_len =
550 			port_stats->roce_drops_payload_len;
551 	}
552 }
553 
/* Translate the Lancer pport stats response into the generic driver stats
 * in adapter->drv_stats. Converts the FW buffer from LE in place first.
 * Lancer exposes several 64-bit counters; only the low 32 bits (_lo) are
 * copied into the 32/64-bit driver fields here.
 */
static void populate_lancer_stats(struct be_adapter *adapter)
{
	struct be_drv_stats *drvs = &adapter->drv_stats;
	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);

	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
	drvs->rx_dropped_tcp_length =
				pport_stats->rx_dropped_invalid_tcp_length;
	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
	drvs->rx_dropped_header_too_small =
				pport_stats->rx_dropped_header_too_small;
	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
	drvs->rx_address_filtered =
					pport_stats->rx_address_filtered +
					pport_stats->rx_vlan_filtered;
	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
	/* same HW counter feeds both fifo-overflow driver stats on Lancer */
	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
	drvs->jabber_events = pport_stats->rx_jabbers;
	drvs->forwarded_packets = pport_stats->num_forwards_lo;
	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
	drvs->rx_drops_too_many_frags =
				pport_stats->rx_drops_too_many_frags_lo;
}
590 
591 static void accumulate_16bit_val(u32 *acc, u16 val)
592 {
593 #define lo(x)			(x & 0xFFFF)
594 #define hi(x)			(x & 0xFFFF0000)
595 	bool wrapped = val < lo(*acc);
596 	u32 newacc = hi(*acc) + val;
597 
598 	if (wrapped)
599 		newacc += 65536;
600 	ACCESS_ONCE(*acc) = newacc;
601 }
602 
603 static void populate_erx_stats(struct be_adapter *adapter,
604 			       struct be_rx_obj *rxo, u32 erx_stat)
605 {
606 	if (!BEx_chip(adapter))
607 		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
608 	else
609 		/* below erx HW counter can actually wrap around after
610 		 * 65535. Driver accumulates a 32-bit value
611 		 */
612 		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
613 				     (u16)erx_stat);
614 }
615 
616 void be_parse_stats(struct be_adapter *adapter)
617 {
618 	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
619 	struct be_rx_obj *rxo;
620 	int i;
621 	u32 erx_stat;
622 
623 	if (lancer_chip(adapter)) {
624 		populate_lancer_stats(adapter);
625 	} else {
626 		if (BE2_chip(adapter))
627 			populate_be_v0_stats(adapter);
628 		else if (BE3_chip(adapter))
629 			/* for BE3 */
630 			populate_be_v1_stats(adapter);
631 		else
632 			populate_be_v2_stats(adapter);
633 
634 		/* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
635 		for_all_rx_queues(adapter, rxo, i) {
636 			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
637 			populate_erx_stats(adapter, rxo, erx_stat);
638 		}
639 	}
640 }
641 
/* ndo_get_stats64 handler: aggregate per-queue packet/byte counters and
 * the driver's parsed HW error stats into @stats. Returns @stats.
 */
static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
						struct rtnl_link_stats64 *stats)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	struct be_drv_stats *drvs = &adapter->drv_stats;
	struct be_rx_obj *rxo;
	struct be_tx_obj *txo;
	u64 pkts, bytes;
	unsigned int start;
	int i;

	for_all_rx_queues(adapter, rxo, i) {
		const struct be_rx_stats *rx_stats = rx_stats(rxo);

		/* seqcount retry loop: re-read if a writer updated the
		 * 64-bit counters while we were reading them
		 */
		do {
			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
			pkts = rx_stats(rxo)->rx_pkts;
			bytes = rx_stats(rxo)->rx_bytes;
		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
		stats->rx_packets += pkts;
		stats->rx_bytes += bytes;
		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
					rx_stats(rxo)->rx_drops_no_frags;
	}

	for_all_tx_queues(adapter, txo, i) {
		const struct be_tx_stats *tx_stats = tx_stats(txo);

		/* same retry pattern for the TX counters */
		do {
			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
			pkts = tx_stats(txo)->tx_pkts;
			bytes = tx_stats(txo)->tx_bytes;
		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
		stats->tx_packets += pkts;
		stats->tx_bytes += bytes;
	}

	/* bad pkts received */
	stats->rx_errors = drvs->rx_crc_errors +
		drvs->rx_alignment_symbol_errors +
		drvs->rx_in_range_errors +
		drvs->rx_out_range_errors +
		drvs->rx_frame_too_long +
		drvs->rx_dropped_too_small +
		drvs->rx_dropped_too_short +
		drvs->rx_dropped_header_too_small +
		drvs->rx_dropped_tcp_length +
		drvs->rx_dropped_runt;

	/* detailed rx errors */
	stats->rx_length_errors = drvs->rx_in_range_errors +
		drvs->rx_out_range_errors +
		drvs->rx_frame_too_long;

	stats->rx_crc_errors = drvs->rx_crc_errors;

	/* frame alignment errors */
	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;

	/* receiver fifo overrun */
	/* drops_no_pbuf is no per i/f, it's per BE card */
	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
				drvs->rx_input_fifo_overflow_drop +
				drvs->rx_drops_no_pbuf;
	return stats;
}
709 
710 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
711 {
712 	struct net_device *netdev = adapter->netdev;
713 
714 	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
715 		netif_carrier_off(netdev);
716 		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
717 	}
718 
719 	if (link_status)
720 		netif_carrier_on(netdev);
721 	else
722 		netif_carrier_off(netdev);
723 
724 	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
725 }
726 
727 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
728 {
729 	struct be_tx_stats *stats = tx_stats(txo);
730 	u64 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
731 
732 	u64_stats_update_begin(&stats->sync);
733 	stats->tx_reqs++;
734 	stats->tx_bytes += skb->len;
735 	stats->tx_pkts += tx_pkts;
736 	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
737 		stats->tx_vxlan_offload_pkts += tx_pkts;
738 	u64_stats_update_end(&stats->sync);
739 }
740 
741 /* Returns number of WRBs needed for the skb */
742 static u32 skb_wrb_cnt(struct sk_buff *skb)
743 {
744 	/* +1 for the header wrb */
745 	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
746 }
747 
748 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
749 {
750 	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
751 	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
752 	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
753 	wrb->rsvd0 = 0;
754 }
755 
756 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
757  * to avoid the swap and shift/mask operations in wrb_fill().
758  */
759 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
760 {
761 	wrb->frag_pa_hi = 0;
762 	wrb->frag_pa_lo = 0;
763 	wrb->frag_len = 0;
764 	wrb->rsvd0 = 0;
765 }
766 
767 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
768 				     struct sk_buff *skb)
769 {
770 	u8 vlan_prio;
771 	u16 vlan_tag;
772 
773 	vlan_tag = skb_vlan_tag_get(skb);
774 	vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
775 	/* If vlan priority provided by OS is NOT in available bmap */
776 	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
777 		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
778 				adapter->recommended_prio_bits;
779 
780 	return vlan_tag;
781 }
782 
783 /* Used only for IP tunnel packets */
784 static u16 skb_inner_ip_proto(struct sk_buff *skb)
785 {
786 	return (inner_ip_hdr(skb)->version == 4) ?
787 		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
788 }
789 
790 static u16 skb_ip_proto(struct sk_buff *skb)
791 {
792 	return (ip_hdr(skb)->version == 4) ?
793 		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
794 }
795 
796 static inline bool be_is_txq_full(struct be_tx_obj *txo)
797 {
798 	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
799 }
800 
801 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
802 {
803 	return atomic_read(&txo->q.used) < txo->q.len / 2;
804 }
805 
806 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
807 {
808 	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
809 }
810 
811 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
812 				       struct sk_buff *skb,
813 				       struct be_wrb_params *wrb_params)
814 {
815 	u16 proto;
816 
817 	if (skb_is_gso(skb)) {
818 		BE_WRB_F_SET(wrb_params->features, LSO, 1);
819 		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
820 		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
821 			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
822 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
823 		if (skb->encapsulation) {
824 			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
825 			proto = skb_inner_ip_proto(skb);
826 		} else {
827 			proto = skb_ip_proto(skb);
828 		}
829 		if (proto == IPPROTO_TCP)
830 			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
831 		else if (proto == IPPROTO_UDP)
832 			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
833 	}
834 
835 	if (skb_vlan_tag_present(skb)) {
836 		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
837 		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
838 	}
839 
840 	BE_WRB_F_SET(wrb_params->features, CRC, 1);
841 }
842 
/* Populate the TX header WRB @hdr from @wrb_params and @skb.
 * The header is left in CPU byte-order; the caller converts it to LE
 * (see be_tx_setup_wrb_hdr).
 */
static void wrb_fill_hdr(struct be_adapter *adapter,
			 struct be_eth_hdr_wrb *hdr,
			 struct be_wrb_params *wrb_params,
			 struct sk_buff *skb)
{
	memset(hdr, 0, sizeof(*hdr));

	/* checksum-offload request bits */
	SET_TX_WRB_HDR_BITS(crc, hdr,
			    BE_WRB_F_GET(wrb_params->features, CRC));
	SET_TX_WRB_HDR_BITS(ipcs, hdr,
			    BE_WRB_F_GET(wrb_params->features, IPCS));
	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
			    BE_WRB_F_GET(wrb_params->features, TCPCS));
	SET_TX_WRB_HDR_BITS(udpcs, hdr,
			    BE_WRB_F_GET(wrb_params->features, UDPCS));

	/* segmentation-offload bits */
	SET_TX_WRB_HDR_BITS(lso, hdr,
			    BE_WRB_F_GET(wrb_params->features, LSO));
	SET_TX_WRB_HDR_BITS(lso6, hdr,
			    BE_WRB_F_GET(wrb_params->features, LSO6));
	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);

	/* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
	 * hack is not needed, the evt bit is set while ringing DB.
	 */
	SET_TX_WRB_HDR_BITS(event, hdr,
			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
	SET_TX_WRB_HDR_BITS(vlan, hdr,
			    BE_WRB_F_GET(wrb_params->features, VLAN));
	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);

	/* packet geometry and destination */
	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
	SET_TX_WRB_HDR_BITS(mgmt, hdr,
			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
}
879 
880 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
881 			  bool unmap_single)
882 {
883 	dma_addr_t dma;
884 	u32 frag_len = le32_to_cpu(wrb->frag_len);
885 
886 
887 	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
888 		(u64)le32_to_cpu(wrb->frag_pa_lo);
889 	if (frag_len) {
890 		if (unmap_single)
891 			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
892 		else
893 			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
894 	}
895 }
896 
897 /* Grab a WRB header for xmit */
898 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
899 {
900 	u32 head = txo->q.head;
901 
902 	queue_head_inc(&txo->q);
903 	return head;
904 }
905 
/* Set up the WRB header for xmit */
/* Writes the (LE-converted) header WRB at ring index @head, records @skb
 * for later completion processing, and accounts all of the packet's WRBs
 * in the queue's used/pending counters.
 */
static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
				struct be_tx_obj *txo,
				struct be_wrb_params *wrb_params,
				struct sk_buff *skb, u16 head)
{
	u32 num_frags = skb_wrb_cnt(skb);
	struct be_queue_info *txq = &txo->q;
	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);

	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
	/* HW expects the header in little-endian */
	be_dws_cpu_to_le(hdr, sizeof(*hdr));

	/* the header slot must not already hold an skb awaiting completion */
	BUG_ON(txo->sent_skb_list[head]);
	txo->sent_skb_list[head] = skb;
	txo->last_req_hdr = head;
	atomic_add(num_frags, &txq->used);
	txo->last_req_wrb_cnt = num_frags;
	txo->pend_wrb_cnt += num_frags;
}
926 
927 /* Setup a WRB fragment (buffer descriptor) for xmit */
928 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
929 				 int len)
930 {
931 	struct be_eth_wrb *wrb;
932 	struct be_queue_info *txq = &txo->q;
933 
934 	wrb = queue_head_node(txq);
935 	wrb_fill(wrb, busaddr, len);
936 	queue_head_inc(txq);
937 }
938 
/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
 * was invoked. The producer index is restored to the previous packet and the
 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
 */
static void be_xmit_restore(struct be_adapter *adapter,
			    struct be_tx_obj *txo, u32 head, bool map_single,
			    u32 copied)
{
	struct device *dev;
	struct be_eth_wrb *wrb;
	struct be_queue_info *txq = &txo->q;

	dev = &adapter->pdev->dev;
	/* Rewind the producer index to the header WRB of the failed pkt */
	txq->head = head;

	/* skip the first wrb (hdr); it's not mapped */
	queue_head_inc(txq);
	/* 'copied' counts the bytes that were successfully dma-mapped;
	 * walk the frag WRBs and undo each mapping. Only the first frag
	 * (the skb linear part) may have been mapped with dma_map_single.
	 */
	while (copied) {
		wrb = queue_head_node(txq);
		unmap_tx_frag(dev, wrb, map_single);
		map_single = false;
		copied -= le32_to_cpu(wrb->frag_len);
		queue_head_inc(txq);
	}

	/* Leave the producer index back at the (now unused) header WRB */
	txq->head = head;
}
966 
967 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
968  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
969  * of WRBs used up by the packet.
970  */
971 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
972 			   struct sk_buff *skb,
973 			   struct be_wrb_params *wrb_params)
974 {
975 	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
976 	struct device *dev = &adapter->pdev->dev;
977 	struct be_queue_info *txq = &txo->q;
978 	bool map_single = false;
979 	u32 head = txq->head;
980 	dma_addr_t busaddr;
981 	int len;
982 
983 	head = be_tx_get_wrb_hdr(txo);
984 
985 	if (skb->len > skb->data_len) {
986 		len = skb_headlen(skb);
987 
988 		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
989 		if (dma_mapping_error(dev, busaddr))
990 			goto dma_err;
991 		map_single = true;
992 		be_tx_setup_wrb_frag(txo, busaddr, len);
993 		copied += len;
994 	}
995 
996 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
997 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
998 		len = skb_frag_size(frag);
999 
1000 		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1001 		if (dma_mapping_error(dev, busaddr))
1002 			goto dma_err;
1003 		be_tx_setup_wrb_frag(txo, busaddr, len);
1004 		copied += len;
1005 	}
1006 
1007 	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1008 
1009 	be_tx_stats_update(txo, skb);
1010 	return wrb_cnt;
1011 
1012 dma_err:
1013 	adapter->drv_stats.dma_map_errors++;
1014 	be_xmit_restore(adapter, txo, head, map_single, copied);
1015 	return 0;
1016 }
1017 
/* Returns non-zero when the f/w has reported a QnQ async event
 * (BE_FLAGS_QNQ_ASYNC_EVT_RCVD is set in adapter->flags).
 */
static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
{
	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
}
1022 
1023 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1024 					     struct sk_buff *skb,
1025 					     struct be_wrb_params
1026 					     *wrb_params)
1027 {
1028 	u16 vlan_tag = 0;
1029 
1030 	skb = skb_share_check(skb, GFP_ATOMIC);
1031 	if (unlikely(!skb))
1032 		return skb;
1033 
1034 	if (skb_vlan_tag_present(skb))
1035 		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1036 
1037 	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1038 		if (!vlan_tag)
1039 			vlan_tag = adapter->pvid;
1040 		/* f/w workaround to set skip_hw_vlan = 1, informs the F/W to
1041 		 * skip VLAN insertion
1042 		 */
1043 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1044 	}
1045 
1046 	if (vlan_tag) {
1047 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1048 						vlan_tag);
1049 		if (unlikely(!skb))
1050 			return skb;
1051 		skb->vlan_tci = 0;
1052 	}
1053 
1054 	/* Insert the outer VLAN, if any */
1055 	if (adapter->qnq_vid) {
1056 		vlan_tag = adapter->qnq_vid;
1057 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1058 						vlan_tag);
1059 		if (unlikely(!skb))
1060 			return skb;
1061 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1062 	}
1063 
1064 	return skb;
1065 }
1066 
1067 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1068 {
1069 	struct ethhdr *eh = (struct ethhdr *)skb->data;
1070 	u16 offset = ETH_HLEN;
1071 
1072 	if (eh->h_proto == htons(ETH_P_IPV6)) {
1073 		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1074 
1075 		offset += sizeof(struct ipv6hdr);
1076 		if (ip6h->nexthdr != NEXTHDR_TCP &&
1077 		    ip6h->nexthdr != NEXTHDR_UDP) {
1078 			struct ipv6_opt_hdr *ehdr =
1079 				(struct ipv6_opt_hdr *)(skb->data + offset);
1080 
1081 			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1082 			if (ehdr->hdrlen == 0xff)
1083 				return true;
1084 		}
1085 	}
1086 	return false;
1087 }
1088 
/* Returns non-zero if this pkt may get a VLAN tag on TX: an skb-level
 * tag is present, or a port-vid / outer QnQ vid is configured.
 */
static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
{
	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
}
1093 
/* BE3 may stall on certain ipv6 ext-hdr packets (see
 * be_ipv6_exthdr_check()); returns non-zero for such pkts on BE3.
 */
static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
{
	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
}
1098 
1099 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1100 						  struct sk_buff *skb,
1101 						  struct be_wrb_params
1102 						  *wrb_params)
1103 {
1104 	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1105 	unsigned int eth_hdr_len;
1106 	struct iphdr *ip;
1107 
1108 	/* For padded packets, BE HW modifies tot_len field in IP header
1109 	 * incorrecly when VLAN tag is inserted by HW.
1110 	 * For padded packets, Lancer computes incorrect checksum.
1111 	 */
1112 	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1113 						VLAN_ETH_HLEN : ETH_HLEN;
1114 	if (skb->len <= 60 &&
1115 	    (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1116 	    is_ipv4_pkt(skb)) {
1117 		ip = (struct iphdr *)ip_hdr(skb);
1118 		pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1119 	}
1120 
1121 	/* If vlan tag is already inlined in the packet, skip HW VLAN
1122 	 * tagging in pvid-tagging mode
1123 	 */
1124 	if (be_pvid_tagging_enabled(adapter) &&
1125 	    veh->h_vlan_proto == htons(ETH_P_8021Q))
1126 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1127 
1128 	/* HW has a bug wherein it will calculate CSUM for VLAN
1129 	 * pkts even though it is disabled.
1130 	 * Manually insert VLAN in pkt.
1131 	 */
1132 	if (skb->ip_summed != CHECKSUM_PARTIAL &&
1133 	    skb_vlan_tag_present(skb)) {
1134 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1135 		if (unlikely(!skb))
1136 			goto err;
1137 	}
1138 
1139 	/* HW may lockup when VLAN HW tagging is requested on
1140 	 * certain ipv6 packets. Drop such pkts if the HW workaround to
1141 	 * skip HW tagging is not enabled by FW.
1142 	 */
1143 	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1144 		     (adapter->pvid || adapter->qnq_vid) &&
1145 		     !qnq_async_evt_rcvd(adapter)))
1146 		goto tx_drop;
1147 
1148 	/* Manual VLAN tag insertion to prevent:
1149 	 * ASIC lockup when the ASIC inserts VLAN tag into
1150 	 * certain ipv6 packets. Insert VLAN tags in driver,
1151 	 * and set event, completion, vlan bits accordingly
1152 	 * in the Tx WRB.
1153 	 */
1154 	if (be_ipv6_tx_stall_chk(adapter, skb) &&
1155 	    be_vlan_tag_tx_chk(adapter, skb)) {
1156 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1157 		if (unlikely(!skb))
1158 			goto err;
1159 	}
1160 
1161 	return skb;
1162 tx_drop:
1163 	dev_kfree_skb_any(skb);
1164 err:
1165 	return NULL;
1166 }
1167 
1168 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1169 					   struct sk_buff *skb,
1170 					   struct be_wrb_params *wrb_params)
1171 {
1172 	int err;
1173 
1174 	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1175 	 * packets that are 32b or less may cause a transmit stall
1176 	 * on that port. The workaround is to pad such packets
1177 	 * (len <= 32 bytes) to a minimum length of 36b.
1178 	 */
1179 	if (skb->len <= 32) {
1180 		if (skb_put_padto(skb, 36))
1181 			return NULL;
1182 	}
1183 
1184 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
1185 		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1186 		if (!skb)
1187 			return NULL;
1188 	}
1189 
1190 	/* The stack can send us skbs with length greater than
1191 	 * what the HW can handle. Trim the extra bytes.
1192 	 */
1193 	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1194 	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1195 	WARN_ON(err);
1196 
1197 	return skb;
1198 }
1199 
/* Ring the TX doorbell for all pending WRBs on this queue, making sure
 * the last request raises a completion event.
 */
static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
{
	struct be_queue_info *txq = &txo->q;
	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);

	/* Mark the last request eventable if it hasn't been marked already */
	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);

	/* compose a dummy wrb if there are odd set of wrbs to notify */
	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
		wrb_fill_dummy(queue_head_node(txq));
		queue_head_inc(txq);
		atomic_inc(&txq->used);
		txo->pend_wrb_cnt++;
		/* Re-write num_wrbs in the last request's header so it
		 * accounts for the dummy WRB appended above.
		 */
		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
					   TX_HDR_WRB_NUM_SHIFT);
		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
					  TX_HDR_WRB_NUM_SHIFT);
	}
	/* One doorbell write notifies HW of all pending WRBs */
	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
	txo->pend_wrb_cnt = 0;
}
1223 
/* OS2BMC related */

/* Well-known UDP ports whose traffic may be mirrored to the BMC */
#define DHCP_CLIENT_PORT	68
#define DHCP_SERVER_PORT	67
#define NET_BIOS_PORT1		137
#define NET_BIOS_PORT2		138
#define DHCPV6_RAS_PORT		547

/* Non-broadcast multicast frames go to the BMC only when the
 * corresponding filter bit is NOT set in bmc_filt_mask.
 */
#define is_mc_allowed_on_bmc(adapter, eh)	\
	(!is_multicast_filt_enabled(adapter) &&	\
	 is_multicast_ether_addr(eh->h_dest) &&	\
	 !is_broadcast_ether_addr(eh->h_dest))

#define is_bc_allowed_on_bmc(adapter, eh)	\
	(!is_broadcast_filt_enabled(adapter) &&	\
	 is_broadcast_ether_addr(eh->h_dest))

/* Note the asymmetry: ARP goes to the BMC when its filter IS enabled */
#define is_arp_allowed_on_bmc(adapter, skb)	\
	(is_arp(skb) && is_arp_filt_enabled(adapter))

#define is_broadcast_packet(eh, adapter)	\
		(is_multicast_ether_addr(eh->h_dest) && \
		!compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))

#define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))

/* The helpers below each test one bit of adapter->bmc_filt_mask */
#define is_arp_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))

#define is_dhcp_client_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)

#define is_dhcp_srvr_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)

#define is_nbios_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)

#define is_ipv6_na_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask &	\
			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)

#define is_ipv6_ra_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)

#define is_ipv6_ras_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)

#define is_broadcast_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)

#define is_multicast_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1277 
/* Returns true when the given xmit skb should also be mirrored to the
 * BMC, according to the OS2BMC filter settings. May re-point *skb (the
 * inline-vlan insertion can reallocate it); *skb may become NULL then.
 */
static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
			       struct sk_buff **skb)
{
	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
	bool os2bmc = false;

	if (!be_is_os2bmc_enabled(adapter))
		goto done;

	/* Only multicast/broadcast frames are candidates for the BMC */
	if (!is_multicast_ether_addr(eh->h_dest))
		goto done;

	if (is_mc_allowed_on_bmc(adapter, eh) ||
	    is_bc_allowed_on_bmc(adapter, eh) ||
	    is_arp_allowed_on_bmc(adapter, (*skb))) {
		os2bmc = true;
		goto done;
	}

	/* IPv6 router/neighbour advertisements: per-filter-bit decision */
	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
		struct ipv6hdr *hdr = ipv6_hdr((*skb));
		u8 nexthdr = hdr->nexthdr;

		if (nexthdr == IPPROTO_ICMPV6) {
			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));

			switch (icmp6->icmp6_type) {
			case NDISC_ROUTER_ADVERTISEMENT:
				os2bmc = is_ipv6_ra_filt_enabled(adapter);
				goto done;
			case NDISC_NEIGHBOUR_ADVERTISEMENT:
				os2bmc = is_ipv6_na_filt_enabled(adapter);
				goto done;
			default:
				break;
			}
		}
	}

	/* DHCP / NetBIOS / DHCPv6-RAS traffic: per-filter-bit decision */
	if (is_udp_pkt((*skb))) {
		struct udphdr *udp = udp_hdr((*skb));

		switch (ntohs(udp->dest)) {
		case DHCP_CLIENT_PORT:
			os2bmc = is_dhcp_client_filt_enabled(adapter);
			goto done;
		case DHCP_SERVER_PORT:
			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
			goto done;
		case NET_BIOS_PORT1:
		case NET_BIOS_PORT2:
			os2bmc = is_nbios_filt_enabled(adapter);
			goto done;
		case DHCPV6_RAS_PORT:
			os2bmc = is_ipv6_ras_filt_enabled(adapter);
			goto done;
		default:
			break;
		}
	}
done:
	/* For packets over a vlan, which are destined
	 * to BMC, asic expects the vlan to be inline in the packet.
	 */
	if (os2bmc)
		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);

	return os2bmc;
}
1347 
/* ndo_start_xmit handler: maps and enqueues the skb on the TX queue
 * chosen by the stack; rings the doorbell unless more pkts are coming.
 */
static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	u16 q_idx = skb_get_queue_mapping(skb);
	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
	struct be_wrb_params wrb_params = { 0 };
	/* Defer the doorbell when the stack says more pkts will follow */
	bool flush = !skb->xmit_more;
	u16 wrb_cnt;

	/* Workarounds may drop (return NULL) or reallocate the skb */
	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
	if (unlikely(!skb))
		goto drop;

	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);

	/* wrb_cnt == 0 signals a DMA mapping failure */
	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
	if (unlikely(!wrb_cnt)) {
		dev_kfree_skb_any(skb);
		goto drop;
	}

	/* if os2bmc is enabled and if the pkt is destined to bmc,
	 * enqueue the pkt a 2nd time with mgmt bit set.
	 */
	if (be_send_pkt_to_bmc(adapter, &skb)) {
		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
		if (unlikely(!wrb_cnt))
			goto drop;
		else
			/* extra ref: the skb now sits on the queue twice */
			skb_get(skb);
	}

	if (be_is_txq_full(txo)) {
		netif_stop_subqueue(netdev, q_idx);
		tx_stats(txo)->tx_stops++;
	}

	if (flush || __netif_subqueue_stopped(netdev, q_idx))
		be_xmit_flush(adapter, txo);

	return NETDEV_TX_OK;
drop:
	tx_stats(txo)->tx_drv_drops++;
	/* Flush the already enqueued tx requests */
	if (flush && txo->pend_wrb_cnt)
		be_xmit_flush(adapter, txo);

	return NETDEV_TX_OK;
}
1398 
1399 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1400 {
1401 	struct be_adapter *adapter = netdev_priv(netdev);
1402 	struct device *dev = &adapter->pdev->dev;
1403 
1404 	if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1405 		dev_info(dev, "MTU must be between %d and %d bytes\n",
1406 			 BE_MIN_MTU, BE_MAX_MTU);
1407 		return -EINVAL;
1408 	}
1409 
1410 	dev_info(dev, "MTU changed from %d to %d bytes\n",
1411 		 netdev->mtu, new_mtu);
1412 	netdev->mtu = new_mtu;
1413 	return 0;
1414 }
1415 
/* Returns true only when every flag bit covered by
 * BE_IF_FLAGS_ALL_PROMISCUOUS is set in if_flags.
 */
static inline bool be_in_all_promisc(struct be_adapter *adapter)
{
	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
			BE_IF_FLAGS_ALL_PROMISCUOUS;
}
1421 
1422 static int be_set_vlan_promisc(struct be_adapter *adapter)
1423 {
1424 	struct device *dev = &adapter->pdev->dev;
1425 	int status;
1426 
1427 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1428 		return 0;
1429 
1430 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1431 	if (!status) {
1432 		dev_info(dev, "Enabled VLAN promiscuous mode\n");
1433 		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1434 	} else {
1435 		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1436 	}
1437 	return status;
1438 }
1439 
1440 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1441 {
1442 	struct device *dev = &adapter->pdev->dev;
1443 	int status;
1444 
1445 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1446 	if (!status) {
1447 		dev_info(dev, "Disabling VLAN promiscuous mode\n");
1448 		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1449 	}
1450 	return status;
1451 }
1452 
/*
 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
 * If the user configures more, place BE in vlan promiscuous mode.
 */
static int be_vid_config(struct be_adapter *adapter)
{
	struct device *dev = &adapter->pdev->dev;
	u16 vids[BE_NUM_VLANS_SUPPORTED];
	u16 num = 0, i = 0;
	int status = 0;

	/* No need to change the VLAN state if the I/F is in promiscuous */
	if (adapter->netdev->flags & IFF_PROMISC)
		return 0;

	if (adapter->vlans_added > be_max_vlans(adapter))
		return be_set_vlan_promisc(adapter);

	/* Coming out of vlan-promisc: the filter list is programmed below */
	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
		status = be_clear_vlan_promisc(adapter);
		if (status)
			return status;
	}
	/* Construct VLAN Table to give to HW */
	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
		vids[num++] = cpu_to_le16(i);

	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
	if (status) {
		dev_err(dev, "Setting HW VLAN filtering failed\n");
		/* Set to VLAN promisc mode as setting VLAN filter failed */
		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
		    addl_status(status) ==
				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
			return be_set_vlan_promisc(adapter);
	}
	return status;
}
1491 
1492 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1493 {
1494 	struct be_adapter *adapter = netdev_priv(netdev);
1495 	int status = 0;
1496 
1497 	mutex_lock(&adapter->rx_filter_lock);
1498 
1499 	/* Packets with VID 0 are always received by Lancer by default */
1500 	if (lancer_chip(adapter) && vid == 0)
1501 		goto done;
1502 
1503 	if (test_bit(vid, adapter->vids))
1504 		goto done;
1505 
1506 	set_bit(vid, adapter->vids);
1507 	adapter->vlans_added++;
1508 
1509 	status = be_vid_config(adapter);
1510 done:
1511 	mutex_unlock(&adapter->rx_filter_lock);
1512 	return status;
1513 }
1514 
1515 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1516 {
1517 	struct be_adapter *adapter = netdev_priv(netdev);
1518 	int status = 0;
1519 
1520 	mutex_lock(&adapter->rx_filter_lock);
1521 
1522 	/* Packets with VID 0 are always received by Lancer by default */
1523 	if (lancer_chip(adapter) && vid == 0)
1524 		goto done;
1525 
1526 	if (!test_bit(vid, adapter->vids))
1527 		goto done;
1528 
1529 	clear_bit(vid, adapter->vids);
1530 	adapter->vlans_added--;
1531 
1532 	status = be_vid_config(adapter);
1533 done:
1534 	mutex_unlock(&adapter->rx_filter_lock);
1535 	return status;
1536 }
1537 
/* Put the interface into full (uc + mc + vlan) promiscuous mode.
 * NOTE(review): the f/w command status is ignored, so if_flags may
 * claim promisc even if the command failed - unlike the mc/uc promisc
 * helpers below, which update if_flags only on success. Confirm this
 * asymmetry is intended.
 */
static void be_set_all_promisc(struct be_adapter *adapter)
{
	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
}
1543 
1544 static void be_set_mc_promisc(struct be_adapter *adapter)
1545 {
1546 	int status;
1547 
1548 	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1549 		return;
1550 
1551 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1552 	if (!status)
1553 		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1554 }
1555 
1556 static void be_set_uc_promisc(struct be_adapter *adapter)
1557 {
1558 	int status;
1559 
1560 	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1561 		return;
1562 
1563 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1564 	if (!status)
1565 		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1566 }
1567 
1568 static void be_clear_uc_promisc(struct be_adapter *adapter)
1569 {
1570 	int status;
1571 
1572 	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1573 		return;
1574 
1575 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1576 	if (!status)
1577 		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1578 }
1579 
1580 /* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1581  * We use a single callback function for both sync and unsync. We really don't
1582  * add/remove addresses through this callback. But, we use it to detect changes
1583  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1584  */
static int be_uc_list_update(struct net_device *netdev,
			     const unsigned char *addr)
{
	struct be_adapter *adapter = netdev_priv(netdev);

	/* Only record that the uc-list changed; the actual list is
	 * (re)programmed later by be_set_uc_list().
	 */
	adapter->update_uc_list = true;
	return 0;
}
1593 
static int be_mc_list_update(struct net_device *netdev,
			     const unsigned char *addr)
{
	struct be_adapter *adapter = netdev_priv(netdev);

	/* Only record that the mc-list changed; the actual list is
	 * (re)programmed later by be_set_mc_list().
	 */
	adapter->update_mc_list = true;
	return 0;
}
1602 
/* Sync the netdev mc-list into the adapter cache and program the HW
 * mcast filter, falling back to mcast-promisc when the list is too big.
 */
static void be_set_mc_list(struct be_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct netdev_hw_addr *ha;
	bool mc_promisc = false;
	int status;

	/* The addr lock guards netdev's mc-list while we scan/copy it */
	netif_addr_lock_bh(netdev);
	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);

	if (netdev->flags & IFF_PROMISC) {
		adapter->update_mc_list = false;
	} else if (netdev->flags & IFF_ALLMULTI ||
		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
		/* Enable multicast promisc if num configured exceeds
		 * what we support
		 */
		mc_promisc = true;
		adapter->update_mc_list = false;
	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
		/* Update mc-list unconditionally if the iface was previously
		 * in mc-promisc mode and now is out of that mode.
		 */
		adapter->update_mc_list = true;
	}

	if (adapter->update_mc_list) {
		int i = 0;

		/* cache the mc-list in adapter */
		netdev_for_each_mc_addr(ha, netdev) {
			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
			i++;
		}
		adapter->mc_count = netdev_mc_count(netdev);
	}
	netif_addr_unlock_bh(netdev);

	/* The f/w filter commands are issued only after releasing the
	 * (BH-disabling) addr lock taken above.
	 */
	if (mc_promisc) {
		be_set_mc_promisc(adapter);
	} else if (adapter->update_mc_list) {
		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
		if (!status)
			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
		else
			be_set_mc_promisc(adapter);

		adapter->update_mc_list = false;
	}
}
1653 
1654 static void be_clear_mc_list(struct be_adapter *adapter)
1655 {
1656 	struct net_device *netdev = adapter->netdev;
1657 
1658 	__dev_mc_unsync(netdev, NULL);
1659 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1660 	adapter->mc_count = 0;
1661 }
1662 
1663 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1664 {
1665 	if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1666 			     adapter->dev_mac)) {
1667 		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1668 		return 0;
1669 	}
1670 
1671 	return be_cmd_pmac_add(adapter,
1672 			       (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1673 			       adapter->if_handle,
1674 			       &adapter->pmac_id[uc_idx + 1], 0);
1675 }
1676 
/* Delete a uc-list pmac-id from the HW i/f. Slots that alias the
 * primary MAC's pmac-id (see be_uc_mac_add()) are skipped, since the
 * primary MAC must stay programmed.
 */
static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
{
	if (pmac_id == adapter->pmac_id[0])
		return;

	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
}
1684 
/* Sync the netdev uc-list into the adapter cache and program the HW
 * unicast MAC filters, falling back to uc-promisc if the list exceeds
 * the available filter slots.
 */
static void be_set_uc_list(struct be_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct netdev_hw_addr *ha;
	bool uc_promisc = false;
	int curr_uc_macs = 0, i;

	/* The addr lock guards netdev's uc-list while we scan/copy it */
	netif_addr_lock_bh(netdev);
	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);

	if (netdev->flags & IFF_PROMISC) {
		adapter->update_uc_list = false;
	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
		/* -1: one filter slot is reserved for the primary MAC */
		uc_promisc = true;
		adapter->update_uc_list = false;
	}  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
		/* Update uc-list unconditionally if the iface was previously
		 * in uc-promisc mode and now is out of that mode.
		 */
		adapter->update_uc_list = true;
	}

	if (adapter->update_uc_list) {
		i = 1; /* First slot is claimed by the Primary MAC */

		/* cache the uc-list in adapter array */
		netdev_for_each_uc_addr(ha, netdev) {
			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
			i++;
		}
		curr_uc_macs = netdev_uc_count(netdev);
	}
	netif_addr_unlock_bh(netdev);

	if (uc_promisc) {
		be_set_uc_promisc(adapter);
	} else if (adapter->update_uc_list) {
		be_clear_uc_promisc(adapter);

		/* Reprogram the whole list: delete all old entries first,
		 * then add the freshly cached ones.
		 */
		for (i = 0; i < adapter->uc_macs; i++)
			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);

		for (i = 0; i < curr_uc_macs; i++)
			be_uc_mac_add(adapter, i);
		adapter->uc_macs = curr_uc_macs;
		adapter->update_uc_list = false;
	}
}
1733 
1734 static void be_clear_uc_list(struct be_adapter *adapter)
1735 {
1736 	struct net_device *netdev = adapter->netdev;
1737 	int i;
1738 
1739 	__dev_uc_unsync(netdev, NULL);
1740 	for (i = 0; i < adapter->uc_macs; i++)
1741 		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1742 
1743 	adapter->uc_macs = 0;
1744 }
1745 
/* Apply the netdev's current rx-mode (promisc / uc / mc filters) to
 * the HW. Serialized with the vid-config paths by rx_filter_lock.
 */
static void __be_set_rx_mode(struct be_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;

	mutex_lock(&adapter->rx_filter_lock);

	if (netdev->flags & IFF_PROMISC) {
		if (!be_in_all_promisc(adapter))
			be_set_all_promisc(adapter);
	} else if (be_in_all_promisc(adapter)) {
		/* We need to re-program the vlan-list or clear
		 * vlan-promisc mode (if needed) when the interface
		 * comes out of promisc mode.
		 */
		be_vid_config(adapter);
	}

	be_set_uc_list(adapter);
	be_set_mc_list(adapter);

	mutex_unlock(&adapter->rx_filter_lock);
}
1768 
1769 static void be_work_set_rx_mode(struct work_struct *work)
1770 {
1771 	struct be_cmd_work *cmd_work =
1772 				container_of(work, struct be_cmd_work, work);
1773 
1774 	__be_set_rx_mode(cmd_work->adapter);
1775 	kfree(cmd_work);
1776 }
1777 
/* ndo_set_vf_mac handler: program 'mac' as the MAC address of VF 'vf' */
static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
	int status;

	if (!sriov_enabled(adapter))
		return -EPERM;

	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
		return -EINVAL;

	/* Proceed further only if user provided MAC is different
	 * from active MAC
	 */
	if (ether_addr_equal(mac, vf_cfg->mac_addr))
		return 0;

	if (BEx_chip(adapter)) {
		/* BEx: replace the VF's pmac entry (delete old, add new) */
		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
				vf + 1);

		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
					 &vf_cfg->pmac_id, vf + 1);
	} else {
		/* non-BEx chips: a single f/w op updates the MAC */
		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
					vf + 1);
	}

	if (status) {
		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
			mac, vf, status);
		return be_cmd_status(status);
	}

	/* Cache the new MAC only after the f/w has accepted it */
	ether_addr_copy(vf_cfg->mac_addr, mac);

	return 0;
}
1817 
1818 static int be_get_vf_config(struct net_device *netdev, int vf,
1819 			    struct ifla_vf_info *vi)
1820 {
1821 	struct be_adapter *adapter = netdev_priv(netdev);
1822 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1823 
1824 	if (!sriov_enabled(adapter))
1825 		return -EPERM;
1826 
1827 	if (vf >= adapter->num_vfs)
1828 		return -EINVAL;
1829 
1830 	vi->vf = vf;
1831 	vi->max_tx_rate = vf_cfg->tx_rate;
1832 	vi->min_tx_rate = 0;
1833 	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1834 	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1835 	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1836 	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1837 	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1838 
1839 	return 0;
1840 }
1841 
/* Enable Transparent VLAN Tagging (TVT) with tag 'vlan' on VF 'vf':
 * guest VLAN filters are cleared and the VF loses the privilege to
 * program its own filters.
 */
static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
{
	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
	u16 vids[BE_NUM_VLANS_SUPPORTED];
	int vf_if_id = vf_cfg->if_handle;
	int status;

	/* Enable Transparent VLAN Tagging */
	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
	if (status)
		return status;

	/* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
	vids[0] = 0;
	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
	if (!status)
		dev_info(&adapter->pdev->dev,
			 "Cleared guest VLANs on VF%d", vf);

	/* After TVT is enabled, disallow VFs to program VLAN filters */
	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
						  ~BE_PRIV_FILTMGMT, vf + 1);
		if (!status)
			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
	}
	/* The clean-up steps above are best-effort; TVT itself succeeded,
	 * so report success.
	 */
	return 0;
}
1870 
/* Disable Transparent VLAN Tagging on VF 'vf' and hand back the
 * privilege to program its own VLAN filters.
 */
static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
{
	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
	struct device *dev = &adapter->pdev->dev;
	int status;

	/* Reset Transparent VLAN Tagging. */
	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
				       vf_cfg->if_handle, 0, 0);
	if (status)
		return status;

	/* Allow VFs to program VLAN filtering */
	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
						  BE_PRIV_FILTMGMT, vf + 1);
		if (!status) {
			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
			dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
		}
	}

	dev_info(dev,
		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
	/* Privilege-restore failure is not fatal: TVT itself was reset */
	return 0;
}
1897 
/* ndo_set_vf_vlan handler: a non-zero vlan/qos enables transparent
 * tagging with that tag; vlan == 0 && qos == 0 disables it.
 */
static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
	int status;

	if (!sriov_enabled(adapter))
		return -EPERM;

	/* VID must fit in 12 bits and the priority in 3 bits */
	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
		return -EINVAL;

	if (vlan || qos) {
		/* Fold the priority bits into the 802.1Q TCI value */
		vlan |= qos << VLAN_PRIO_SHIFT;
		status = be_set_vf_tvt(adapter, vf, vlan);
	} else {
		status = be_clear_vf_tvt(adapter, vf);
	}

	if (status) {
		dev_err(&adapter->pdev->dev,
			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
			status);
		return be_cmd_status(status);
	}

	vf_cfg->vlan_tag = vlan;
	return 0;
}
1927 
/* ndo_set_vf_rate handler: limit VF 'vf' to max_tx_rate Mbps.
 * min_tx_rate is not supported; max_tx_rate == 0 removes the limit.
 */
static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
			     int min_tx_rate, int max_tx_rate)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	struct device *dev = &adapter->pdev->dev;
	int percent_rate, status = 0;
	u16 link_speed = 0;
	u8 link_status;

	if (!sriov_enabled(adapter))
		return -EPERM;

	if (vf >= adapter->num_vfs)
		return -EINVAL;

	if (min_tx_rate)
		return -EINVAL;

	/* Removing the limit needs no link checks; link_speed stays 0 */
	if (!max_tx_rate)
		goto config_qos;

	status = be_cmd_link_status_query(adapter, &link_speed,
					  &link_status, 0);
	if (status)
		goto err;

	if (!link_status) {
		dev_err(dev, "TX-rate setting not allowed when link is down\n");
		status = -ENETDOWN;
		goto err;
	}

	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
			link_speed);
		status = -EINVAL;
		goto err;
	}

	/* On Skyhawk the QOS setting must be done only as a % value */
	percent_rate = link_speed / 100;
	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
			percent_rate);
		status = -EINVAL;
		goto err;
	}

config_qos:
	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
	if (status)
		goto err;

	/* Cache the limit only after the f/w accepted it */
	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
	return 0;

err:
	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
		max_tx_rate, vf);
	return be_cmd_status(status);
}
1989 
1990 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1991 				int link_state)
1992 {
1993 	struct be_adapter *adapter = netdev_priv(netdev);
1994 	int status;
1995 
1996 	if (!sriov_enabled(adapter))
1997 		return -EPERM;
1998 
1999 	if (vf >= adapter->num_vfs)
2000 		return -EINVAL;
2001 
2002 	status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2003 	if (status) {
2004 		dev_err(&adapter->pdev->dev,
2005 			"Link state change on VF %d failed: %#x\n", vf, status);
2006 		return be_cmd_status(status);
2007 	}
2008 
2009 	adapter->vf_cfg[vf].plink_tracking = link_state;
2010 
2011 	return 0;
2012 }
2013 
2014 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2015 {
2016 	struct be_adapter *adapter = netdev_priv(netdev);
2017 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2018 	u8 spoofchk;
2019 	int status;
2020 
2021 	if (!sriov_enabled(adapter))
2022 		return -EPERM;
2023 
2024 	if (vf >= adapter->num_vfs)
2025 		return -EINVAL;
2026 
2027 	if (BEx_chip(adapter))
2028 		return -EOPNOTSUPP;
2029 
2030 	if (enable == vf_cfg->spoofchk)
2031 		return 0;
2032 
2033 	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2034 
2035 	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2036 				       0, spoofchk);
2037 	if (status) {
2038 		dev_err(&adapter->pdev->dev,
2039 			"Spoofchk change on VF %d failed: %#x\n", vf, status);
2040 		return be_cmd_status(status);
2041 	}
2042 
2043 	vf_cfg->spoofchk = enable;
2044 	return 0;
2045 }
2046 
2047 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2048 			  ulong now)
2049 {
2050 	aic->rx_pkts_prev = rx_pkts;
2051 	aic->tx_reqs_prev = tx_pkts;
2052 	aic->jiffies = now;
2053 }
2054 
/* Compute a new interrupt-delay value for @eqo based on the RX/TX packet
 * rate observed since the previous call (adaptive interrupt coalescing).
 * Returns the static ethtool-configured delay when AIC is disabled.
 */
static int be_get_new_eqd(struct be_eq_obj *eqo)
{
	struct be_adapter *adapter = eqo->adapter;
	int eqd, start;
	struct be_aic_obj *aic;
	struct be_rx_obj *rxo;
	struct be_tx_obj *txo;
	u64 rx_pkts = 0, tx_pkts = 0;
	ulong now;
	u32 pps, delta;
	int i;

	aic = &adapter->aic_obj[eqo->idx];
	if (!aic->enable) {
		if (aic->jiffies)
			aic->jiffies = 0;
		eqd = aic->et_eqd;	/* static value set via ethtool */
		return eqd;
	}

	/* Sum RX pkt counts over all RX queues serviced by this EQ */
	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
		do {
			start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
			rx_pkts += rxo->stats.rx_pkts;
		} while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
	}

	/* Sum TX request counts over all TX queues serviced by this EQ */
	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
		do {
			start = u64_stats_fetch_begin_irq(&txo->stats.sync);
			tx_pkts += txo->stats.tx_reqs;
		} while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
	}

	/* Skip, if wrapped around or first calculation */
	now = jiffies;
	if (!aic->jiffies || time_before(now, aic->jiffies) ||
	    rx_pkts < aic->rx_pkts_prev ||
	    tx_pkts < aic->tx_reqs_prev) {
		be_aic_update(aic, rx_pkts, tx_pkts, now);
		return aic->prev_eqd;
	}

	delta = jiffies_to_msecs(now - aic->jiffies);
	if (delta == 0)		/* too soon to compute a meaningful rate */
		return aic->prev_eqd;

	/* Combined RX+TX pkts-per-second since the last sample */
	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
	eqd = (pps / 15000) << 2;

	if (eqd < 8)
		eqd = 0;	/* low rate: no delay for lowest latency */
	eqd = min_t(u32, eqd, aic->max_eqd);
	eqd = max_t(u32, eqd, aic->min_eqd);

	be_aic_update(aic, rx_pkts, tx_pkts, now);

	return eqd;
}
2115 
2116 /* For Skyhawk-R only */
2117 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2118 {
2119 	struct be_adapter *adapter = eqo->adapter;
2120 	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2121 	ulong now = jiffies;
2122 	int eqd;
2123 	u32 mult_enc;
2124 
2125 	if (!aic->enable)
2126 		return 0;
2127 
2128 	if (jiffies_to_msecs(now - aic->jiffies) < 1)
2129 		eqd = aic->prev_eqd;
2130 	else
2131 		eqd = be_get_new_eqd(eqo);
2132 
2133 	if (eqd > 100)
2134 		mult_enc = R2I_DLY_ENC_1;
2135 	else if (eqd > 60)
2136 		mult_enc = R2I_DLY_ENC_2;
2137 	else if (eqd > 20)
2138 		mult_enc = R2I_DLY_ENC_3;
2139 	else
2140 		mult_enc = R2I_DLY_ENC_0;
2141 
2142 	aic->prev_eqd = eqd;
2143 
2144 	return mult_enc;
2145 }
2146 
2147 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2148 {
2149 	struct be_set_eqd set_eqd[MAX_EVT_QS];
2150 	struct be_aic_obj *aic;
2151 	struct be_eq_obj *eqo;
2152 	int i, num = 0, eqd;
2153 
2154 	for_all_evt_queues(adapter, eqo, i) {
2155 		aic = &adapter->aic_obj[eqo->idx];
2156 		eqd = be_get_new_eqd(eqo);
2157 		if (force_update || eqd != aic->prev_eqd) {
2158 			set_eqd[num].delay_multiplier = (eqd * 65)/100;
2159 			set_eqd[num].eq_id = eqo->q.id;
2160 			aic->prev_eqd = eqd;
2161 			num++;
2162 		}
2163 	}
2164 
2165 	if (num)
2166 		be_cmd_modify_eqd(adapter, set_eqd, num);
2167 }
2168 
/* Fold one RX completion into this RX object's per-queue statistics */
static void be_rx_stats_update(struct be_rx_obj *rxo,
			       struct be_rx_compl_info *rxcp)
{
	struct be_rx_stats *stats = rx_stats(rxo);

	u64_stats_update_begin(&stats->sync);
	stats->rx_compl++;
	stats->rx_bytes += rxcp->pkt_size;
	stats->rx_pkts++;
	if (rxcp->tunneled)
		stats->rx_vxlan_offload_pkts++;
	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
		stats->rx_mcast_pkts++;
	if (rxcp->err)
		stats->rx_compl_err++;
	u64_stats_update_end(&stats->sync);
}
2186 
2187 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2188 {
2189 	/* L4 checksum is not reliable for non TCP/UDP packets.
2190 	 * Also ignore ipcksm for ipv6 pkts
2191 	 */
2192 	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2193 		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2194 }
2195 
/* Pop the RX buffer at the queue tail and make its data CPU-visible.
 * The frag marked last_frag owns the DMA mapping of the whole big page
 * and is unmapped; other frags are only synced for CPU access
 * (see be_post_rx_frags() for how the mapping ownership is assigned).
 */
static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
{
	struct be_adapter *adapter = rxo->adapter;
	struct be_rx_page_info *rx_page_info;
	struct be_queue_info *rxq = &rxo->q;
	u32 frag_idx = rxq->tail;

	rx_page_info = &rxo->page_info_tbl[frag_idx];
	BUG_ON(!rx_page_info->page);

	if (rx_page_info->last_frag) {
		/* This frag carries the mapping for the entire big page */
		dma_unmap_page(&adapter->pdev->dev,
			       dma_unmap_addr(rx_page_info, bus),
			       adapter->big_page_size, DMA_FROM_DEVICE);
		rx_page_info->last_frag = false;
	} else {
		dma_sync_single_for_cpu(&adapter->pdev->dev,
					dma_unmap_addr(rx_page_info, bus),
					rx_frag_size, DMA_FROM_DEVICE);
	}

	queue_tail_inc(rxq);
	atomic_dec(&rxq->used);
	return rx_page_info;
}
2221 
2222 /* Throwaway the data in the Rx completion */
2223 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2224 				struct be_rx_compl_info *rxcp)
2225 {
2226 	struct be_rx_page_info *page_info;
2227 	u16 i, num_rcvd = rxcp->num_rcvd;
2228 
2229 	for (i = 0; i < num_rcvd; i++) {
2230 		page_info = get_rx_page_info(rxo);
2231 		put_page(page_info->page);
2232 		memset(page_info, 0, sizeof(*page_info));
2233 	}
2234 }
2235 
2236 /*
2237  * skb_fill_rx_data forms a complete skb for an ether frame
2238  * indicated by rxcp.
2239  */
2240 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2241 			     struct be_rx_compl_info *rxcp)
2242 {
2243 	struct be_rx_page_info *page_info;
2244 	u16 i, j;
2245 	u16 hdr_len, curr_frag_len, remaining;
2246 	u8 *start;
2247 
2248 	page_info = get_rx_page_info(rxo);
2249 	start = page_address(page_info->page) + page_info->page_offset;
2250 	prefetch(start);
2251 
2252 	/* Copy data in the first descriptor of this completion */
2253 	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2254 
2255 	skb->len = curr_frag_len;
2256 	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2257 		memcpy(skb->data, start, curr_frag_len);
2258 		/* Complete packet has now been moved to data */
2259 		put_page(page_info->page);
2260 		skb->data_len = 0;
2261 		skb->tail += curr_frag_len;
2262 	} else {
2263 		hdr_len = ETH_HLEN;
2264 		memcpy(skb->data, start, hdr_len);
2265 		skb_shinfo(skb)->nr_frags = 1;
2266 		skb_frag_set_page(skb, 0, page_info->page);
2267 		skb_shinfo(skb)->frags[0].page_offset =
2268 					page_info->page_offset + hdr_len;
2269 		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2270 				  curr_frag_len - hdr_len);
2271 		skb->data_len = curr_frag_len - hdr_len;
2272 		skb->truesize += rx_frag_size;
2273 		skb->tail += hdr_len;
2274 	}
2275 	page_info->page = NULL;
2276 
2277 	if (rxcp->pkt_size <= rx_frag_size) {
2278 		BUG_ON(rxcp->num_rcvd != 1);
2279 		return;
2280 	}
2281 
2282 	/* More frags present for this completion */
2283 	remaining = rxcp->pkt_size - curr_frag_len;
2284 	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2285 		page_info = get_rx_page_info(rxo);
2286 		curr_frag_len = min(remaining, rx_frag_size);
2287 
2288 		/* Coalesce all frags from the same physical page in one slot */
2289 		if (page_info->page_offset == 0) {
2290 			/* Fresh page */
2291 			j++;
2292 			skb_frag_set_page(skb, j, page_info->page);
2293 			skb_shinfo(skb)->frags[j].page_offset =
2294 							page_info->page_offset;
2295 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2296 			skb_shinfo(skb)->nr_frags++;
2297 		} else {
2298 			put_page(page_info->page);
2299 		}
2300 
2301 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2302 		skb->len += curr_frag_len;
2303 		skb->data_len += curr_frag_len;
2304 		skb->truesize += rx_frag_size;
2305 		remaining -= curr_frag_len;
2306 		page_info->page = NULL;
2307 	}
2308 	BUG_ON(j > MAX_SKB_FRAGS);
2309 }
2310 
/* Process the RX completion indicated by rxcp when GRO is disabled:
 * build an skb, set its checksum/hash/vlan metadata and hand it to the
 * network stack.
 */
static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
				struct be_rx_compl_info *rxcp)
{
	struct be_adapter *adapter = rxo->adapter;
	struct net_device *netdev = adapter->netdev;
	struct sk_buff *skb;

	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
	if (unlikely(!skb)) {
		rx_stats(rxo)->rx_drops_no_skbs++;
		/* No skb: drop the buffers posted for this completion */
		be_rx_compl_discard(rxo, rxcp);
		return;
	}

	skb_fill_rx_data(rxo, skb, rxcp);

	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	else
		skb_checksum_none_assert(skb);

	skb->protocol = eth_type_trans(skb, netdev);
	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
	if (netdev->features & NETIF_F_RXHASH)
		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);

	skb->csum_level = rxcp->tunneled;
	skb_mark_napi_id(skb, napi);

	if (rxcp->vlanf)
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);

	netif_receive_skb(skb);
}
2346 
/* Process the RX completion indicated by rxcp when GRO is enabled:
 * attach the received buffers as page frags to a napi-provided skb and
 * feed it into the GRO engine.
 */
static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
				    struct napi_struct *napi,
				    struct be_rx_compl_info *rxcp)
{
	struct be_adapter *adapter = rxo->adapter;
	struct be_rx_page_info *page_info;
	struct sk_buff *skb = NULL;
	u16 remaining, curr_frag_len;
	u16 i, j;

	skb = napi_get_frags(napi);
	if (!skb) {
		be_rx_compl_discard(rxo, rxcp);
		return;
	}

	remaining = rxcp->pkt_size;
	/* j is u16: starting at -1 (i.e. 0xffff) makes the first j++ yield 0 */
	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
		page_info = get_rx_page_info(rxo);

		curr_frag_len = min(remaining, rx_frag_size);

		/* Coalesce all frags from the same physical page in one slot */
		if (i == 0 || page_info->page_offset == 0) {
			/* First frag or Fresh page */
			j++;
			skb_frag_set_page(skb, j, page_info->page);
			skb_shinfo(skb)->frags[j].page_offset =
							page_info->page_offset;
			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
		} else {
			/* Same page as previous frag: drop the extra page ref */
			put_page(page_info->page);
		}
		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
		skb->truesize += rx_frag_size;
		remaining -= curr_frag_len;
		memset(page_info, 0, sizeof(*page_info));
	}
	BUG_ON(j > MAX_SKB_FRAGS);

	skb_shinfo(skb)->nr_frags = j + 1;
	skb->len = rxcp->pkt_size;
	skb->data_len = rxcp->pkt_size;
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
	if (adapter->netdev->features & NETIF_F_RXHASH)
		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);

	skb->csum_level = rxcp->tunneled;

	if (rxcp->vlanf)
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);

	napi_gro_frags(napi);
}
2403 
/* Decode a v1 RX completion entry (used when adapter->be3_native) into the
 * chip-independent rxcp representation.
 */
static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
				 struct be_rx_compl_info *rxcp)
{
	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
	if (rxcp->vlanf) {
		/* vlan fields are meaningful only for vlan-tagged pkts */
		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
	}
	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
	rxcp->tunneled =
		GET_RX_COMPL_V1_BITS(tunneled, compl);
}
2426 
/* Decode a legacy v0 RX completion entry into the chip-independent rxcp
 * representation.  Unlike v1 it reports ip_frag but not tunneled.
 */
static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
				 struct be_rx_compl_info *rxcp)
{
	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
	if (rxcp->vlanf) {
		/* vlan fields are meaningful only for vlan-tagged pkts */
		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
	}
	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
}
2448 
/* Fetch the next valid RX completion from the CQ, or NULL if none.
 * The entry is converted to CPU byte order, parsed into rxo->rxcp,
 * post-processed for vlan quirks and invalidated so it is consumed once.
 */
static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
{
	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
	struct be_rx_compl_info *rxcp = &rxo->rxcp;
	struct be_adapter *adapter = rxo->adapter;

	/* For checking the valid bit it is Ok to use either definition as the
	 * valid bit is at the same position in both v0 and v1 Rx compl */
	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
		return NULL;

	/* Read the rest of the entry only after the valid bit is seen set */
	rmb();
	be_dws_le_to_cpu(compl, sizeof(*compl));

	if (adapter->be3_native)
		be_parse_rx_compl_v1(compl, rxcp);
	else
		be_parse_rx_compl_v0(compl, rxcp);

	if (rxcp->ip_frag)
		rxcp->l4_csum = 0;	/* L4 csum is not valid for IP fragments */

	if (rxcp->vlanf) {
		/* In QNQ modes, if qnq bit is not set, then the packet was
		 * tagged only with the transparent outer vlan-tag and must
		 * not be treated as a vlan packet by host
		 */
		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
			rxcp->vlanf = 0;

		if (!lancer_chip(adapter))
			rxcp->vlan_tag = swab16(rxcp->vlan_tag);

		/* NOTE(review): test_bit() is indexed with the full vlan_tag,
		 * which may still carry priority bits - presumably HW reports
		 * VID-only tags here; confirm against the vids bitmap size.
		 */
		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
		    !test_bit(rxcp->vlan_tag, adapter->vids))
			rxcp->vlanf = 0;
	}

	/* As the compl has been parsed, reset it; we wont touch it again */
	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;

	queue_tail_inc(&rxo->cq);
	return rxcp;
}
2493 
2494 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2495 {
2496 	u32 order = get_order(size);
2497 
2498 	if (order > 0)
2499 		gfp |= __GFP_COMP;
2500 	return  alloc_pages(gfp, order);
2501 }
2502 
2503 /*
2504  * Allocate a page, split it to fragments of size rx_frag_size and post as
2505  * receive buffers to BE
2506  */
2507 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2508 {
2509 	struct be_adapter *adapter = rxo->adapter;
2510 	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2511 	struct be_queue_info *rxq = &rxo->q;
2512 	struct page *pagep = NULL;
2513 	struct device *dev = &adapter->pdev->dev;
2514 	struct be_eth_rx_d *rxd;
2515 	u64 page_dmaaddr = 0, frag_dmaaddr;
2516 	u32 posted, page_offset = 0, notify = 0;
2517 
2518 	page_info = &rxo->page_info_tbl[rxq->head];
2519 	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2520 		if (!pagep) {
2521 			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2522 			if (unlikely(!pagep)) {
2523 				rx_stats(rxo)->rx_post_fail++;
2524 				break;
2525 			}
2526 			page_dmaaddr = dma_map_page(dev, pagep, 0,
2527 						    adapter->big_page_size,
2528 						    DMA_FROM_DEVICE);
2529 			if (dma_mapping_error(dev, page_dmaaddr)) {
2530 				put_page(pagep);
2531 				pagep = NULL;
2532 				adapter->drv_stats.dma_map_errors++;
2533 				break;
2534 			}
2535 			page_offset = 0;
2536 		} else {
2537 			get_page(pagep);
2538 			page_offset += rx_frag_size;
2539 		}
2540 		page_info->page_offset = page_offset;
2541 		page_info->page = pagep;
2542 
2543 		rxd = queue_head_node(rxq);
2544 		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2545 		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2546 		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2547 
2548 		/* Any space left in the current big page for another frag? */
2549 		if ((page_offset + rx_frag_size + rx_frag_size) >
2550 					adapter->big_page_size) {
2551 			pagep = NULL;
2552 			page_info->last_frag = true;
2553 			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2554 		} else {
2555 			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2556 		}
2557 
2558 		prev_page_info = page_info;
2559 		queue_head_inc(rxq);
2560 		page_info = &rxo->page_info_tbl[rxq->head];
2561 	}
2562 
2563 	/* Mark the last frag of a page when we break out of the above loop
2564 	 * with no more slots available in the RXQ
2565 	 */
2566 	if (pagep) {
2567 		prev_page_info->last_frag = true;
2568 		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2569 	}
2570 
2571 	if (posted) {
2572 		atomic_add(posted, &rxq->used);
2573 		if (rxo->rx_post_starved)
2574 			rxo->rx_post_starved = false;
2575 		do {
2576 			notify = min(MAX_NUM_POST_ERX_DB, posted);
2577 			be_rxq_notify(adapter, rxq->id, notify);
2578 			posted -= notify;
2579 		} while (posted);
2580 	} else if (atomic_read(&rxq->used) == 0) {
2581 		/* Let be_worker replenish when memory is available */
2582 		rxo->rx_post_starved = true;
2583 	}
2584 }
2585 
/* Fetch the next valid TX completion from @txo's CQ, or NULL if none.
 * The returned txcp is per-txo scratch storage; the CQ entry is
 * invalidated so it is consumed exactly once.
 */
static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
{
	struct be_queue_info *tx_cq = &txo->cq;
	struct be_tx_compl_info *txcp = &txo->txcp;
	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);

	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
		return NULL;

	/* Ensure load ordering of valid bit dword and other dwords below */
	rmb();
	be_dws_le_to_cpu(compl, sizeof(*compl));

	txcp->status = GET_TX_COMPL_BITS(status, compl);
	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);

	/* Invalidate the entry and advance past it */
	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
	queue_tail_inc(tx_cq);
	return txcp;
}
2606 
/* Walk the TXQ from its tail up to and including @last_index, unmapping
 * every WRB and freeing the skbs of completed TX requests.  Returns the
 * number of WRBs reclaimed; the caller subtracts it from txq->used.
 */
static u16 be_tx_compl_process(struct be_adapter *adapter,
			       struct be_tx_obj *txo, u16 last_index)
{
	struct sk_buff **sent_skbs = txo->sent_skb_list;
	struct be_queue_info *txq = &txo->q;
	struct sk_buff *skb = NULL;
	bool unmap_skb_hdr = false;
	struct be_eth_wrb *wrb;
	u16 num_wrbs = 0;
	u32 frag_index;

	do {
		/* A non-NULL sent_skbs[] entry marks the hdr wrb of a request */
		if (sent_skbs[txq->tail]) {
			/* Free skb from prev req */
			if (skb)
				dev_consume_skb_any(skb);
			skb = sent_skbs[txq->tail];
			sent_skbs[txq->tail] = NULL;
			queue_tail_inc(txq);  /* skip hdr wrb */
			num_wrbs++;
			unmap_skb_hdr = true;
		}
		wrb = queue_tail_node(txq);
		frag_index = txq->tail;
		/* The header is unmapped only with the first frag of each skb */
		unmap_tx_frag(&adapter->pdev->dev, wrb,
			      (unmap_skb_hdr && skb_headlen(skb)));
		unmap_skb_hdr = false;
		queue_tail_inc(txq);
		num_wrbs++;
	} while (frag_index != last_index);
	dev_consume_skb_any(skb);	/* free the last request's skb */

	return num_wrbs;
}
2641 
/* Return the number of events in the event queue, consuming (zeroing)
 * each entry as it is counted.
 */
static inline int events_get(struct be_eq_obj *eqo)
{
	struct be_eq_entry *eqe;
	int num = 0;

	do {
		eqe = queue_tail_node(&eqo->q);
		if (eqe->evt == 0)
			break;

		/* Order the clearing of evt after it was seen non-zero */
		rmb();
		eqe->evt = 0;
		num++;
		queue_tail_inc(&eqo->q);
	} while (true);

	return num;
}
2661 
2662 /* Leaves the EQ is disarmed state */
2663 static void be_eq_clean(struct be_eq_obj *eqo)
2664 {
2665 	int num = events_get(eqo);
2666 
2667 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2668 }
2669 
2670 /* Free posted rx buffers that were not used */
2671 static void be_rxq_clean(struct be_rx_obj *rxo)
2672 {
2673 	struct be_queue_info *rxq = &rxo->q;
2674 	struct be_rx_page_info *page_info;
2675 
2676 	while (atomic_read(&rxq->used) > 0) {
2677 		page_info = get_rx_page_info(rxo);
2678 		put_page(page_info->page);
2679 		memset(page_info, 0, sizeof(*page_info));
2680 	}
2681 	BUG_ON(atomic_read(&rxq->used));
2682 	rxq->tail = 0;
2683 	rxq->head = 0;
2684 }
2685 
/* Drain all completions from an RX CQ prior to queue destruction */
static void be_rx_cq_clean(struct be_rx_obj *rxo)
{
	struct be_queue_info *rx_cq = &rxo->cq;
	struct be_rx_compl_info *rxcp;
	struct be_adapter *adapter = rxo->adapter;
	int flush_wait = 0;

	/* Consume pending rx completions.
	 * Wait for the flush completion (identified by zero num_rcvd)
	 * to arrive. Notify CQ even when there are no more CQ entries
	 * for HW to flush partially coalesced CQ entries.
	 * In Lancer, there is no need to wait for flush compl.
	 */
	for (;;) {
		rxcp = be_rx_compl_get(rxo);
		if (!rxcp) {
			if (lancer_chip(adapter))
				break;

			/* Give up after ~50ms of polling or on HW error */
			if (flush_wait++ > 50 ||
			    be_check_error(adapter,
					   BE_ERROR_HW)) {
				dev_warn(&adapter->pdev->dev,
					 "did not receive flush compl\n");
				break;
			}
			be_cq_notify(adapter, rx_cq->id, true, 0);
			mdelay(1);
		} else {
			be_rx_compl_discard(rxo, rxcp);
			be_cq_notify(adapter, rx_cq->id, false, 1);
			if (rxcp->num_rcvd == 0)
				break;
		}
	}

	/* After cleanup, leave the CQ in unarmed state */
	be_cq_notify(adapter, rx_cq->id, false, 0);
}
2725 
/* Drain TX completions from all TX queues, then reclaim any requests that
 * were enqueued but never notified to the HW and reset those TXQs' indices.
 */
static void be_tx_compl_clean(struct be_adapter *adapter)
{
	struct device *dev = &adapter->pdev->dev;
	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
	struct be_tx_compl_info *txcp;
	struct be_queue_info *txq;
	u32 end_idx, notified_idx;
	struct be_tx_obj *txo;
	int i, pending_txqs;

	/* Stop polling for compls when HW has been silent for 10ms */
	do {
		pending_txqs = adapter->num_tx_qs;

		for_all_tx_queues(adapter, txo, i) {
			cmpl = 0;
			num_wrbs = 0;
			txq = &txo->q;
			while ((txcp = be_tx_compl_get(txo))) {
				num_wrbs +=
					be_tx_compl_process(adapter, txo,
							    txcp->end_index);
				cmpl++;
			}
			if (cmpl) {
				be_cq_notify(adapter, txo->cq.id, false, cmpl);
				atomic_sub(num_wrbs, &txq->used);
				timeo = 0;	/* progress seen; restart the 10ms clock */
			}
			if (!be_is_tx_compl_pending(txo))
				pending_txqs--;
		}

		if (pending_txqs == 0 || ++timeo > 10 ||
		    be_check_error(adapter, BE_ERROR_HW))
			break;

		mdelay(1);
	} while (true);

	/* Free enqueued TX that was never notified to HW */
	for_all_tx_queues(adapter, txo, i) {
		txq = &txo->q;

		if (atomic_read(&txq->used)) {
			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
				 i, atomic_read(&txq->used));
			notified_idx = txq->tail;
			end_idx = txq->tail;
			index_adv(&end_idx, atomic_read(&txq->used) - 1,
				  txq->len);
			/* Use the tx-compl process logic to handle requests
			 * that were not sent to the HW.
			 */
			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
			atomic_sub(num_wrbs, &txq->used);
			BUG_ON(atomic_read(&txq->used));
			txo->pend_wrb_cnt = 0;
			/* Since hw was never notified of these requests,
			 * reset TXQ indices
			 */
			txq->head = notified_idx;
			txq->tail = notified_idx;
		}
	}
}
2792 
2793 static void be_evt_queues_destroy(struct be_adapter *adapter)
2794 {
2795 	struct be_eq_obj *eqo;
2796 	int i;
2797 
2798 	for_all_evt_queues(adapter, eqo, i) {
2799 		if (eqo->q.created) {
2800 			be_eq_clean(eqo);
2801 			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2802 			napi_hash_del(&eqo->napi);
2803 			netif_napi_del(&eqo->napi);
2804 			free_cpumask_var(eqo->affinity_mask);
2805 		}
2806 		be_queue_free(adapter, &eqo->q);
2807 	}
2808 }
2809 
/* Allocate and create the event queues - one per IRQ vector in use.  Each
 * EQ gets a NAPI context and a CPU affinity hint spread over the device's
 * NUMA node.  On mid-loop failure, queues created so far are left for the
 * caller to tear down (see be_evt_queues_destroy()).
 */
static int be_evt_queues_create(struct be_adapter *adapter)
{
	struct be_queue_info *eq;
	struct be_eq_obj *eqo;
	struct be_aic_obj *aic;
	int i, rc;

	/* need enough EQs to service both RX and TX queues */
	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
				    max(adapter->cfg_num_rx_irqs,
					adapter->cfg_num_tx_irqs));

	for_all_evt_queues(adapter, eqo, i) {
		int numa_node = dev_to_node(&adapter->pdev->dev);

		aic = &adapter->aic_obj[i];
		eqo->adapter = adapter;
		eqo->idx = i;
		aic->max_eqd = BE_MAX_EQD;
		aic->enable = true;	/* adaptive coalescing on by default */

		eq = &eqo->q;
		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
				    sizeof(struct be_eq_entry));
		if (rc)
			return rc;

		rc = be_cmd_eq_create(adapter, eqo);
		if (rc)
			return rc;

		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
			return -ENOMEM;
		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
				eqo->affinity_mask);
		netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
			       BE_NAPI_WEIGHT);
	}
	return 0;
}
2850 
2851 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2852 {
2853 	struct be_queue_info *q;
2854 
2855 	q = &adapter->mcc_obj.q;
2856 	if (q->created)
2857 		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2858 	be_queue_free(adapter, q);
2859 
2860 	q = &adapter->mcc_obj.cq;
2861 	if (q->created)
2862 		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2863 	be_queue_free(adapter, q);
2864 }
2865 
2866 /* Must be called only after TX qs are created as MCC shares TX EQ */
2867 static int be_mcc_queues_create(struct be_adapter *adapter)
2868 {
2869 	struct be_queue_info *q, *cq;
2870 
2871 	cq = &adapter->mcc_obj.cq;
2872 	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2873 			   sizeof(struct be_mcc_compl)))
2874 		goto err;
2875 
2876 	/* Use the default EQ for MCC completions */
2877 	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2878 		goto mcc_cq_free;
2879 
2880 	q = &adapter->mcc_obj.q;
2881 	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2882 		goto mcc_cq_destroy;
2883 
2884 	if (be_cmd_mccq_create(adapter, q, cq))
2885 		goto mcc_q_free;
2886 
2887 	return 0;
2888 
2889 mcc_q_free:
2890 	be_queue_free(adapter, q);
2891 mcc_cq_destroy:
2892 	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2893 mcc_cq_free:
2894 	be_queue_free(adapter, cq);
2895 err:
2896 	return -1;
2897 }
2898 
2899 static void be_tx_queues_destroy(struct be_adapter *adapter)
2900 {
2901 	struct be_queue_info *q;
2902 	struct be_tx_obj *txo;
2903 	u8 i;
2904 
2905 	for_all_tx_queues(adapter, txo, i) {
2906 		q = &txo->q;
2907 		if (q->created)
2908 			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2909 		be_queue_free(adapter, q);
2910 
2911 		q = &txo->cq;
2912 		if (q->created)
2913 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2914 		be_queue_free(adapter, q);
2915 	}
2916 }
2917 
/* Create the TX completion queues and TX WRB queues.  Each TXQ's CQ is
 * bound to an EQ (round-robin when there are more TXQs than EQs) and the
 * EQ's affinity mask is used to set up XPS for the queue.
 */
static int be_tx_qs_create(struct be_adapter *adapter)
{
	struct be_queue_info *cq;
	struct be_tx_obj *txo;
	struct be_eq_obj *eqo;
	int status, i;

	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);

	for_all_tx_queues(adapter, txo, i) {
		cq = &txo->cq;
		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
					sizeof(struct be_eth_tx_compl));
		if (status)
			return status;

		u64_stats_init(&txo->stats.sync);
		u64_stats_init(&txo->stats.sync_compl);

		/* If num_evt_qs is less than num_tx_qs, then more than
		 * one txq share an eq
		 */
		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
		if (status)
			return status;

		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
					sizeof(struct be_eth_wrb));
		if (status)
			return status;

		status = be_cmd_txq_create(adapter, txo);
		if (status)
			return status;

		/* Steer TX from CPUs near the EQ that services this queue */
		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
				    eqo->idx);
	}

	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
		 adapter->num_tx_qs);
	return 0;
}
2962 
2963 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2964 {
2965 	struct be_queue_info *q;
2966 	struct be_rx_obj *rxo;
2967 	int i;
2968 
2969 	for_all_rx_queues(adapter, rxo, i) {
2970 		q = &rxo->cq;
2971 		if (q->created)
2972 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2973 		be_queue_free(adapter, q);
2974 	}
2975 }
2976 
/* Decide how many RX queues to use (RSS rings plus an optional default
 * RXQ) and create a completion queue for each, bound round-robin to the
 * event queues.
 */
static int be_rx_cqs_create(struct be_adapter *adapter)
{
	struct be_queue_info *eq, *cq;
	struct be_rx_obj *rxo;
	int rc, i;

	adapter->num_rss_qs =
			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);

	/* We'll use RSS only if atleast 2 RSS rings are supported. */
	if (adapter->num_rss_qs < 2)
		adapter->num_rss_qs = 0;

	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;

	/* When the interface is not capable of RSS rings (and there is no
	 * need to create a default RXQ) we'll still need one RXQ
	 */
	if (adapter->num_rx_qs == 0)
		adapter->num_rx_qs = 1;

	/* big_page_size: the unit of allocation for RX frags */
	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
	for_all_rx_queues(adapter, rxo, i) {
		rxo->adapter = adapter;
		cq = &rxo->cq;
		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
				    sizeof(struct be_eth_rx_compl));
		if (rc)
			return rc;

		u64_stats_init(&rxo->stats.sync);
		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
		if (rc)
			return rc;
	}

	dev_info(&adapter->pdev->dev,
		 "created %d RX queue(s)\n", adapter->num_rx_qs);
	return 0;
}
3018 
/* INTx interrupt handler (EQ0 only): schedules NAPI and acks the events
 * counted so far. See the inline comments for the Lancer/BE2 spurious
 * interrupt quirks this handler works around.
 */
static irqreturn_t be_intx(int irq, void *dev)
{
	struct be_eq_obj *eqo = dev;
	struct be_adapter *adapter = eqo->adapter;
	int num_evts = 0;

	/* IRQ is not expected when NAPI is scheduled as the EQ
	 * will not be armed.
	 * But, this can happen on Lancer INTx where it takes
	 * a while to de-assert INTx or in BE2 where occasionaly
	 * an interrupt may be raised even when EQ is unarmed.
	 * If NAPI is already scheduled, then counting & notifying
	 * events will orphan them.
	 */
	if (napi_schedule_prep(&eqo->napi)) {
		num_evts = events_get(eqo);
		__napi_schedule(&eqo->napi);
		if (num_evts)
			eqo->spurious_intr = 0;
	}
	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);

	/* Return IRQ_HANDLED only for the the first spurious intr
	 * after a valid intr to stop the kernel from branding
	 * this irq as a bad one!
	 */
	if (num_evts || eqo->spurious_intr++ == 0)
		return IRQ_HANDLED;
	else
		return IRQ_NONE;
}
3050 
3051 static irqreturn_t be_msix(int irq, void *dev)
3052 {
3053 	struct be_eq_obj *eqo = dev;
3054 
3055 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3056 	napi_schedule(&eqo->napi);
3057 	return IRQ_HANDLED;
3058 }
3059 
3060 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3061 {
3062 	return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3063 }
3064 
/* Process up to @budget RX completions from @rxo's CQ.
 * Flush completions (num_rcvd == 0), partial-DMA completions (Lancer B0)
 * and wrong-port packets (imperfect promisc filtering on BE) are dropped.
 * Processed entries are acked to HW and the RXQ is replenished unless it
 * is in the post_starved state. Returns the number of completions
 * processed.
 */
static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
			 int budget, int polling)
{
	struct be_adapter *adapter = rxo->adapter;
	struct be_queue_info *rx_cq = &rxo->cq;
	struct be_rx_compl_info *rxcp;
	u32 work_done;
	u32 frags_consumed = 0;

	for (work_done = 0; work_done < budget; work_done++) {
		rxcp = be_rx_compl_get(rxo);
		if (!rxcp)
			break;

		/* Is it a flush compl that has no data */
		if (unlikely(rxcp->num_rcvd == 0))
			goto loop_continue;

		/* Discard compl with partial DMA Lancer B0 */
		if (unlikely(!rxcp->pkt_size)) {
			be_rx_compl_discard(rxo, rxcp);
			goto loop_continue;
		}

		/* On BE drop pkts that arrive due to imperfect filtering in
		 * promiscuous mode on some skews
		 */
		if (unlikely(rxcp->port != adapter->port_num &&
			     !lancer_chip(adapter))) {
			be_rx_compl_discard(rxo, rxcp);
			goto loop_continue;
		}

		/* Don't do gro when we're busy_polling */
		if (do_gro(rxcp) && polling != BUSY_POLLING)
			be_rx_compl_process_gro(rxo, napi, rxcp);
		else
			be_rx_compl_process(rxo, napi, rxcp);

loop_continue:
		frags_consumed += rxcp->num_rcvd;
		be_rx_stats_update(rxo, rxcp);
	}

	if (work_done) {
		be_cq_notify(adapter, rx_cq->id, true, work_done);

		/* When an rx-obj gets into post_starved state, just
		 * let be_worker do the posting.
		 */
		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
		    !rxo->rx_post_starved)
			be_post_rx_frags(rxo, GFP_ATOMIC,
					 max_t(u32, MAX_RX_POST,
					       frags_consumed));
	}

	return work_done;
}
3124 
3125 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3126 {
3127 	switch (status) {
3128 	case BE_TX_COMP_HDR_PARSE_ERR:
3129 		tx_stats(txo)->tx_hdr_parse_err++;
3130 		break;
3131 	case BE_TX_COMP_NDMA_ERR:
3132 		tx_stats(txo)->tx_dma_err++;
3133 		break;
3134 	case BE_TX_COMP_ACL_ERR:
3135 		tx_stats(txo)->tx_spoof_check_err++;
3136 		break;
3137 	}
3138 }
3139 
3140 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3141 {
3142 	switch (status) {
3143 	case LANCER_TX_COMP_LSO_ERR:
3144 		tx_stats(txo)->tx_tso_err++;
3145 		break;
3146 	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3147 	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3148 		tx_stats(txo)->tx_spoof_check_err++;
3149 		break;
3150 	case LANCER_TX_COMP_QINQ_ERR:
3151 		tx_stats(txo)->tx_qinq_err++;
3152 		break;
3153 	case LANCER_TX_COMP_PARITY_ERR:
3154 		tx_stats(txo)->tx_internal_parity_err++;
3155 		break;
3156 	case LANCER_TX_COMP_DMA_ERR:
3157 		tx_stats(txo)->tx_dma_err++;
3158 		break;
3159 	}
3160 }
3161 
/* Reap all available TX completions on @txo: free the completed wrbs,
 * record any HW-reported errors, ack the CQ, and wake netdev subqueue
 * @idx if it was stopped for lack of wrbs.
 */
static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
			  int idx)
{
	int num_wrbs = 0, work_done = 0;
	struct be_tx_compl_info *txcp;

	while ((txcp = be_tx_compl_get(txo))) {
		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
		work_done++;

		if (txcp->status) {
			if (lancer_chip(adapter))
				lancer_update_tx_err(txo, txcp->status);
			else
				be_update_tx_err(txo, txcp->status);
		}
	}

	if (work_done) {
		be_cq_notify(adapter, txo->cq.id, true, work_done);
		atomic_sub(num_wrbs, &txo->q.used);

		/* As Tx wrbs have been freed up, wake up netdev queue
		 * if it was stopped due to lack of tx wrbs.  */
		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
		    be_can_txq_wake(txo)) {
			netif_wake_subqueue(adapter->netdev, idx);
		}

		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
		tx_stats(txo)->tx_compl += work_done;
		u64_stats_update_end(&tx_stats(txo)->sync_compl);
	}
}
3196 
3197 #ifdef CONFIG_NET_RX_BUSY_POLL
3198 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3199 {
3200 	bool status = true;
3201 
3202 	spin_lock(&eqo->lock); /* BH is already disabled */
3203 	if (eqo->state & BE_EQ_LOCKED) {
3204 		WARN_ON(eqo->state & BE_EQ_NAPI);
3205 		eqo->state |= BE_EQ_NAPI_YIELD;
3206 		status = false;
3207 	} else {
3208 		eqo->state = BE_EQ_NAPI;
3209 	}
3210 	spin_unlock(&eqo->lock);
3211 	return status;
3212 }
3213 
/* Drop the EQ "lock" taken by be_lock_napi(). Seeing POLL or NAPI_YIELD
 * state here would indicate a locking bug, hence the WARN_ON.
 */
static inline void be_unlock_napi(struct be_eq_obj *eqo)
{
	spin_lock(&eqo->lock); /* BH is already disabled */

	WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
	eqo->state = BE_EQ_IDLE;

	spin_unlock(&eqo->lock);
}
3223 
3224 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3225 {
3226 	bool status = true;
3227 
3228 	spin_lock_bh(&eqo->lock);
3229 	if (eqo->state & BE_EQ_LOCKED) {
3230 		eqo->state |= BE_EQ_POLL_YIELD;
3231 		status = false;
3232 	} else {
3233 		eqo->state |= BE_EQ_POLL;
3234 	}
3235 	spin_unlock_bh(&eqo->lock);
3236 	return status;
3237 }
3238 
/* Drop the EQ "lock" taken by be_lock_busy_poll(). Seeing NAPI state
 * here would indicate a locking bug, hence the WARN_ON.
 */
static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
{
	spin_lock_bh(&eqo->lock);

	WARN_ON(eqo->state & (BE_EQ_NAPI));
	eqo->state = BE_EQ_IDLE;

	spin_unlock_bh(&eqo->lock);
}
3248 
/* (Re)initialize the EQ's busy-poll lock state; called from be_open() */
static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
{
	spin_lock_init(&eqo->lock);
	eqo->state = BE_EQ_IDLE;
}
3254 
/* Shut out busy-poll on this EQ by grabbing (and keeping) the NAPI side
 * of the lock; called from be_close() before queues are torn down.
 */
static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
{
	local_bh_disable();

	/* It's enough to just acquire napi lock on the eqo to stop
	 * be_busy_poll() from processing any queues.
	 */
	while (!be_lock_napi(eqo))
		mdelay(1);

	local_bh_enable();
}
3267 
3268 #else /* CONFIG_NET_RX_BUSY_POLL */
3269 
/* CONFIG_NET_RX_BUSY_POLL is not set: NAPI/busy-poll arbitration is not
 * needed, so the lock helpers degenerate to no-ops and busy-poll lock
 * acquisition always fails.
 */
static inline bool be_lock_napi(struct be_eq_obj *eqo)
{
	return true;
}

static inline void be_unlock_napi(struct be_eq_obj *eqo)
{
}

static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
{
	return false;
}

static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
{
}

static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
{
}

static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
{
}
3295 #endif /* CONFIG_NET_RX_BUSY_POLL */
3296 
3297 int be_poll(struct napi_struct *napi, int budget)
3298 {
3299 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3300 	struct be_adapter *adapter = eqo->adapter;
3301 	int max_work = 0, work, i, num_evts;
3302 	struct be_rx_obj *rxo;
3303 	struct be_tx_obj *txo;
3304 	u32 mult_enc = 0;
3305 
3306 	num_evts = events_get(eqo);
3307 
3308 	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3309 		be_process_tx(adapter, txo, i);
3310 
3311 	if (be_lock_napi(eqo)) {
3312 		/* This loop will iterate twice for EQ0 in which
3313 		 * completions of the last RXQ (default one) are also processed
3314 		 * For other EQs the loop iterates only once
3315 		 */
3316 		for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3317 			work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3318 			max_work = max(work, max_work);
3319 		}
3320 		be_unlock_napi(eqo);
3321 	} else {
3322 		max_work = budget;
3323 	}
3324 
3325 	if (is_mcc_eqo(eqo))
3326 		be_process_mcc(adapter);
3327 
3328 	if (max_work < budget) {
3329 		napi_complete(napi);
3330 
3331 		/* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3332 		 * delay via a delay multiplier encoding value
3333 		 */
3334 		if (skyhawk_chip(adapter))
3335 			mult_enc = be_get_eq_delay_mult_enc(eqo);
3336 
3337 		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3338 			     mult_enc);
3339 	} else {
3340 		/* As we'll continue in polling mode, count and clear events */
3341 		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3342 	}
3343 	return max_work;
3344 }
3345 
3346 #ifdef CONFIG_NET_RX_BUSY_POLL
3347 static int be_busy_poll(struct napi_struct *napi)
3348 {
3349 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3350 	struct be_adapter *adapter = eqo->adapter;
3351 	struct be_rx_obj *rxo;
3352 	int i, work = 0;
3353 
3354 	if (!be_lock_busy_poll(eqo))
3355 		return LL_FLUSH_BUSY;
3356 
3357 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3358 		work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3359 		if (work)
3360 			break;
3361 	}
3362 
3363 	be_unlock_busy_poll(eqo);
3364 	return work;
3365 }
3366 #endif
3367 
/* Poll the adapter's error registers and record/log any failures.
 * On Lancer the SLIPORT status registers are checked (a FW-reset
 * signature is reported quietly as a firmware update); on other chips the
 * unmasked UE (unrecoverable error) bits are decoded and logged. See the
 * inline comment for why UE does not always set the error state on BE.
 * No-op if a HW error was already recorded.
 */
void be_detect_error(struct be_adapter *adapter)
{
	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
	u32 i;
	struct device *dev = &adapter->pdev->dev;

	if (be_check_error(adapter, BE_ERROR_HW))
		return;

	if (lancer_chip(adapter)) {
		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
			be_set_error(adapter, BE_ERROR_UE);
			sliport_err1 = ioread32(adapter->db +
						SLIPORT_ERROR1_OFFSET);
			sliport_err2 = ioread32(adapter->db +
						SLIPORT_ERROR2_OFFSET);
			/* Do not log error messages if its a FW reset */
			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
				dev_info(dev, "Firmware update in progress\n");
			} else {
				dev_err(dev, "Error detected in the card\n");
				dev_err(dev, "ERR: sliport status 0x%x\n",
					sliport_status);
				dev_err(dev, "ERR: sliport error1 0x%x\n",
					sliport_err1);
				dev_err(dev, "ERR: sliport error2 0x%x\n",
					sliport_err2);
			}
		}
	} else {
		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
		ue_lo_mask = ioread32(adapter->pcicfg +
				      PCICFG_UE_STATUS_LOW_MASK);
		ue_hi_mask = ioread32(adapter->pcicfg +
				      PCICFG_UE_STATUS_HI_MASK);

		ue_lo = (ue_lo & ~ue_lo_mask);
		ue_hi = (ue_hi & ~ue_hi_mask);

		/* On certain platforms BE hardware can indicate spurious UEs.
		 * Allow HW to stop working completely in case of a real UE.
		 * Hence not setting the hw_error for UE detection.
		 */

		if (ue_lo || ue_hi) {
			/* NOTE(review): this message lacks a trailing '\n' */
			dev_err(dev, "Error detected in the adapter");
			if (skyhawk_chip(adapter))
				be_set_error(adapter, BE_ERROR_UE);

			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
				if (ue_lo & 1)
					dev_err(dev, "UE: %s bit set\n",
						ue_status_low_desc[i]);
			}
			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
				if (ue_hi & 1)
					dev_err(dev, "UE: %s bit set\n",
						ue_status_hi_desc[i]);
			}
		}
	}
}
3434 
3435 static void be_msix_disable(struct be_adapter *adapter)
3436 {
3437 	if (msix_enabled(adapter)) {
3438 		pci_disable_msix(adapter->pdev);
3439 		adapter->num_msix_vec = 0;
3440 		adapter->num_msix_roce_vec = 0;
3441 	}
3442 }
3443 
/* Enable MSI-x vectors for this function. When RoCE is supported, vectors
 * are requested to cover both NIC and RoCE EQs and the grant is split
 * between the two; otherwise only the configured NIC irqs are requested.
 * MSI-x failure is fatal only for VFs, which cannot fall back to INTx.
 */
static int be_msix_enable(struct be_adapter *adapter)
{
	unsigned int i, max_roce_eqs;
	struct device *dev = &adapter->pdev->dev;
	int num_vec;

	/* If RoCE is supported, program the max number of vectors that
	 * could be used for NIC and RoCE, else, just program the number
	 * we'll use initially.
	 */
	if (be_roce_supported(adapter)) {
		max_roce_eqs =
			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
	} else {
		num_vec = max(adapter->cfg_num_rx_irqs,
			      adapter->cfg_num_tx_irqs);
	}

	for (i = 0; i < num_vec; i++)
		adapter->msix_entries[i].entry = i;

	/* May grant fewer vectors than requested (but >= MIN_MSIX_VECTORS) */
	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
					MIN_MSIX_VECTORS, num_vec);
	if (num_vec < 0)
		goto fail;

	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
		adapter->num_msix_roce_vec = num_vec / 2;
		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
			 adapter->num_msix_roce_vec);
	}

	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;

	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
		 adapter->num_msix_vec);
	return 0;

fail:
	dev_warn(dev, "MSIx enable failed\n");

	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
	if (be_virtfn(adapter))
		return num_vec;
	return 0;
}
3492 
/* Return the Linux irq number backing @eqo's MSI-x slot */
static inline int be_msix_vec_get(struct be_adapter *adapter,
				  struct be_eq_obj *eqo)
{
	return adapter->msix_entries[eqo->msix_idx].vector;
}
3498 
/* Request one irq per event queue and pin each to its EQ's CPU affinity
 * mask. On failure, irqs registered so far are freed and MSI-x is
 * disabled before the error is returned.
 */
static int be_msix_register(struct be_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct be_eq_obj *eqo;
	int status, i, vec;

	for_all_evt_queues(adapter, eqo, i) {
		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
		vec = be_msix_vec_get(adapter, eqo);
		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
		if (status)
			goto err_msix;

		irq_set_affinity_hint(vec, eqo->affinity_mask);
	}

	return 0;
err_msix:
	/* Unwind only the irqs registered before the failure */
	for (i--; i >= 0; i--) {
		eqo = &adapter->eq_obj[i];
		free_irq(be_msix_vec_get(adapter, eqo), eqo);
	}
	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
		 status);
	be_msix_disable(adapter);
	return status;
}
3526 
/* Register interrupts: MSI-x when enabled (mandatory for VFs), otherwise
 * fall back to shared INTx on EQ0. Marks isr_registered on success.
 */
static int be_irq_register(struct be_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int status;

	if (msix_enabled(adapter)) {
		status = be_msix_register(adapter);
		if (status == 0)
			goto done;
		/* INTx is not supported for VF */
		if (be_virtfn(adapter))
			return status;
	}

	/* INTx: only the first EQ is used */
	netdev->irq = adapter->pdev->irq;
	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
			     &adapter->eq_obj[0]);
	if (status) {
		dev_err(&adapter->pdev->dev,
			"INTx request IRQ failed - err %d\n", status);
		return status;
	}
done:
	adapter->isr_registered = true;
	return 0;
}
3554 
3555 static void be_irq_unregister(struct be_adapter *adapter)
3556 {
3557 	struct net_device *netdev = adapter->netdev;
3558 	struct be_eq_obj *eqo;
3559 	int i, vec;
3560 
3561 	if (!adapter->isr_registered)
3562 		return;
3563 
3564 	/* INTx */
3565 	if (!msix_enabled(adapter)) {
3566 		free_irq(netdev->irq, &adapter->eq_obj[0]);
3567 		goto done;
3568 	}
3569 
3570 	/* MSIx */
3571 	for_all_evt_queues(adapter, eqo, i) {
3572 		vec = be_msix_vec_get(adapter, eqo);
3573 		irq_set_affinity_hint(vec, NULL);
3574 		free_irq(vec, eqo);
3575 	}
3576 
3577 done:
3578 	adapter->isr_registered = false;
3579 }
3580 
/* Destroy all RXQs — applying a Lancer-specific buffer-posting workaround
 * to avoid a HW stall — drain their CQs/queues, and disable RSS if it was
 * enabled.
 */
static void be_rx_qs_destroy(struct be_adapter *adapter)
{
	struct rss_info *rss = &adapter->rss_info;
	struct be_queue_info *q;
	struct be_rx_obj *rxo;
	int i;

	for_all_rx_queues(adapter, rxo, i) {
		q = &rxo->q;
		if (q->created) {
			/* If RXQs are destroyed while in an "out of buffer"
			 * state, there is a possibility of an HW stall on
			 * Lancer. So, post 64 buffers to each queue to relieve
			 * the "out of buffer" condition.
			 * Make sure there's space in the RXQ before posting.
			 */
			if (lancer_chip(adapter)) {
				be_rx_cq_clean(rxo);
				if (atomic_read(&q->used) == 0)
					be_post_rx_frags(rxo, GFP_KERNEL,
							 MAX_RX_POST);
			}

			be_cmd_rxq_destroy(adapter, q);
			be_rx_cq_clean(rxo);
			be_rxq_clean(rxo);
		}
		be_queue_free(adapter, q);
	}

	if (rss->rss_flags) {
		rss->rss_flags = RSS_ENABLE_NONE;
		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
				  128, rss->rss_hkey);
	}
}
3617 
/* Remove the primary MAC and the UC/MC filter lists programmed on the
 * interface. See below for why the RX-filter IFACE flags themselves are
 * cleared only on Lancer.
 */
static void be_disable_if_filters(struct be_adapter *adapter)
{
	be_dev_mac_del(adapter, adapter->pmac_id[0]);
	be_clear_uc_list(adapter);
	be_clear_mc_list(adapter);

	/* The IFACE flags are enabled in the open path and cleared
	 * in the close path. When a VF gets detached from the host and
	 * assigned to a VM the following happens:
	 *	- VF's IFACE flags get cleared in the detach path
	 *	- IFACE create is issued by the VF in the attach path
	 * Due to a bug in the BE3/Skyhawk-R FW
	 * (Lancer FW doesn't have the bug), the IFACE capability flags
	 * specified along with the IFACE create cmd issued by a VF are not
	 * honoured by FW.  As a consequence, if a *new* driver
	 * (that enables/disables IFACE flags in open/close)
	 * is loaded in the host and an *old* driver is * used by a VM/VF,
	 * the IFACE gets created *without* the needed flags.
	 * To avoid this, disable RX-filter flags only for Lancer.
	 */
	if (lancer_chip(adapter)) {
		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
	}
}
3643 
/* netdev close entry point: disables filters, quiesces NAPI, MCC and TX,
 * destroys the RXQs, then drains the EQs and unregisters the irqs.
 * Always returns 0.
 */
static int be_close(struct net_device *netdev)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	struct be_eq_obj *eqo;
	int i;

	/* This protection is needed as be_close() may be called even when the
	 * adapter is in cleared state (after eeh perm failure)
	 */
	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
		return 0;

	/* Before attempting cleanup ensure all the pending cmds in the
	 * config_wq have finished execution
	 */
	flush_workqueue(be_wq);

	be_disable_if_filters(adapter);

	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
		for_all_evt_queues(adapter, eqo, i) {
			napi_disable(&eqo->napi);
			be_disable_busy_poll(eqo);
		}
		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
	}

	be_async_mcc_disable(adapter);

	/* Wait for all pending tx completions to arrive so that
	 * all tx skbs are freed.
	 */
	netif_tx_disable(netdev);
	be_tx_compl_clean(adapter);

	be_rx_qs_destroy(adapter);

	for_all_evt_queues(adapter, eqo, i) {
		if (msix_enabled(adapter))
			synchronize_irq(be_msix_vec_get(adapter, eqo));
		else
			synchronize_irq(netdev->irq);
		be_eq_clean(eqo);
	}

	be_irq_unregister(adapter);

	return 0;
}
3693 
/* Allocate and create the default RXQ (if needed) and the RSS RXQs,
 * program the RSS indirection table and hash key when more than one RSS
 * ring exists, and post the initial receive buffers.
 * Returns 0 on success or the first error encountered.
 */
static int be_rx_qs_create(struct be_adapter *adapter)
{
	struct rss_info *rss = &adapter->rss_info;
	u8 rss_key[RSS_HASH_KEY_LEN];
	struct be_rx_obj *rxo;
	int rc, i, j;

	for_all_rx_queues(adapter, rxo, i) {
		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
				    sizeof(struct be_eth_rx_d));
		if (rc)
			return rc;
	}

	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
		rxo = default_rxo(adapter);
		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
				       rx_frag_size, adapter->if_handle,
				       false, &rxo->rss_id);
		if (rc)
			return rc;
	}

	for_all_rss_queues(adapter, rxo, i) {
		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
				       rx_frag_size, adapter->if_handle,
				       true, &rxo->rss_id);
		if (rc)
			return rc;
	}

	if (be_multi_rxq(adapter)) {
		/* Fill the indirection table by cycling through the RSS
		 * rings until all RSS_INDIR_TABLE_LEN slots are assigned
		 */
		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
			for_all_rss_queues(adapter, rxo, i) {
				if ((j + i) >= RSS_INDIR_TABLE_LEN)
					break;
				rss->rsstable[j + i] = rxo->rss_id;
				rss->rss_queue[j + i] = i;
			}
		}
		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;

		if (!BEx_chip(adapter))
			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
				RSS_ENABLE_UDP_IPV6;

		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
				       RSS_INDIR_TABLE_LEN, rss_key);
		if (rc) {
			rss->rss_flags = RSS_ENABLE_NONE;
			return rc;
		}

		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
	} else {
		/* Disable RSS, if only default RX Q is created */
		rss->rss_flags = RSS_ENABLE_NONE;
	}


	/* Post 1 less than RXQ-len to avoid head being equal to tail,
	 * which is a queue empty condition
	 */
	for_all_rx_queues(adapter, rxo, i)
		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);

	return 0;
}
3764 
/* Re-program the interface's RX filters on open: the basic filter flags,
 * the primary MAC (except on BE3 VFs, where the PF programs it), any
 * configured VLANs, and the RX mode. Returns 0 on success.
 */
static int be_enable_if_filters(struct be_adapter *adapter)
{
	int status;

	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
	if (status)
		return status;

	/* For BE3 VFs, the PF programs the initial MAC address */
	if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
		if (status)
			return status;
		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
	}

	if (adapter->vlans_added)
		be_vid_config(adapter);

	__be_set_rx_mode(adapter);

	return 0;
}
3788 
/* netdev open entry point: creates RX queues, programs filters, registers
 * irqs, arms the CQs and EQs, enables NAPI and MCC, reports link state
 * and starts the TX queues. On any failure be_close() unwinds the
 * partially opened state and -EIO is returned.
 */
static int be_open(struct net_device *netdev)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	struct be_eq_obj *eqo;
	struct be_rx_obj *rxo;
	struct be_tx_obj *txo;
	u8 link_status;
	int status, i;

	status = be_rx_qs_create(adapter);
	if (status)
		goto err;

	status = be_enable_if_filters(adapter);
	if (status)
		goto err;

	status = be_irq_register(adapter);
	if (status)
		goto err;

	for_all_rx_queues(adapter, rxo, i)
		be_cq_notify(adapter, rxo->cq.id, true, 0);

	for_all_tx_queues(adapter, txo, i)
		be_cq_notify(adapter, txo->cq.id, true, 0);

	be_async_mcc_enable(adapter);

	for_all_evt_queues(adapter, eqo, i) {
		napi_enable(&eqo->napi);
		be_enable_busy_poll(eqo);
		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
	}
	adapter->flags |= BE_FLAGS_NAPI_ENABLED;

	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
	if (!status)
		be_link_status_update(adapter, link_status);

	netif_tx_start_all_queues(netdev);
	if (skyhawk_chip(adapter))
		udp_tunnel_get_rx_info(netdev);

	return 0;
err:
	be_close(adapter->netdev);
	return -EIO;
}
3838 
3839 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3840 {
3841 	u32 addr;
3842 
3843 	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3844 
3845 	mac[5] = (u8)(addr & 0xFF);
3846 	mac[4] = (u8)((addr >> 8) & 0xFF);
3847 	mac[3] = (u8)((addr >> 16) & 0xFF);
3848 	/* Use the OUI from the current MAC address */
3849 	memcpy(mac, adapter->netdev->dev_addr, 3);
3850 }
3851 
3852 /*
3853  * Generate a seed MAC address from the PF MAC Address using jhash.
3854  * MAC Address for VFs are assigned incrementally starting from the seed.
3855  * These addresses are programmed in the ASIC by the PF and the VF driver
3856  * queries for the MAC address during its probe.
3857  */
3858 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3859 {
3860 	u32 vf;
3861 	int status = 0;
3862 	u8 mac[ETH_ALEN];
3863 	struct be_vf_cfg *vf_cfg;
3864 
3865 	be_vf_eth_addr_generate(adapter, mac);
3866 
3867 	for_all_vfs(adapter, vf_cfg, vf) {
3868 		if (BEx_chip(adapter))
3869 			status = be_cmd_pmac_add(adapter, mac,
3870 						 vf_cfg->if_handle,
3871 						 &vf_cfg->pmac_id, vf + 1);
3872 		else
3873 			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3874 						vf + 1);
3875 
3876 		if (status)
3877 			dev_err(&adapter->pdev->dev,
3878 				"Mac address assignment failed for VF %d\n",
3879 				vf);
3880 		else
3881 			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3882 
3883 		mac[5] += 1;
3884 	}
3885 	return status;
3886 }
3887 
3888 static int be_vfs_mac_query(struct be_adapter *adapter)
3889 {
3890 	int status, vf;
3891 	u8 mac[ETH_ALEN];
3892 	struct be_vf_cfg *vf_cfg;
3893 
3894 	for_all_vfs(adapter, vf_cfg, vf) {
3895 		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3896 					       mac, vf_cfg->if_handle,
3897 					       false, vf+1);
3898 		if (status)
3899 			return status;
3900 		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3901 	}
3902 	return 0;
3903 }
3904 
/* Undo SR-IOV setup: disable SR-IOV (skipped when VFs are still assigned
 * to VMs), remove each VF's MAC and interface, restore BE3 port
 * forwarding to pass-through, and free the per-VF config array.
 */
static void be_vf_clear(struct be_adapter *adapter)
{
	struct be_vf_cfg *vf_cfg;
	u32 vf;

	if (pci_vfs_assigned(adapter->pdev)) {
		dev_warn(&adapter->pdev->dev,
			 "VFs are assigned to VMs: not disabling VFs\n");
		goto done;
	}

	pci_disable_sriov(adapter->pdev);

	for_all_vfs(adapter, vf_cfg, vf) {
		if (BEx_chip(adapter))
			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
					vf_cfg->pmac_id, vf + 1);
		else
			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
				       vf + 1);

		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
	}

	if (BE3_chip(adapter))
		be_cmd_set_hsw_config(adapter, 0, 0,
				      adapter->if_handle,
				      PORT_FWD_TYPE_PASSTHRU, 0);
done:
	kfree(adapter->vf_cfg);
	adapter->num_vfs = 0;
	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
}
3938 
/* Destroy the MCC queues, RX CQs, TX queues and event queues, in that
 * order.
 */
static void be_clear_queues(struct be_adapter *adapter)
{
	be_mcc_queues_destroy(adapter);
	be_rx_cqs_destroy(adapter);
	be_tx_queues_destroy(adapter);
	be_evt_queues_destroy(adapter);
}
3946 
3947 static void be_cancel_worker(struct be_adapter *adapter)
3948 {
3949 	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3950 		cancel_delayed_work_sync(&adapter->work);
3951 		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3952 	}
3953 }
3954 
3955 static void be_cancel_err_detection(struct be_adapter *adapter)
3956 {
3957 	struct be_error_recovery *err_rec = &adapter->error_recovery;
3958 
3959 	if (!be_err_recovery_workq)
3960 		return;
3961 
3962 	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3963 		cancel_delayed_work_sync(&err_rec->err_detection_work);
3964 		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3965 	}
3966 }
3967 
/* Disable VxLAN offloads: convert the tunnel interface back to normal
 * mode, clear the VxLAN port programmed in HW, and strip the tunnel
 * offload features from the netdev.
 */
static void be_disable_vxlan_offloads(struct be_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;

	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
		be_cmd_manage_iface(adapter, adapter->if_handle,
				    OP_CONVERT_TUNNEL_TO_NORMAL);

	if (adapter->vxlan_port)
		be_cmd_set_vxlan_port(adapter, 0);

	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
	adapter->vxlan_port = 0;

	netdev->hw_enc_features = 0;
	netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
	netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
}
3986 
/* Compute the per-VF resource template (@vft_res) used when enabling
 * SR-IOV: queue counts are divided among the PF and @num_vfs VFs, and
 * IFACE capability flags are adjusted based on what FW reports as
 * modifiable.
 */
static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
				struct be_resources *vft_res)
{
	struct be_resources res = adapter->pool_res;
	u32 vf_if_cap_flags = res.vf_if_cap_flags;
	struct be_resources res_mod = {0};
	u16 num_vf_qs = 1;

	/* Distribute the queue resources among the PF and it's VFs */
	if (num_vfs) {
		/* Divide the rx queues evenly among the VFs and the PF, capped
		 * at VF-EQ-count. Any remainder queues belong to the PF.
		 */
		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
				res.max_rss_qs / (num_vfs + 1));

		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
		 * RSS Tables per port. Provide RSS on VFs, only if number of
		 * VFs requested is less than it's PF Pool's RSS Tables limit.
		 */
		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
			num_vf_qs = 1;
	}

	/* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
	 * which are modifiable using SET_PROFILE_CONFIG cmd.
	 */
	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
				  RESOURCE_MODIFIABLE, 0);

	/* If RSS IFACE capability flags are modifiable for a VF, set the
	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
	 * more than 1 RSSQ is available for a VF.
	 * Otherwise, provision only 1 queue pair for VF.
	 */
	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
		if (num_vf_qs > 1) {
			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
		} else {
			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
					     BE_IF_FLAGS_DEFQ_RSS);
		}
	} else {
		num_vf_qs = 1;
	}

	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
	}

	vft_res->vf_if_cap_flags = vf_if_cap_flags;
	vft_res->max_rx_qs = num_vf_qs;
	vft_res->max_rss_qs = num_vf_qs;
	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);

	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
	 * among the PF and it's VFs, if the fields are changeable
	 */
	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);

	if (res_mod.max_vlans == FIELD_MODIFIABLE)
		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);

	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);

	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
}
4062 
4063 static void be_if_destroy(struct be_adapter *adapter)
4064 {
4065 	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4066 
4067 	kfree(adapter->pmac_id);
4068 	adapter->pmac_id = NULL;
4069 
4070 	kfree(adapter->mc_list);
4071 	adapter->mc_list = NULL;
4072 
4073 	kfree(adapter->uc_list);
4074 	adapter->uc_list = NULL;
4075 }
4076 
/* Undo be_setup(): stop the housekeeping worker, tear down any VFs,
 * re-spread the SRIOV pool resources (Skyhawk PF only, and only when no
 * VFs are currently assigned to guests), drop VxLAN offloads, destroy the
 * iface and all queues, and disable MSI-X.
 * Always returns 0.
 */
static int be_clear(struct be_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct  be_resources vft_res = {0};

	be_cancel_worker(adapter);

	/* Let any queued cmd work items finish before tearing down state */
	flush_workqueue(be_wq);

	if (sriov_enabled(adapter))
		be_vf_clear(adapter);

	/* Re-configure FW to distribute resources evenly across max-supported
	 * number of VFs, only when VFs are not already enabled.
	 */
	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
	    !pci_vfs_assigned(pdev)) {
		be_calculate_vf_res(adapter,
				    pci_sriov_get_totalvfs(pdev),
				    &vft_res);
		be_cmd_set_sriov_config(adapter, adapter->pool_res,
					pci_sriov_get_totalvfs(pdev),
					&vft_res);
	}

	be_disable_vxlan_offloads(adapter);

	be_if_destroy(adapter);

	be_clear_queues(adapter);

	be_msix_disable(adapter);
	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
	return 0;
}
4112 
/* Proxy-create an iface in FW for each VF (called by the PF before the
 * VFs are enabled). On non-BE3 chips the per-VF cap flags come from the
 * VF's FW profile; BE3 has no profiles, so the default BE_VF_IF_EN_FLAGS
 * set is used.
 * Returns 0 on success or the first failing cmd's status.
 */
static int be_vfs_if_create(struct be_adapter *adapter)
{
	struct be_resources res = {0};
	u32 cap_flags, en_flags, vf;
	struct be_vf_cfg *vf_cfg;
	int status;

	/* If a FW profile exists, then cap_flags are updated */
	cap_flags = BE_VF_IF_EN_FLAGS;

	for_all_vfs(adapter, vf_cfg, vf) {
		if (!BE3_chip(adapter)) {
			/* Domain for VF "vf" is vf + 1 (0 is the PF) */
			status = be_cmd_get_profile_config(adapter, &res, NULL,
							   ACTIVE_PROFILE_TYPE,
							   RESOURCE_LIMITS,
							   vf + 1);
			if (!status) {
				cap_flags = res.if_cap_flags;
				/* Prevent VFs from enabling VLAN promiscuous
				 * mode
				 */
				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
			}
		}

		/* PF should enable IF flags during proxy if_create call */
		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
		status = be_cmd_if_create(adapter, cap_flags, en_flags,
					  &vf_cfg->if_handle, vf + 1);
		if (status)
			return status;
	}

	return 0;
}
4148 
4149 static int be_vf_setup_init(struct be_adapter *adapter)
4150 {
4151 	struct be_vf_cfg *vf_cfg;
4152 	int vf;
4153 
4154 	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4155 				  GFP_KERNEL);
4156 	if (!adapter->vf_cfg)
4157 		return -ENOMEM;
4158 
4159 	for_all_vfs(adapter, vf_cfg, vf) {
4160 		vf_cfg->if_handle = -1;
4161 		vf_cfg->pmac_id = -1;
4162 	}
4163 	return 0;
4164 }
4165 
/* Fully provision the requested VFs: create/query their ifaces and MACs,
 * grant filtering privileges, configure QoS/link-state, and finally enable
 * SRIOV in PCI config space (unless VFs survived a previous driver unload,
 * in which case existing FW state is reused).
 * On any failure, all partially-created VF state is torn down.
 */
static int be_vf_setup(struct be_adapter *adapter)
{
	struct device *dev = &adapter->pdev->dev;
	struct be_vf_cfg *vf_cfg;
	int status, old_vfs, vf;
	bool spoofchk;

	old_vfs = pci_num_vf(adapter->pdev);

	status = be_vf_setup_init(adapter);
	if (status)
		goto err;

	if (old_vfs) {
		/* VFs already exist in FW; query their ifaces/MACs instead of
		 * re-creating them
		 */
		for_all_vfs(adapter, vf_cfg, vf) {
			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
			if (status)
				goto err;
		}

		status = be_vfs_mac_query(adapter);
		if (status)
			goto err;
	} else {
		status = be_vfs_if_create(adapter);
		if (status)
			goto err;

		status = be_vf_eth_addr_config(adapter);
		if (status)
			goto err;
	}

	for_all_vfs(adapter, vf_cfg, vf) {
		/* Allow VFs to program MAC/VLAN filters */
		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
						  vf + 1);
		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
			status = be_cmd_set_fn_privileges(adapter,
							  vf_cfg->privileges |
							  BE_PRIV_FILTMGMT,
							  vf + 1);
			if (!status) {
				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
				dev_info(dev, "VF%d has FILTMGMT privilege\n",
					 vf);
			}
		}

		/* Allow full available bandwidth */
		if (!old_vfs)
			be_cmd_config_qos(adapter, 0, 0, vf + 1);

		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
					       vf_cfg->if_handle, NULL,
					       &spoofchk);
		if (!status)
			vf_cfg->spoofchk = spoofchk;

		if (!old_vfs) {
			be_cmd_enable_vf(adapter, vf + 1);
			be_cmd_set_logical_link_config(adapter,
						       IFLA_VF_LINK_STATE_AUTO,
						       vf+1);
		}
	}

	if (!old_vfs) {
		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
		if (status) {
			dev_err(dev, "SRIOV enable failed\n");
			adapter->num_vfs = 0;
			goto err;
		}
	}

	if (BE3_chip(adapter)) {
		/* On BE3, enable VEB only when SRIOV is enabled */
		status = be_cmd_set_hsw_config(adapter, 0, 0,
					       adapter->if_handle,
					       PORT_FWD_TYPE_VEB, 0);
		if (status)
			goto err;
	}

	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
	return 0;
err:
	dev_err(dev, "VF setup failed\n");
	be_vf_clear(adapter);
	return status;
}
4258 
4259 /* Converting function_mode bits on BE3 to SH mc_type enums */
4260 
4261 static u8 be_convert_mc_type(u32 function_mode)
4262 {
4263 	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4264 		return vNIC1;
4265 	else if (function_mode & QNQ_MODE)
4266 		return FLEX10;
4267 	else if (function_mode & VNIC_MODE)
4268 		return vNIC2;
4269 	else if (function_mode & UMC_ENABLED)
4270 		return UMC;
4271 	else
4272 		return MC_NONE;
4273 }
4274 
/* On BE2/BE3 FW does not suggest the supported limits, so the driver
 * derives @res (queue counts, MAC/VLAN limits, iface cap flags) from the
 * chip type, multi-channel mode and function capabilities instead.
 */
static void BEx_get_resources(struct be_adapter *adapter,
			      struct be_resources *res)
{
	bool use_sriov = adapter->num_vfs ? 1 : 0;

	if (be_physfn(adapter))
		res->max_uc_mac = BE_UC_PMAC_COUNT;
	else
		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;

	adapter->mc_type = be_convert_mc_type(adapter->function_mode);

	if (be_is_mc(adapter)) {
		/* Assuming that there are 4 channels per port,
		 * when multi-channel is enabled
		 */
		if (be_is_qnq_mode(adapter))
			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
		else
			/* In a non-qnq multichannel mode, the pvid
			 * takes up one vlan entry
			 */
			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
	} else {
		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
	}

	res->max_mcast_mac = BE_MAX_MC;

	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
	 *    *only* if it is RSS-capable.
	 */
	if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
	    be_virtfn(adapter) ||
	    (be_is_mc(adapter) &&
	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
		res->max_tx_qs = 1;
	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
		struct be_resources super_nic_res = {0};

		/* On a SuperNIC profile, the driver needs to use the
		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
		 */
		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
					  0);
		/* Some old versions of BE3 FW don't report max_tx_qs value */
		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
	} else {
		res->max_tx_qs = BE3_MAX_TX_QS;
	}

	/* RSS only on a non-SRIOV, RSS-capable PF */
	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
	    !use_sriov && be_physfn(adapter))
		res->max_rss_qs = (adapter->be3_native) ?
					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
	/* The extra RXQ is the default (non-RSS) queue */
	res->max_rx_qs = res->max_rss_qs + 1;

	if (be_physfn(adapter))
		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
	else
		res->max_evt_qs = 1;

	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
}
4346 
4347 static void be_setup_init(struct be_adapter *adapter)
4348 {
4349 	adapter->vlan_prio_bmap = 0xff;
4350 	adapter->phy.link_speed = -1;
4351 	adapter->if_handle = -1;
4352 	adapter->be3_native = false;
4353 	adapter->if_flags = 0;
4354 	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4355 	if (be_physfn(adapter))
4356 		adapter->cmd_privileges = MAX_PRIVILEGES;
4357 	else
4358 		adapter->cmd_privileges = MIN_PRIVILEGES;
4359 }
4360 
/* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
 * However, this HW limitation is not exposed to the host via any SLI cmd.
 * As a result, in the case of SRIOV and in particular multi-partition configs
 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
 * for distribution between the VFs. This self-imposed limit will determine the
 * number of VFs for which RSS can be enabled.
 */
void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
{
	struct be_port_resources port_res = {0};
	u8 rss_tables_on_port;
	u16 max_vfs = be_max_vfs(adapter);

	/* Query port-wide (not per-function) limits from the saved profile */
	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
				  RESOURCE_LIMITS, 0);

	/* One table is consumed by each NIC PF on the port */
	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;

	/* Each PF Pool's RSS Tables limit =
	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
	 * NOTE(review): assumes FW reports port_res.max_vfs != 0 here
	 * (caller only invokes this when be_max_vfs() is non-zero) — verify
	 * for profiles where the port-level max_vfs could be unset.
	 */
	adapter->pool_res.max_rss_tables =
		max_vfs * rss_tables_on_port / port_res.max_vfs;
}
4385 
/* Read the PF-pool (SRIOV) resource limits from FW into adapter->pool_res,
 * patching up values that old BE3 FW doesn't report, and compute the
 * per-pool RSS-tables limit on Skyhawk.
 * Always returns 0.
 */
static int be_get_sriov_config(struct be_adapter *adapter)
{
	struct be_resources res = {0};
	int max_vfs, old_vfs;

	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
				  RESOURCE_LIMITS, 0);

	/* Some old versions of BE3 FW don't report max_vfs value */
	if (BE3_chip(adapter) && !res.max_vfs) {
		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
	}

	adapter->pool_res = res;

	/* If during previous unload of the driver, the VFs were not disabled,
	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
	 * Instead use the TotalVFs value stored in the pci-dev struct.
	 */
	old_vfs = pci_num_vf(adapter->pdev);
	if (old_vfs) {
		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
			 old_vfs);

		adapter->pool_res.max_vfs =
			pci_sriov_get_totalvfs(adapter->pdev);
		adapter->num_vfs = old_vfs;
	}

	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
		be_calculate_pf_pool_rss_tables(adapter);
		dev_info(&adapter->pdev->dev,
			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
			 be_max_pf_pool_rss_tables(adapter));
	}
	return 0;
}
4424 
/* Fetch the SRIOV pool limits and, on a Skyhawk PF with no pre-existing
 * VFs, tell FW to hand the whole PF-pool to the PF (num_vfs == 0 in the
 * SET_SRIOV_CONFIG cmd). Failure is logged but not fatal.
 */
static void be_alloc_sriov_res(struct be_adapter *adapter)
{
	int old_vfs = pci_num_vf(adapter->pdev);
	struct  be_resources vft_res = {0};
	int status;

	be_get_sriov_config(adapter);

	if (!old_vfs)
		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));

	/* When the HW is in SRIOV capable configuration, the PF-pool
	 * resources are given to PF during driver load, if there are no
	 * old VFs. This facility is not available in BE3 FW.
	 * Also, this is done by FW in Lancer chip.
	 */
	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
		be_calculate_vf_res(adapter, 0, &vft_res);
		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
						 &vft_res);
		if (status)
			dev_err(&adapter->pdev->dev,
				"Failed to optimize SRIOV resources\n");
	}
}
4450 
/* Populate adapter->res with this function's resource limits (from FW on
 * Lancer/Skyhawk, computed locally on BE2/BE3), carve out EQs for RoCE,
 * and derive the initial RX/TX irq counts.
 * Returns 0 on success or the FW cmd's status.
 */
static int be_get_resources(struct be_adapter *adapter)
{
	struct device *dev = &adapter->pdev->dev;
	struct be_resources res = {0};
	int status;

	/* For Lancer, SH etc read per-function resource limits from FW.
	 * GET_FUNC_CONFIG returns per function guaranteed limits.
	 * GET_PROFILE_CONFIG returns PCI-E related limits PF-pool limits
	 */
	if (BEx_chip(adapter)) {
		BEx_get_resources(adapter, &res);
	} else {
		status = be_cmd_get_func_config(adapter, &res);
		if (status)
			return status;

		/* If a default RXQ must be created, we'll use up one RSSQ */
		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
			res.max_rss_qs -= 1;
	}

	/* If RoCE is supported stash away half the EQs for RoCE */
	res.max_nic_evt_qs = be_roce_supported(adapter) ?
				res.max_evt_qs / 2 : res.max_evt_qs;
	adapter->res = res;

	/* If FW supports RSS default queue, then skip creating non-RSS
	 * queue for non-IP traffic.
	 */
	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;

	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
		 be_max_txqs(adapter), be_max_rxqs(adapter),
		 be_max_rss(adapter), be_max_nic_eqs(adapter),
		 be_max_vfs(adapter));
	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
		 be_max_uc(adapter), be_max_mc(adapter),
		 be_max_vlans(adapter));

	/* Ensure RX and TX queues are created in pairs at init time */
	adapter->cfg_num_rx_irqs =
				min_t(u16, netif_get_num_default_rss_queues(),
				      be_max_qp_irqs(adapter));
	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
	return 0;
}
4500 
/* Query adapter-wide configuration from FW: controller attributes, FW
 * config, FAT-dump size, log level (BEx), WoL capability, port name and
 * the active profile. Only the first two cmds are treated as fatal.
 */
static int be_get_config(struct be_adapter *adapter)
{
	int status, level;
	u16 profile_id;

	status = be_cmd_get_cntl_attributes(adapter);
	if (status)
		return status;

	status = be_cmd_query_fw_cfg(adapter);
	if (status)
		return status;

	if (!lancer_chip(adapter) && be_physfn(adapter))
		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);

	if (BEx_chip(adapter)) {
		level = be_cmd_get_fw_log_level(adapter);
		adapter->msg_enable =
			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
	}

	be_cmd_get_acpi_wol_cap(adapter);
	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);

	be_cmd_query_port_name(adapter);

	if (be_physfn(adapter)) {
		status = be_cmd_get_active_profile(adapter, &profile_id);
		if (!status)
			dev_info(&adapter->pdev->dev,
				 "Using profile 0x%x\n", profile_id);
	}

	return 0;
}
4538 
4539 static int be_mac_setup(struct be_adapter *adapter)
4540 {
4541 	u8 mac[ETH_ALEN];
4542 	int status;
4543 
4544 	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4545 		status = be_cmd_get_perm_mac(adapter, mac);
4546 		if (status)
4547 			return status;
4548 
4549 		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4550 		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4551 	}
4552 
4553 	return 0;
4554 }
4555 
/* Schedule the periodic housekeeping task on the shared workq (first run
 * one second from now) and mark it as scheduled so be_cancel_worker()
 * knows there is something to cancel.
 */
static void be_schedule_worker(struct be_adapter *adapter)
{
	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
}
4561 
4562 static void be_destroy_err_recovery_workq(void)
4563 {
4564 	if (!be_err_recovery_workq)
4565 		return;
4566 
4567 	flush_workqueue(be_err_recovery_workq);
4568 	destroy_workqueue(be_err_recovery_workq);
4569 	be_err_recovery_workq = NULL;
4570 }
4571 
/* Queue this function's error-detection work on the shared recovery workq
 * after @delay milliseconds; no-op if the workq was never created.
 */
static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
{
	struct be_error_recovery *err_rec = &adapter->error_recovery;

	if (!be_err_recovery_workq)
		return;

	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
			   msecs_to_jiffies(delay));
	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
}
4583 
4584 static int be_setup_queues(struct be_adapter *adapter)
4585 {
4586 	struct net_device *netdev = adapter->netdev;
4587 	int status;
4588 
4589 	status = be_evt_queues_create(adapter);
4590 	if (status)
4591 		goto err;
4592 
4593 	status = be_tx_qs_create(adapter);
4594 	if (status)
4595 		goto err;
4596 
4597 	status = be_rx_cqs_create(adapter);
4598 	if (status)
4599 		goto err;
4600 
4601 	status = be_mcc_queues_create(adapter);
4602 	if (status)
4603 		goto err;
4604 
4605 	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4606 	if (status)
4607 		goto err;
4608 
4609 	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4610 	if (status)
4611 		goto err;
4612 
4613 	return 0;
4614 err:
4615 	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4616 	return status;
4617 }
4618 
4619 static int be_if_create(struct be_adapter *adapter)
4620 {
4621 	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4622 	u32 cap_flags = be_if_cap_flags(adapter);
4623 	int status;
4624 
4625 	/* alloc required memory for other filtering fields */
4626 	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4627 				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4628 	if (!adapter->pmac_id)
4629 		return -ENOMEM;
4630 
4631 	adapter->mc_list = kcalloc(be_max_mc(adapter),
4632 				   sizeof(*adapter->mc_list), GFP_KERNEL);
4633 	if (!adapter->mc_list)
4634 		return -ENOMEM;
4635 
4636 	adapter->uc_list = kcalloc(be_max_uc(adapter),
4637 				   sizeof(*adapter->uc_list), GFP_KERNEL);
4638 	if (!adapter->uc_list)
4639 		return -ENOMEM;
4640 
4641 	if (adapter->cfg_num_rx_irqs == 1)
4642 		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4643 
4644 	en_flags &= cap_flags;
4645 	/* will enable all the needed filter flags in be_open() */
4646 	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4647 				  &adapter->if_handle, 0);
4648 
4649 	if (status)
4650 		return status;
4651 
4652 	return 0;
4653 }
4654 
/* Tear down and re-create the interface and all queues with the current
 * configuration (e.g. after an ethtool channel-count change). The netdev
 * is closed/re-opened around the operation if it was running.
 * MSI-X is only re-programmed when no vectors are shared with RoCE.
 * Returns 0 on success or the first failing step's status.
 */
int be_update_queues(struct be_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int status;

	if (netif_running(netdev))
		be_close(netdev);

	be_cancel_worker(adapter);

	/* If any vectors have been shared with RoCE we cannot re-program
	 * the MSIx table.
	 */
	if (!adapter->num_msix_roce_vec)
		be_msix_disable(adapter);

	be_clear_queues(adapter);
	status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
	if (status)
		return status;

	if (!msix_enabled(adapter)) {
		status = be_msix_enable(adapter);
		if (status)
			return status;
	}

	/* Re-creates the iface and reallocates the filter lists */
	status = be_if_create(adapter);
	if (status)
		return status;

	status = be_setup_queues(adapter);
	if (status)
		return status;

	be_schedule_worker(adapter);

	if (netif_running(netdev))
		status = be_open(netdev);

	return status;
}
4697 
/* Parse the leading major number out of a "major.minor..." FW version
 * string; returns 0 when the string does not start with an integer.
 */
static inline int fw_major_num(const char *fw_ver)
{
	int major = 0;

	if (sscanf(fw_ver, "%d.", &major) != 1)
		return 0;

	return major;
}
4708 
4709 /* If it is error recovery, FLR the PF
4710  * Else if any VFs are already enabled don't FLR the PF
4711  */
4712 static bool be_reset_required(struct be_adapter *adapter)
4713 {
4714 	if (be_error_recovering(adapter))
4715 		return true;
4716 	else
4717 		return pci_num_vf(adapter->pdev) == 0;
4718 }
4719 
/* Wait for the FW to be ready and perform the required initialization:
 * clear recorded error state, optionally FLR the function (see
 * be_reset_required()), issue FW_INIT and re-enable interrupts.
 * Returns 0 on success or the failing step's status.
 */
static int be_func_init(struct be_adapter *adapter)
{
	int status;

	status = be_fw_wait_ready(adapter);
	if (status)
		return status;

	/* FW is now ready; clear errors to allow cmds/doorbell */
	be_clear_error(adapter, BE_CLEAR_ALL);

	if (be_reset_required(adapter)) {
		status = be_cmd_reset_function(adapter);
		if (status)
			return status;

		/* Wait for interrupts to quiesce after an FLR */
		msleep(100);
	}

	/* Tell FW we're ready to fire cmds */
	status = be_cmd_fw_init(adapter);
	if (status)
		return status;

	/* Allow interrupts for other ULPs running on NIC function */
	be_intr_set(adapter, true);

	return 0;
}
4751 
/* Bring the function fully up: initialize FW, read configuration and
 * resource limits, enable MSI-X, create the iface and all queues, program
 * the MAC, configure flow control/link state, and provision any requested
 * VFs. On failure everything is torn down via be_clear().
 * Steps whose status is not checked below are best-effort.
 */
static int be_setup(struct be_adapter *adapter)
{
	struct device *dev = &adapter->pdev->dev;
	int status;

	status = be_func_init(adapter);
	if (status)
		return status;

	be_setup_init(adapter);

	if (!lancer_chip(adapter))
		be_cmd_req_native_mode(adapter);

	/* invoke this cmd first to get pf_num and vf_num which are needed
	 * for issuing profile related cmds
	 */
	if (!BEx_chip(adapter)) {
		status = be_cmd_get_func_config(adapter, NULL);
		if (status)
			return status;
	}

	status = be_get_config(adapter);
	if (status)
		goto err;

	if (!BE2_chip(adapter) && be_physfn(adapter))
		be_alloc_sriov_res(adapter);

	status = be_get_resources(adapter);
	if (status)
		goto err;

	status = be_msix_enable(adapter);
	if (status)
		goto err;

	/* will enable all the needed filter flags in be_open() */
	status = be_if_create(adapter);
	if (status)
		goto err;

	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
	rtnl_lock();
	status = be_setup_queues(adapter);
	rtnl_unlock();
	if (status)
		goto err;

	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);

	status = be_mac_setup(adapter);
	if (status)
		goto err;

	be_cmd_get_fw_ver(adapter);
	dev_info(dev, "FW version is %s\n", adapter->fw_ver);

	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
		dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
			adapter->fw_ver);
		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
	}

	/* Best-effort: fall back to reading FW's current settings if ours
	 * cannot be applied
	 */
	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
					 adapter->rx_fc);
	if (status)
		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
					&adapter->rx_fc);

	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
		 adapter->tx_fc, adapter->rx_fc);

	if (be_physfn(adapter))
		be_cmd_set_logical_link_config(adapter,
					       IFLA_VF_LINK_STATE_AUTO, 0);

	/* BE3 EVB echoes broadcast/multicast packets back to PF's vport
	 * confusing a linux bridge or OVS that it might be connected to.
	 * Set the EVB to PASSTHRU mode which effectively disables the EVB
	 * when SRIOV is not enabled.
	 */
	if (BE3_chip(adapter))
		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
				      PORT_FWD_TYPE_PASSTHRU, 0);

	if (adapter->num_vfs)
		be_vf_setup(adapter);

	status = be_cmd_get_phy_info(adapter);
	if (!status && be_pause_supported(adapter))
		adapter->phy.fc_autoneg = 1;

	if (be_physfn(adapter) && !lancer_chip(adapter))
		be_cmd_set_features(adapter);

	be_schedule_worker(adapter);
	adapter->flags |= BE_FLAGS_SETUP_DONE;
	return 0;
err:
	be_clear(adapter);
	return status;
}
4856 
4857 #ifdef CONFIG_NET_POLL_CONTROLLER
/* netconsole/netpoll hook: ring the EQ doorbell and schedule NAPI for
 * every event queue so pending completions get processed without irqs.
 */
static void be_netpoll(struct net_device *netdev)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	struct be_eq_obj *eqo;
	int i;

	for_all_evt_queues(adapter, eqo, i) {
		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
		napi_schedule(&eqo->napi);
	}
}
4869 #endif
4870 
/* Flash the firmware image named @fw_file to the adapter (ethtool flash
 * entry point). The interface must be up. On success the cached FW
 * version string is refreshed.
 * Returns 0 on success or a negative errno / cmd status.
 */
int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
{
	const struct firmware *fw;
	int status;

	if (!netif_running(adapter->netdev)) {
		dev_err(&adapter->pdev->dev,
			"Firmware load not allowed (interface is down)\n");
		return -ENETDOWN;
	}

	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
	if (status)
		goto fw_exit;

	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);

	if (lancer_chip(adapter))
		status = lancer_fw_download(adapter, fw);
	else
		status = be_fw_download(adapter, fw);

	if (!status)
		be_cmd_get_fw_ver(adapter);

fw_exit:
	/* fw is NULL when request_firmware() failed; release is a no-op */
	release_firmware(fw);
	return status;
}
4900 
4901 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4902 				 u16 flags)
4903 {
4904 	struct be_adapter *adapter = netdev_priv(dev);
4905 	struct nlattr *attr, *br_spec;
4906 	int rem;
4907 	int status = 0;
4908 	u16 mode = 0;
4909 
4910 	if (!sriov_enabled(adapter))
4911 		return -EOPNOTSUPP;
4912 
4913 	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4914 	if (!br_spec)
4915 		return -EINVAL;
4916 
4917 	nla_for_each_nested(attr, br_spec, rem) {
4918 		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4919 			continue;
4920 
4921 		if (nla_len(attr) < sizeof(mode))
4922 			return -EINVAL;
4923 
4924 		mode = nla_get_u16(attr);
4925 		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4926 			return -EOPNOTSUPP;
4927 
4928 		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4929 			return -EINVAL;
4930 
4931 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4932 					       adapter->if_handle,
4933 					       mode == BRIDGE_MODE_VEPA ?
4934 					       PORT_FWD_TYPE_VEPA :
4935 					       PORT_FWD_TYPE_VEB, 0);
4936 		if (status)
4937 			goto err;
4938 
4939 		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4940 			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4941 
4942 		return status;
4943 	}
4944 err:
4945 	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4946 		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4947 
4948 	return status;
4949 }
4950 
/* ndo_bridge_getlink: report the embedded bridge's forwarding mode
 * (VEB/VEPA) via the standard netlink helper. BEx/Lancer only ever do
 * VEB; on other chips the mode is read from FW. PASSTHRU (bridge
 * disabled) and query failures report nothing (return 0).
 */
static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
				 struct net_device *dev, u32 filter_mask,
				 int nlflags)
{
	struct be_adapter *adapter = netdev_priv(dev);
	int status = 0;
	u8 hsw_mode;

	/* BE and Lancer chips support VEB mode only */
	if (BEx_chip(adapter) || lancer_chip(adapter)) {
		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
		if (!pci_sriov_get_totalvfs(adapter->pdev))
			return 0;
		hsw_mode = PORT_FWD_TYPE_VEB;
	} else {
		status = be_cmd_get_hsw_config(adapter, NULL, 0,
					       adapter->if_handle, &hsw_mode,
					       NULL);
		if (status)
			return 0;

		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
			return 0;
	}

	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
				       hsw_mode == PORT_FWD_TYPE_VEPA ?
				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
				       0, 0, nlflags, filter_mask, NULL);
}
4981 
4982 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4983 					 void (*func)(struct work_struct *))
4984 {
4985 	struct be_cmd_work *work;
4986 
4987 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
4988 	if (!work) {
4989 		dev_err(&adapter->pdev->dev,
4990 			"be_work memory allocation failed\n");
4991 		return NULL;
4992 	}
4993 
4994 	INIT_WORK(&work->work, func);
4995 	work->adapter = adapter;
4996 	return work;
4997 }
4998 
4999 /* VxLAN offload Notes:
5000  *
5001  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5002  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5003  * is expected to work across all types of IP tunnels once exported. Skyhawk
5004  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5005  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5006  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5007  * those other tunnels are unexported on the fly through ndo_features_check().
5008  *
5009  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5010  * adds more than one port, disable offloads and don't re-enable them again
5011  * until after all the tunnels are removed.
5012  */
/* Deferred handler for a VxLAN port-add notification (see the VxLAN
 * offload notes above). Tracks duplicate adds of the same port via
 * vxlan_port_aliases, and disables offloads entirely once a second,
 * different port is added (HW supports only one VxLAN dport).
 * Frees the work item before returning.
 */
static void be_work_add_vxlan_port(struct work_struct *work)
{
	struct be_cmd_work *cmd_work =
				container_of(work, struct be_cmd_work, work);
	struct be_adapter *adapter = cmd_work->adapter;
	struct net_device *netdev = adapter->netdev;
	struct device *dev = &adapter->pdev->dev;
	__be16 port = cmd_work->info.vxlan_port;
	int status;

	/* Same port added again: just count the alias */
	if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
		adapter->vxlan_port_aliases++;
		goto done;
	}

	/* A different port while offloads are active: tear offloads down */
	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
		dev_info(dev,
			 "Only one UDP port supported for VxLAN offloads\n");
		dev_info(dev, "Disabling VxLAN offloads\n");
		adapter->vxlan_port_count++;
		goto err;
	}

	if (adapter->vxlan_port_count++ >= 1)
		goto done;

	status = be_cmd_manage_iface(adapter, adapter->if_handle,
				     OP_CONVERT_NORMAL_TO_TUNNEL);
	if (status) {
		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
		goto err;
	}

	status = be_cmd_set_vxlan_port(adapter, port);
	if (status) {
		dev_warn(dev, "Failed to add VxLAN port\n");
		goto err;
	}
	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
	adapter->vxlan_port = port;

	/* Export tunnel offload capabilities now that a port is programmed */
	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
				   NETIF_F_TSO | NETIF_F_TSO6 |
				   NETIF_F_GSO_UDP_TUNNEL;
	netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
	netdev->features |= NETIF_F_GSO_UDP_TUNNEL;

	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
		 be16_to_cpu(port));
	goto done;
err:
	be_disable_vxlan_offloads(adapter);
done:
	kfree(cmd_work);
}
5068 
/* Deferred handler for a VxLAN port-delete notification. Drops an alias
 * count first if the same port was added more than once; only when the
 * last reference goes away are the HW offloads actually disabled.
 * Frees the work item before returning.
 */
static void be_work_del_vxlan_port(struct work_struct *work)
{
	struct be_cmd_work *cmd_work =
				container_of(work, struct be_cmd_work, work);
	struct be_adapter *adapter = cmd_work->adapter;
	__be16 port = cmd_work->info.vxlan_port;

	/* A port we never offloaded: only adjust the bookkeeping count */
	if (adapter->vxlan_port != port)
		goto done;

	if (adapter->vxlan_port_aliases) {
		adapter->vxlan_port_aliases--;
		goto out;
	}

	be_disable_vxlan_offloads(adapter);

	dev_info(&adapter->pdev->dev,
		 "Disabled VxLAN offloads for UDP port %d\n",
		 be16_to_cpu(port));
done:
	adapter->vxlan_port_count--;
out:
	kfree(cmd_work);
}
5094 
5095 static void be_cfg_vxlan_port(struct net_device *netdev,
5096 			      struct udp_tunnel_info *ti,
5097 			      void (*func)(struct work_struct *))
5098 {
5099 	struct be_adapter *adapter = netdev_priv(netdev);
5100 	struct be_cmd_work *cmd_work;
5101 
5102 	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5103 		return;
5104 
5105 	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5106 		return;
5107 
5108 	cmd_work = be_alloc_work(adapter, func);
5109 	if (cmd_work) {
5110 		cmd_work->info.vxlan_port = ti->port;
5111 		queue_work(be_wq, &cmd_work->work);
5112 	}
5113 }
5114 
/* ndo_udp_tunnel_del hook: queue deletion of an offloaded VxLAN port */
static void be_del_vxlan_port(struct net_device *netdev,
			      struct udp_tunnel_info *ti)
{
	be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
}
5120 
/* ndo_udp_tunnel_add hook: queue addition of a VxLAN port for offload */
static void be_add_vxlan_port(struct net_device *netdev,
			      struct udp_tunnel_info *ti)
{
	be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
}
5126 
/* ndo_features_check: per-skb TX offload feature restriction.
 * Strips checksum/GSO offload bits from encapsulated packets that are
 * not VxLAN, since the HW tunnel offloads are enabled for VxLAN only.
 */
static netdev_features_t be_features_check(struct sk_buff *skb,
					   struct net_device *dev,
					   netdev_features_t features)
{
	struct be_adapter *adapter = netdev_priv(dev);
	u8 l4_hdr = 0;

	/* The code below restricts offload features for some tunneled packets.
	 * Offload features for normal (non tunnel) packets are unchanged.
	 */
	if (!skb->encapsulation ||
	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
		return features;

	/* It's an encapsulated packet and VxLAN offloads are enabled. We
	 * should disable tunnel offload features if it's not a VxLAN packet,
	 * as tunnel offloads have been enabled only for VxLAN. This is done to
	 * allow other tunneled traffic like GRE work fine while VxLAN
	 * offloads are configured in Skyhawk-R.
	 */
	switch (vlan_get_protocol(skb)) {
	case htons(ETH_P_IP):
		l4_hdr = ip_hdr(skb)->protocol;
		break;
	case htons(ETH_P_IPV6):
		l4_hdr = ipv6_hdr(skb)->nexthdr;
		break;
	default:
		return features;
	}

	/* Keep offloads only for UDP-encapsulated Ethernet (TEB) frames
	 * whose tunnel header is exactly UDP + VxLAN, i.e. a genuine
	 * VxLAN packet; anything else falls back to SW csum/GSO.
	 */
	if (l4_hdr != IPPROTO_UDP ||
	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
	    skb->inner_protocol != htons(ETH_P_TEB) ||
	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
	    sizeof(struct udphdr) + sizeof(struct vxlanhdr))
		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);

	return features;
}
5167 
5168 static int be_get_phys_port_id(struct net_device *dev,
5169 			       struct netdev_phys_item_id *ppid)
5170 {
5171 	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5172 	struct be_adapter *adapter = netdev_priv(dev);
5173 	u8 *id;
5174 
5175 	if (MAX_PHYS_ITEM_ID_LEN < id_len)
5176 		return -ENOSPC;
5177 
5178 	ppid->id[0] = adapter->hba_port_num + 1;
5179 	id = &ppid->id[1];
5180 	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5181 	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5182 		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5183 
5184 	ppid->id_len = id_len;
5185 
5186 	return 0;
5187 }
5188 
5189 static void be_set_rx_mode(struct net_device *dev)
5190 {
5191 	struct be_adapter *adapter = netdev_priv(dev);
5192 	struct be_cmd_work *work;
5193 
5194 	work = be_alloc_work(adapter, be_work_set_rx_mode);
5195 	if (work)
5196 		queue_work(be_wq, &work->work);
5197 }
5198 
/* Netdev operations table installed by be_netdev_init() */
static const struct net_device_ops be_netdev_ops = {
	.ndo_open		= be_open,
	.ndo_stop		= be_close,
	.ndo_start_xmit		= be_xmit,
	.ndo_set_rx_mode	= be_set_rx_mode,
	.ndo_set_mac_address	= be_mac_addr_set,
	.ndo_change_mtu		= be_change_mtu,
	.ndo_get_stats64	= be_get_stats64,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
	.ndo_set_vf_mac		= be_set_vf_mac,
	.ndo_set_vf_vlan	= be_set_vf_vlan,
	.ndo_set_vf_rate	= be_set_vf_tx_rate,
	.ndo_get_vf_config	= be_get_vf_config,
	.ndo_set_vf_link_state  = be_set_vf_link_state,
	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= be_netpoll,
#endif
	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
#ifdef CONFIG_NET_RX_BUSY_POLL
	.ndo_busy_poll		= be_busy_poll,
#endif
	.ndo_udp_tunnel_add	= be_add_vxlan_port,
	.ndo_udp_tunnel_del	= be_del_vxlan_port,
	.ndo_features_check	= be_features_check,
	.ndo_get_phys_port_id   = be_get_phys_port_id,
};
5229 
/* One-time netdev setup (features, flags, ops) before register_netdev() */
static void be_netdev_init(struct net_device *netdev)
{
	struct be_adapter *adapter = netdev_priv(netdev);

	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
		NETIF_F_HW_VLAN_CTAG_TX;
	/* Advertise RX hashing only when the interface supports RSS */
	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
		netdev->hw_features |= NETIF_F_RXHASH;

	/* Enabled features = all toggleable hw_features plus VLAN RX
	 * acceleration/filtering, which are added on top of hw_features.
	 */
	netdev->features |= netdev->hw_features |
		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;

	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;

	netdev->priv_flags |= IFF_UNICAST_FLT;

	netdev->flags |= IFF_MULTICAST;

	/* Cap GSO so that the HW limit accounts for the Ethernet header */
	netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);

	netdev->netdev_ops = &be_netdev_ops;

	netdev->ethtool_ops = &be_ethtool_ops;
}
5256 
/* Quiesce the adapter: detach the netdev, close it if running, and tear
 * down adapter resources via be_clear(). Counterpart of be_resume();
 * used by suspend, EEH and error-recovery paths.
 */
static void be_cleanup(struct be_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;

	/* rtnl serializes against concurrent ndo_open/ndo_stop */
	rtnl_lock();
	netif_device_detach(netdev);
	if (netif_running(netdev))
		be_close(netdev);
	rtnl_unlock();

	be_clear(adapter);
}
5269 
/* Re-initialize the adapter after be_cleanup(): run be_setup(), reopen
 * the interface if it was running, and re-attach the netdev.
 * Returns 0 on success or a negative errno.
 */
static int be_resume(struct be_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int status;

	status = be_setup(adapter);
	if (status)
		return status;

	/* rtnl serializes against concurrent ndo_open/ndo_stop */
	rtnl_lock();
	if (netif_running(netdev))
		status = be_open(netdev);
	rtnl_unlock();

	if (status)
		return status;

	netif_device_attach(netdev);

	return 0;
}
5291 
/* Initiate a chip soft reset by setting the SR bit in the
 * SLIPORT_SOFTRESET register (read-modify-write). Used by the TPE
 * error-recovery path; only PF0 triggers this (see be_tpe_recover()).
 */
static void be_soft_reset(struct be_adapter *adapter)
{
	u32 val;

	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
	val |= SLIPORT_SOFTRESET_SR_MASK;
	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
}
5301 
/* Decide whether the current HW error qualifies for soft-reset recovery.
 * Checks the POST stage for a recoverable-error code and enforces the
 * recovery policy: no recovery too soon after driver load, too soon
 * after the previous recovery, or for a repeated (consecutive) error
 * code. On success records the recovery time/code and returns true.
 */
static bool be_err_is_recoverable(struct be_adapter *adapter)
{
	struct be_error_recovery *err_rec = &adapter->error_recovery;
	unsigned long initial_idle_time =
		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
	unsigned long recovery_interval =
		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
	u16 ue_err_code;
	u32 val;

	val = be_POST_stage_get(adapter);
	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
		return false;
	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
	if (ue_err_code == 0)
		return false;

	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
		ue_err_code);

	/* Do not attempt recovery within the initial idle window after probe */
	if (jiffies - err_rec->probe_time <= initial_idle_time) {
		dev_err(&adapter->pdev->dev,
			"Cannot recover within %lu sec from driver load\n",
			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
		return false;
	}

	/* Rate-limit: one recovery per recovery_interval */
	if (err_rec->last_recovery_time &&
	    (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
		dev_err(&adapter->pdev->dev,
			"Cannot recover within %lu sec from last recovery\n",
			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
		return false;
	}

	/* The same error twice in a row is treated as unrecoverable */
	if (ue_err_code == err_rec->last_err_code) {
		dev_err(&adapter->pdev->dev,
			"Cannot recover from a consecutive TPE error\n");
		return false;
	}

	err_rec->last_recovery_time = jiffies;
	err_rec->last_err_code = ue_err_code;
	return true;
}
5347 
/* One step of the BEx/Skyhawk TPE (transient parity error) recovery
 * state machine. Each invocation advances recovery_state and sets
 * resched_delay for the next step.
 * Returns -EAGAIN while more steps remain, 0 when recovery may proceed
 * to re-initialization, or another negative errno on failure.
 */
static int be_tpe_recover(struct be_adapter *adapter)
{
	struct be_error_recovery *err_rec = &adapter->error_recovery;
	int status = -EAGAIN;
	u32 val;

	switch (err_rec->recovery_state) {
	case ERR_RECOVERY_ST_NONE:
		/* Start: wait the UE detection duration before probing HW */
		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
		break;

	case ERR_RECOVERY_ST_DETECT:
		val = be_POST_stage_get(adapter);
		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
		    POST_STAGE_RECOVERABLE_ERR) {
			dev_err(&adapter->pdev->dev,
				"Unrecoverable HW error detected: 0x%x\n", val);
			status = -EINVAL;
			err_rec->resched_delay = 0;
			break;
		}

		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");

		/* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
		 * milliseconds before it checks for final error status in
		 * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
		 * If it does, then PF0 initiates a Soft Reset.
		 */
		if (adapter->pf_num == 0) {
			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
			err_rec->resched_delay = err_rec->ue_to_reset_time -
					ERR_RECOVERY_UE_DETECT_DURATION;
			break;
		}

		/* Non-PF0 functions skip the reset step and only poll */
		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
		err_rec->resched_delay = err_rec->ue_to_poll_time -
					ERR_RECOVERY_UE_DETECT_DURATION;
		break;

	case ERR_RECOVERY_ST_RESET:
		/* PF0 only: verify recovery policy, then soft-reset the chip */
		if (!be_err_is_recoverable(adapter)) {
			dev_err(&adapter->pdev->dev,
				"Failed to meet recovery criteria\n");
			status = -EIO;
			err_rec->resched_delay = 0;
			break;
		}
		be_soft_reset(adapter);
		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
		err_rec->resched_delay = err_rec->ue_to_poll_time -
					err_rec->ue_to_reset_time;
		break;

	case ERR_RECOVERY_ST_PRE_POLL:
		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
		err_rec->resched_delay = 0;
		status = 0;			/* done */
		break;

	default:
		status = -EINVAL;
		err_rec->resched_delay = 0;
		break;
	}

	return status;
}
5418 
/* Attempt full adapter recovery after a detected HW error.
 * On non-Lancer chips the TPE recovery state machine must complete
 * first (be_tpe_recover() returns -EAGAIN between steps). Then waits
 * for FW readiness, tears down and re-creates all resources.
 * Returns 0 on success or a negative errno (including -EAGAIN when the
 * caller should retry later).
 */
static int be_err_recover(struct be_adapter *adapter)
{
	int status;

	if (!lancer_chip(adapter)) {
		if (!adapter->error_recovery.recovery_supported ||
		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
			return -EIO;
		status = be_tpe_recover(adapter);
		if (status)
			goto err;
	}

	/* Wait for adapter to reach quiescent state before
	 * destroying queues
	 */
	status = be_fw_wait_ready(adapter);
	if (status)
		goto err;

	adapter->flags |= BE_FLAGS_TRY_RECOVERY;

	be_cleanup(adapter);

	status = be_resume(adapter);
	if (status)
		goto err;

	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;

err:
	/* Success also falls through here with status == 0 */
	return status;
}
5452 
5453 static void be_err_detection_task(struct work_struct *work)
5454 {
5455 	struct be_error_recovery *err_rec =
5456 			container_of(work, struct be_error_recovery,
5457 				     err_detection_work.work);
5458 	struct be_adapter *adapter =
5459 			container_of(err_rec, struct be_adapter,
5460 				     error_recovery);
5461 	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5462 	struct device *dev = &adapter->pdev->dev;
5463 	int recovery_status;
5464 
5465 	be_detect_error(adapter);
5466 	if (!be_check_error(adapter, BE_ERROR_HW))
5467 		goto reschedule_task;
5468 
5469 	recovery_status = be_err_recover(adapter);
5470 	if (!recovery_status) {
5471 		err_rec->recovery_retries = 0;
5472 		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5473 		dev_info(dev, "Adapter recovery successful\n");
5474 		goto reschedule_task;
5475 	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5476 		/* BEx/SH recovery state machine */
5477 		if (adapter->pf_num == 0 &&
5478 		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5479 			dev_err(&adapter->pdev->dev,
5480 				"Adapter recovery in progress\n");
5481 		resched_delay = err_rec->resched_delay;
5482 		goto reschedule_task;
5483 	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5484 		/* For VFs, check if PF have allocated resources
5485 		 * every second.
5486 		 */
5487 		dev_err(dev, "Re-trying adapter recovery\n");
5488 		goto reschedule_task;
5489 	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5490 		   ERR_RECOVERY_MAX_RETRY_COUNT) {
5491 		/* In case of another error during recovery, it takes 30 sec
5492 		 * for adapter to come out of error. Retry error recovery after
5493 		 * this time interval.
5494 		 */
5495 		dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5496 		resched_delay = ERR_RECOVERY_RETRY_DELAY;
5497 		goto reschedule_task;
5498 	} else {
5499 		dev_err(dev, "Adapter recovery failed\n");
5500 		dev_err(dev, "Please reboot server to recover\n");
5501 	}
5502 
5503 	return;
5504 
5505 reschedule_task:
5506 	be_schedule_err_detection(adapter, resched_delay);
5507 }
5508 
5509 static void be_log_sfp_info(struct be_adapter *adapter)
5510 {
5511 	int status;
5512 
5513 	status = be_cmd_query_sfp_info(adapter);
5514 	if (!status) {
5515 		dev_err(&adapter->pdev->dev,
5516 			"Port %c: %s Vendor: %s part no: %s",
5517 			adapter->port_name,
5518 			be_misconfig_evt_port_state[adapter->phy_state],
5519 			adapter->phy.vendor_name,
5520 			adapter->phy.vendor_pn);
5521 	}
5522 	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5523 }
5524 
/* Periodic (1 s) housekeeping work: die-temperature query, stats
 * refresh, RX replenish of starved queues, EQ-delay update and SFP
 * misconfiguration logging. Re-queues itself unconditionally.
 */
static void be_worker(struct work_struct *work)
{
	struct be_adapter *adapter =
		container_of(work, struct be_adapter, work.work);
	struct be_rx_obj *rxo;
	int i;

	/* Query die temperature every be_get_temp_freq ticks (PF only) */
	if (be_physfn(adapter) &&
	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
		be_cmd_get_die_temperature(adapter);

	/* when interrupts are not yet enabled, just reap any pending
	 * mcc completions
	 */
	if (!netif_running(adapter->netdev)) {
		local_bh_disable();
		be_process_mcc(adapter);
		local_bh_enable();
		goto reschedule;
	}

	/* Issue a new stats request only if the previous one completed */
	if (!adapter->stats_cmd_sent) {
		if (lancer_chip(adapter))
			lancer_cmd_get_pport_stats(adapter,
						   &adapter->stats_cmd);
		else
			be_cmd_get_stats(adapter, &adapter->stats_cmd);
	}

	for_all_rx_queues(adapter, rxo, i) {
		/* Replenish RX-queues starved due to memory
		 * allocation failures.
		 */
		if (rxo->rx_post_starved)
			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
	}

	/* EQ-delay update for Skyhawk is done while notifying EQ */
	if (!skyhawk_chip(adapter))
		be_eqd_update(adapter, false);

	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
		be_log_sfp_info(adapter);

reschedule:
	adapter->work_counter++;
	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
}
5573 
/* Unmap whatever BARs be_map_pci_bars() mapped. pcicfg is unmapped
 * only when it was an independent mapping (pcicfg_mapped true); on VFs
 * it aliases into the db mapping and must not be unmapped separately.
 */
static void be_unmap_pci_bars(struct be_adapter *adapter)
{
	if (adapter->csr)
		pci_iounmap(adapter->pdev, adapter->csr);
	if (adapter->db)
		pci_iounmap(adapter->pdev, adapter->db);
	if (adapter->pcicfg && adapter->pcicfg_mapped)
		pci_iounmap(adapter->pdev, adapter->pcicfg);
}
5583 
/* BAR number holding the doorbell region: BAR 0 on Lancer chips and on
 * VFs, BAR 4 otherwise.
 */
static int db_bar(struct be_adapter *adapter)
{
	return (lancer_chip(adapter) || be_virtfn(adapter)) ? 0 : 4;
}
5591 
/* Record the RoCE doorbell window (physical address/size within the
 * doorbell BAR) for the RoCE driver; Skyhawk only. Always returns 0.
 */
static int be_roce_map_pci_bars(struct be_adapter *adapter)
{
	if (skyhawk_chip(adapter)) {
		adapter->roce_db.size = 4096;
		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
							      db_bar(adapter));
		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
							       db_bar(adapter));
	}
	return 0;
}
5603 
/* Map the PCI BARs used by the driver (CSR for BEx PFs, doorbell, and
 * PCICFG for Skyhawk/BEx) and read SLI interface info from config
 * space. Returns 0 on success or -ENOMEM; on failure any mappings
 * already made are released.
 */
static int be_map_pci_bars(struct be_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	u8 __iomem *addr;
	u32 sli_intf;

	/* Derive SLI family and PF/VF status from the SLI_INTF register */
	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
				SLI_INTF_FAMILY_SHIFT;
	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;

	/* CSR BAR (2) is only mapped on BEx physical functions */
	if (BEx_chip(adapter) && be_physfn(adapter)) {
		adapter->csr = pci_iomap(pdev, 2, 0);
		if (!adapter->csr)
			return -ENOMEM;
	}

	addr = pci_iomap(pdev, db_bar(adapter), 0);
	if (!addr)
		goto pci_map_err;
	adapter->db = addr;

	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
		if (be_physfn(adapter)) {
			/* PCICFG is the 2nd BAR in BE2 */
			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
			if (!addr)
				goto pci_map_err;
			adapter->pcicfg = addr;
			adapter->pcicfg_mapped = true;
		} else {
			/* VFs reach PCICFG through an offset in the db BAR;
			 * pcicfg_mapped=false tells unmap not to free it.
			 */
			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
			adapter->pcicfg_mapped = false;
		}
	}

	be_roce_map_pci_bars(adapter);
	return 0;

pci_map_err:
	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
	be_unmap_pci_bars(adapter);
	return -ENOMEM;
}
5648 
5649 static void be_drv_cleanup(struct be_adapter *adapter)
5650 {
5651 	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5652 	struct device *dev = &adapter->pdev->dev;
5653 
5654 	if (mem->va)
5655 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5656 
5657 	mem = &adapter->rx_filter;
5658 	if (mem->va)
5659 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5660 
5661 	mem = &adapter->stats_cmd;
5662 	if (mem->va)
5663 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5664 }
5665 
/* Allocate and initialize various fields in be_adapter struct */
/* Allocates the DMA command buffers (mailbox, rx-filter, stats), sets
 * up locks, completion and delayed work items, and seeds default flow
 * control / temperature polling settings.
 * Returns 0 on success or -ENOMEM, undoing partial allocations.
 */
static int be_drv_init(struct be_adapter *adapter)
{
	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
	struct be_dma_mem *rx_filter = &adapter->rx_filter;
	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
	struct device *dev = &adapter->pdev->dev;
	int status = 0;

	/* Over-allocate by 16 bytes so the mailbox can be 16-byte aligned */
	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
	mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
						 &mbox_mem_alloc->dma,
						 GFP_KERNEL);
	if (!mbox_mem_alloc->va)
		return -ENOMEM;

	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);

	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
	rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
					    &rx_filter->dma, GFP_KERNEL);
	if (!rx_filter->va) {
		status = -ENOMEM;
		goto free_mbox;
	}

	/* Stats request size depends on chip generation */
	if (lancer_chip(adapter))
		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
	else if (BE2_chip(adapter))
		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
	else if (BE3_chip(adapter))
		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
	else
		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
	stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
					    &stats_cmd->dma, GFP_KERNEL);
	if (!stats_cmd->va) {
		status = -ENOMEM;
		goto free_rx_filter;
	}

	mutex_init(&adapter->mbox_lock);
	mutex_init(&adapter->mcc_lock);
	mutex_init(&adapter->rx_filter_lock);
	spin_lock_init(&adapter->mcc_cq_lock);
	init_completion(&adapter->et_cmd_compl);

	pci_save_state(adapter->pdev);

	INIT_DELAYED_WORK(&adapter->work, be_worker);

	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
	adapter->error_recovery.resched_delay = 0;
	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
			  be_err_detection_task);

	adapter->rx_fc = true;
	adapter->tx_fc = true;

	/* Must be a power of 2 or else MODULO will BUG_ON */
	adapter->be_get_temp_freq = 64;

	return 0;

free_rx_filter:
	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
free_mbox:
	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
			  mbox_mem_alloc->dma);
	return status;
}
5740 
/* PCI remove hook: tear down in reverse order of be_probe() — RoCE,
 * interrupts, error detection, netdev, adapter resources, FW session,
 * BAR mappings, DMA buffers and finally the PCI device itself.
 */
static void be_remove(struct pci_dev *pdev)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);

	if (!adapter)
		return;

	be_roce_dev_remove(adapter);
	be_intr_set(adapter, false);

	be_cancel_err_detection(adapter);

	unregister_netdev(adapter->netdev);

	be_clear(adapter);

	/* Skip the function reset if VFs are still assigned to guests */
	if (!pci_vfs_assigned(adapter->pdev))
		be_cmd_reset_function(adapter);

	/* tell fw we're done with firing cmds */
	be_cmd_fw_clean(adapter);

	be_unmap_pci_bars(adapter);
	be_drv_cleanup(adapter);

	pci_disable_pcie_error_reporting(pdev);

	pci_release_regions(pdev);
	pci_disable_device(pdev);

	free_netdev(adapter->netdev);
}
5773 
5774 static ssize_t be_hwmon_show_temp(struct device *dev,
5775 				  struct device_attribute *dev_attr,
5776 				  char *buf)
5777 {
5778 	struct be_adapter *adapter = dev_get_drvdata(dev);
5779 
5780 	/* Unit: millidegree Celsius */
5781 	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5782 		return -EIO;
5783 	else
5784 		return sprintf(buf, "%u\n",
5785 			       adapter->hwmon_info.be_on_die_temp * 1000);
5786 }
5787 
/* Read-only hwmon attribute exposing the on-die temperature */
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
			  be_hwmon_show_temp, NULL, 1);

static struct attribute *be_hwmon_attrs[] = {
	&sensor_dev_attr_temp1_input.dev_attr.attr,
	NULL
};

/* Generates be_hwmon_groups for devm_hwmon_device_register_with_groups() */
ATTRIBUTE_GROUPS(be_hwmon);
5797 
5798 static char *mc_name(struct be_adapter *adapter)
5799 {
5800 	char *str = "";	/* default */
5801 
5802 	switch (adapter->mc_type) {
5803 	case UMC:
5804 		str = "UMC";
5805 		break;
5806 	case FLEX10:
5807 		str = "FLEX10";
5808 		break;
5809 	case vNIC1:
5810 		str = "vNIC-1";
5811 		break;
5812 	case nPAR:
5813 		str = "nPAR";
5814 		break;
5815 	case UFP:
5816 		str = "UFP";
5817 		break;
5818 	case vNIC2:
5819 		str = "vNIC-2";
5820 		break;
5821 	default:
5822 		str = "";
5823 	}
5824 
5825 	return str;
5826 }
5827 
/* "PF" for a physical function, "VF" otherwise */
static inline char *func_name(struct be_adapter *adapter)
{
	if (be_physfn(adapter))
		return "PF";
	return "VF";
}
5832 
5833 static inline char *nic_name(struct pci_dev *pdev)
5834 {
5835 	switch (pdev->device) {
5836 	case OC_DEVICE_ID1:
5837 		return OC_NAME;
5838 	case OC_DEVICE_ID2:
5839 		return OC_NAME_BE;
5840 	case OC_DEVICE_ID3:
5841 	case OC_DEVICE_ID4:
5842 		return OC_NAME_LANCER;
5843 	case BE_DEVICE_ID2:
5844 		return BE3_NAME;
5845 	case OC_DEVICE_ID5:
5846 	case OC_DEVICE_ID6:
5847 		return OC_NAME_SH;
5848 	default:
5849 		return BE_NAME;
5850 	}
5851 }
5852 
/* PCI probe hook: enable the device, allocate the netdev/adapter, set
 * the DMA mask, map BARs, initialize driver state and HW resources,
 * register the netdev and kick off error detection and (PF-only) hwmon.
 * Returns 0 on success or a negative errno after unwinding.
 */
static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
{
	struct be_adapter *adapter;
	struct net_device *netdev;
	int status = 0;

	dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);

	status = pci_enable_device(pdev);
	if (status)
		goto do_none;

	status = pci_request_regions(pdev, DRV_NAME);
	if (status)
		goto disable_dev;
	pci_set_master(pdev);

	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
	if (!netdev) {
		status = -ENOMEM;
		goto rel_reg;
	}
	adapter = netdev_priv(netdev);
	adapter->pdev = pdev;
	pci_set_drvdata(pdev, adapter);
	adapter->netdev = netdev;
	SET_NETDEV_DEV(netdev, &pdev->dev);

	/* Prefer 64-bit DMA; fall back to 32-bit if unsupported */
	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (!status) {
		netdev->features |= NETIF_F_HIGHDMA;
	} else {
		status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
		if (status) {
			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
			goto free_netdev;
		}
	}

	/* AER is best-effort; failure to enable it is not fatal */
	status = pci_enable_pcie_error_reporting(pdev);
	if (!status)
		dev_info(&pdev->dev, "PCIe error reporting enabled\n");

	status = be_map_pci_bars(adapter);
	if (status)
		goto free_netdev;

	status = be_drv_init(adapter);
	if (status)
		goto unmap_bars;

	status = be_setup(adapter);
	if (status)
		goto drv_cleanup;

	be_netdev_init(netdev);
	status = register_netdev(netdev);
	if (status != 0)
		goto unsetup;

	be_roce_dev_add(adapter);

	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
	adapter->error_recovery.probe_time = jiffies;

	/* On Die temperature not supported for VF. */
	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
		adapter->hwmon_info.hwmon_dev =
			devm_hwmon_device_register_with_groups(&pdev->dev,
							       DRV_NAME,
							       adapter,
							       be_hwmon_groups);
		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
	}

	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
		 func_name(adapter), mc_name(adapter), adapter->port_name);

	return 0;

unsetup:
	be_clear(adapter);
drv_cleanup:
	be_drv_cleanup(adapter);
unmap_bars:
	be_unmap_pci_bars(adapter);
free_netdev:
	free_netdev(netdev);
rel_reg:
	pci_release_regions(pdev);
disable_dev:
	pci_disable_device(pdev);
do_none:
	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
	return status;
}
5949 
/* Legacy PM suspend hook: quiesce the adapter, then power the PCI
 * device down into the requested sleep state. Always returns 0.
 */
static int be_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);

	be_intr_set(adapter, false);
	be_cancel_err_detection(adapter);

	be_cleanup(adapter);

	pci_save_state(pdev);
	pci_disable_device(pdev);
	pci_set_power_state(pdev, pci_choose_state(pdev, state));
	return 0;
}
5964 
/* Legacy PM resume hook: re-enable the PCI device, restore config
 * space, re-initialize the adapter and restart error detection.
 * Returns 0 on success or a negative errno.
 */
static int be_pci_resume(struct pci_dev *pdev)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);
	int status = 0;

	status = pci_enable_device(pdev);
	if (status)
		return status;

	pci_restore_state(pdev);

	status = be_resume(adapter);
	if (status)
		return status;

	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);

	return 0;
}
5984 
5985 /*
5986  * An FLR will stop BE from DMAing any data.
5987  */
5988 static void be_shutdown(struct pci_dev *pdev)
5989 {
5990 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5991 
5992 	if (!adapter)
5993 		return;
5994 
5995 	be_roce_dev_shutdown(adapter);
5996 	cancel_delayed_work_sync(&adapter->work);
5997 	be_cancel_err_detection(adapter);
5998 
5999 	netif_device_detach(adapter->netdev);
6000 
6001 	be_cmd_reset_function(adapter);
6002 
6003 	pci_disable_device(pdev);
6004 }
6005 
/* EEH error_detected callback: mark the EEH error, quiesce the adapter
 * (once) and disable the PCI device.
 * Returns PCI_ERS_RESULT_DISCONNECT on permanent failure, otherwise
 * PCI_ERS_RESULT_NEED_RESET to request a slot reset.
 */
static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
					    pci_channel_state_t state)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);

	dev_err(&adapter->pdev->dev, "EEH error detected\n");

	be_roce_dev_remove(adapter);

	/* Guard against repeated callbacks: clean up only once per error */
	if (!be_check_error(adapter, BE_ERROR_EEH)) {
		be_set_error(adapter, BE_ERROR_EEH);

		be_cancel_err_detection(adapter);

		be_cleanup(adapter);
	}

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	pci_disable_device(pdev);

	/* The error could cause the FW to trigger a flash debug dump.
	 * Resetting the card while flash dump is in progress
	 * can cause it not to recover; wait for it to finish.
	 * Wait only for first function as it is needed only once per
	 * adapter.
	 */
	if (pdev->devfn == 0)
		ssleep(30);

	return PCI_ERS_RESULT_NEED_RESET;
}
6039 
/* EEH slot_reset callback: re-enable the device after a slot reset,
 * wait for FW readiness and clear the recorded error state.
 * Returns PCI_ERS_RESULT_RECOVERED on success, else DISCONNECT.
 */
static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);
	int status;

	dev_info(&adapter->pdev->dev, "EEH reset\n");

	status = pci_enable_device(pdev);
	if (status)
		return PCI_ERS_RESULT_DISCONNECT;

	pci_set_master(pdev);
	pci_restore_state(pdev);

	/* Check if card is ok and fw is ready */
	dev_info(&adapter->pdev->dev,
		 "Waiting for FW to be ready after EEH reset\n");
	status = be_fw_wait_ready(adapter);
	if (status)
		return PCI_ERS_RESULT_DISCONNECT;

	pci_cleanup_aer_uncorrect_error_status(pdev);
	be_clear_error(adapter, BE_CLEAR_ALL);
	return PCI_ERS_RESULT_RECOVERED;
}
6065 
/* EEH resume callback: bring the adapter fully back up after a
 * successful slot reset (be_resume(), RoCE re-add, restart error
 * detection). Failure is only logged; EEH offers no recovery here.
 */
static void be_eeh_resume(struct pci_dev *pdev)
{
	int status = 0;
	struct be_adapter *adapter = pci_get_drvdata(pdev);

	dev_info(&adapter->pdev->dev, "EEH resume\n");

	pci_save_state(pdev);

	status = be_resume(adapter);
	if (status)
		goto err;

	be_roce_dev_add(adapter);

	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
	return;
err:
	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
}
6086 
/* sriov_configure hook: enable @num_vfs VFs (redistributing pool
 * resources on Skyhawk) or disable all VFs when @num_vfs is 0.
 * Returns the number of VFs enabled, 0 on plain success, or a negative
 * errno.
 */
static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
{
	struct be_adapter *adapter = pci_get_drvdata(pdev);
	struct be_resources vft_res = {0};
	int status;

	/* NOTE(review): be_vf_clear() runs before the pci_vfs_assigned()
	 * check below — confirm VFs cannot still be assigned to guests
	 * at this point.
	 */
	if (!num_vfs)
		be_vf_clear(adapter);

	adapter->num_vfs = num_vfs;

	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
		dev_warn(&pdev->dev,
			 "Cannot disable VFs while they are assigned\n");
		return -EBUSY;
	}

	/* When the HW is in SRIOV capable configuration, the PF-pool resources
	 * are equally distributed across the max-number of VFs. The user may
	 * request only a subset of the max-vfs to be enabled.
	 * Based on num_vfs, redistribute the resources across num_vfs so that
	 * each VF will have access to more number of resources.
	 * This facility is not available in BE3 FW.
	 * Also, this is done by FW in Lancer chip.
	 */
	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
		be_calculate_vf_res(adapter, adapter->num_vfs,
				    &vft_res);
		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
						 adapter->num_vfs, &vft_res);
		if (status)
			dev_err(&pdev->dev,
				"Failed to optimize SR-IOV resources\n");
	}

	status = be_get_resources(adapter);
	if (status)
		return be_cmd_status(status);

	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
	rtnl_lock();
	status = be_update_queues(adapter);
	rtnl_unlock();
	if (status)
		return be_cmd_status(status);

	if (adapter->num_vfs)
		status = be_vf_setup(adapter);

	if (!status)
		return adapter->num_vfs;

	return 0;
}
6141 
/* PCIe/EEH error recovery callbacks registered via be_driver */
static const struct pci_error_handlers be_eeh_handlers = {
	.error_detected = be_eeh_err_detected,
	.slot_reset = be_eeh_reset,
	.resume = be_eeh_resume,
};
6147 
/* PCI driver descriptor registered in be_init_module() */
static struct pci_driver be_driver = {
	.name = DRV_NAME,
	.id_table = be_dev_ids,
	.probe = be_probe,
	.remove = be_remove,
	.suspend = be_suspend,
	.resume = be_pci_resume,
	.shutdown = be_shutdown,
	.sriov_configure = be_pci_sriov_configure,
	.err_handler = &be_eeh_handlers
};
6159 
6160 static int __init be_init_module(void)
6161 {
6162 	int status;
6163 
6164 	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6165 	    rx_frag_size != 2048) {
6166 		printk(KERN_WARNING DRV_NAME
6167 			" : Module param rx_frag_size must be 2048/4096/8192."
6168 			" Using 2048\n");
6169 		rx_frag_size = 2048;
6170 	}
6171 
6172 	if (num_vfs > 0) {
6173 		pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
6174 		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6175 	}
6176 
6177 	be_wq = create_singlethread_workqueue("be_wq");
6178 	if (!be_wq) {
6179 		pr_warn(DRV_NAME "workqueue creation failed\n");
6180 		return -1;
6181 	}
6182 
6183 	be_err_recovery_workq =
6184 		create_singlethread_workqueue("be_err_recover");
6185 	if (!be_err_recovery_workq)
6186 		pr_warn(DRV_NAME "Could not create error recovery workqueue\n");
6187 
6188 	status = pci_register_driver(&be_driver);
6189 	if (status) {
6190 		destroy_workqueue(be_wq);
6191 		be_destroy_err_recovery_workq();
6192 	}
6193 	return status;
6194 }
6195 module_init(be_init_module);
6196 
/* Module exit: unregister the PCI driver, then destroy the module-wide
 * workqueues created in be_init_module().
 */
static void __exit be_exit_module(void)
{
	pci_unregister_driver(&be_driver);

	be_destroy_err_recovery_workq();

	/* be_wq may be NULL only if init failed, but check defensively */
	if (be_wq)
		destroy_workqueue(be_wq);
}
module_exit(be_exit_module);
6207