xref: /linux/drivers/net/ethernet/emulex/benet/be_main.c (revision a4cc96d1f0170b779c32c6b2cc58764f5d2cdef0)
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17 
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27 
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32 
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39 
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43 
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48 
49 static const struct pci_device_id be_dev_ids[] = {
50 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57 	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58 	{ 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61 
62 /* Workqueue used by all functions for defering cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64 
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67 	"CEV",
68 	"CTX",
69 	"DBUF",
70 	"ERX",
71 	"Host",
72 	"MPU",
73 	"NDMA",
74 	"PTC ",
75 	"RDMA ",
76 	"RXF ",
77 	"RXIPS ",
78 	"RXULP0 ",
79 	"RXULP1 ",
80 	"RXULP2 ",
81 	"TIM ",
82 	"TPOST ",
83 	"TPRE ",
84 	"TXIPS ",
85 	"TXULP0 ",
86 	"TXULP1 ",
87 	"UC ",
88 	"WDMA ",
89 	"TXULP2 ",
90 	"HOST1 ",
91 	"P0_OB_LINK ",
92 	"P1_OB_LINK ",
93 	"HOST_GPIO ",
94 	"MBOX ",
95 	"ERX2 ",
96 	"SPARE ",
97 	"JTAG ",
98 	"MPU_INTPEND "
99 };
100 
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103 	"LPCMEMHOST",
104 	"MGMT_MAC",
105 	"PCS0ONLINE",
106 	"MPU_IRAM",
107 	"PCS1ONLINE",
108 	"PCTL0",
109 	"PCTL1",
110 	"PMEM",
111 	"RR",
112 	"TXPB",
113 	"RXPP",
114 	"XAUI",
115 	"TXP",
116 	"ARM",
117 	"IPC",
118 	"HOST2",
119 	"HOST3",
120 	"HOST4",
121 	"HOST5",
122 	"HOST6",
123 	"HOST7",
124 	"ECRC",
125 	"Poison TLP",
126 	"NETC",
127 	"PERIPH",
128 	"LLTXULP",
129 	"D2P",
130 	"RCON",
131 	"LDMA",
132 	"LLTXP",
133 	"LLTXPB",
134 	"Unknown"
135 };
136 
137 #define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
138 				 BE_IF_FLAGS_BROADCAST | \
139 				 BE_IF_FLAGS_MULTICAST | \
140 				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
141 
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144 	struct be_dma_mem *mem = &q->dma_mem;
145 
146 	if (mem->va) {
147 		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148 				  mem->dma);
149 		mem->va = NULL;
150 	}
151 }
152 
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154 			  u16 len, u16 entry_size)
155 {
156 	struct be_dma_mem *mem = &q->dma_mem;
157 
158 	memset(q, 0, sizeof(*q));
159 	q->len = len;
160 	q->entry_size = entry_size;
161 	mem->size = len * entry_size;
162 	mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163 				      GFP_KERNEL);
164 	if (!mem->va)
165 		return -ENOMEM;
166 	return 0;
167 }
168 
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171 	u32 reg, enabled;
172 
173 	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174 			      &reg);
175 	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176 
177 	if (!enabled && enable)
178 		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179 	else if (enabled && !enable)
180 		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181 	else
182 		return;
183 
184 	pci_write_config_dword(adapter->pdev,
185 			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187 
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190 	int status = 0;
191 
192 	/* On lancer interrupts can't be controlled via this register */
193 	if (lancer_chip(adapter))
194 		return;
195 
196 	if (be_check_error(adapter, BE_ERROR_EEH))
197 		return;
198 
199 	status = be_cmd_intr_set(adapter, enable);
200 	if (status)
201 		be_reg_intr_set(adapter, enable);
202 }
203 
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206 	u32 val = 0;
207 
208 	if (be_check_error(adapter, BE_ERROR_HW))
209 		return;
210 
211 	val |= qid & DB_RQ_RING_ID_MASK;
212 	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213 
214 	wmb();
215 	iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217 
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219 			  u16 posted)
220 {
221 	u32 val = 0;
222 
223 	if (be_check_error(adapter, BE_ERROR_HW))
224 		return;
225 
226 	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227 	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228 
229 	wmb();
230 	iowrite32(val, adapter->db + txo->db_offset);
231 }
232 
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234 			 bool arm, bool clear_int, u16 num_popped,
235 			 u32 eq_delay_mult_enc)
236 {
237 	u32 val = 0;
238 
239 	val |= qid & DB_EQ_RING_ID_MASK;
240 	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241 
242 	if (be_check_error(adapter, BE_ERROR_HW))
243 		return;
244 
245 	if (arm)
246 		val |= 1 << DB_EQ_REARM_SHIFT;
247 	if (clear_int)
248 		val |= 1 << DB_EQ_CLR_SHIFT;
249 	val |= 1 << DB_EQ_EVNT_SHIFT;
250 	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251 	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252 	iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254 
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257 	u32 val = 0;
258 
259 	val |= qid & DB_CQ_RING_ID_MASK;
260 	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261 			DB_CQ_RING_ID_EXT_MASK_SHIFT);
262 
263 	if (be_check_error(adapter, BE_ERROR_HW))
264 		return;
265 
266 	if (arm)
267 		val |= 1 << DB_CQ_REARM_SHIFT;
268 	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269 	iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271 
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274 	int i;
275 
276 	/* Check if mac has already been added as part of uc-list */
277 	for (i = 0; i < adapter->uc_macs; i++) {
278 		if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN],
279 				     mac)) {
280 			/* mac already added, skip addition */
281 			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
282 			return 0;
283 		}
284 	}
285 
286 	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
287 			       &adapter->pmac_id[0], 0);
288 }
289 
290 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
291 {
292 	int i;
293 
294 	/* Skip deletion if the programmed mac is
295 	 * being used in uc-list
296 	 */
297 	for (i = 0; i < adapter->uc_macs; i++) {
298 		if (adapter->pmac_id[i + 1] == pmac_id)
299 			return;
300 	}
301 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
302 }
303 
304 static int be_mac_addr_set(struct net_device *netdev, void *p)
305 {
306 	struct be_adapter *adapter = netdev_priv(netdev);
307 	struct device *dev = &adapter->pdev->dev;
308 	struct sockaddr *addr = p;
309 	int status;
310 	u8 mac[ETH_ALEN];
311 	u32 old_pmac_id = adapter->pmac_id[0];
312 
313 	if (!is_valid_ether_addr(addr->sa_data))
314 		return -EADDRNOTAVAIL;
315 
316 	/* Proceed further only if, User provided MAC is different
317 	 * from active MAC
318 	 */
319 	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
320 		return 0;
321 
322 	/* if device is not running, copy MAC to netdev->dev_addr */
323 	if (!netif_running(netdev))
324 		goto done;
325 
326 	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
327 	 * privilege or if PF did not provision the new MAC address.
328 	 * On BE3, this cmd will always fail if the VF doesn't have the
329 	 * FILTMGMT privilege. This failure is OK, only if the PF programmed
330 	 * the MAC for the VF.
331 	 */
332 	mutex_lock(&adapter->rx_filter_lock);
333 	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
334 	if (!status) {
335 
336 		/* Delete the old programmed MAC. This call may fail if the
337 		 * old MAC was already deleted by the PF driver.
338 		 */
339 		if (adapter->pmac_id[0] != old_pmac_id)
340 			be_dev_mac_del(adapter, old_pmac_id);
341 	}
342 
343 	mutex_unlock(&adapter->rx_filter_lock);
344 	/* Decide if the new MAC is successfully activated only after
345 	 * querying the FW
346 	 */
347 	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
348 				       adapter->if_handle, true, 0);
349 	if (status)
350 		goto err;
351 
352 	/* The MAC change did not happen, either due to lack of privilege
353 	 * or PF didn't pre-provision.
354 	 */
355 	if (!ether_addr_equal(addr->sa_data, mac)) {
356 		status = -EPERM;
357 		goto err;
358 	}
359 done:
360 	ether_addr_copy(adapter->dev_mac, addr->sa_data);
361 	ether_addr_copy(netdev->dev_addr, addr->sa_data);
362 	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
363 	return 0;
364 err:
365 	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
366 	return status;
367 }
368 
369 /* BE2 supports only v0 cmd */
370 static void *hw_stats_from_cmd(struct be_adapter *adapter)
371 {
372 	if (BE2_chip(adapter)) {
373 		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
374 
375 		return &cmd->hw_stats;
376 	} else if (BE3_chip(adapter)) {
377 		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
378 
379 		return &cmd->hw_stats;
380 	} else {
381 		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
382 
383 		return &cmd->hw_stats;
384 	}
385 }
386 
387 /* BE2 supports only v0 cmd */
388 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
389 {
390 	if (BE2_chip(adapter)) {
391 		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
392 
393 		return &hw_stats->erx;
394 	} else if (BE3_chip(adapter)) {
395 		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
396 
397 		return &hw_stats->erx;
398 	} else {
399 		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
400 
401 		return &hw_stats->erx;
402 	}
403 }
404 
405 static void populate_be_v0_stats(struct be_adapter *adapter)
406 {
407 	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
409 	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
410 	struct be_port_rxf_stats_v0 *port_stats =
411 					&rxf_stats->port[adapter->port_num];
412 	struct be_drv_stats *drvs = &adapter->drv_stats;
413 
414 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
415 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
416 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
417 	drvs->rx_control_frames = port_stats->rx_control_frames;
418 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
419 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
420 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
421 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
422 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
423 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
424 	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
425 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
426 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
427 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
428 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
429 	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
430 	drvs->rx_dropped_header_too_small =
431 		port_stats->rx_dropped_header_too_small;
432 	drvs->rx_address_filtered =
433 					port_stats->rx_address_filtered +
434 					port_stats->rx_vlan_filtered;
435 	drvs->rx_alignment_symbol_errors =
436 		port_stats->rx_alignment_symbol_errors;
437 
438 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
439 	drvs->tx_controlframes = port_stats->tx_controlframes;
440 
441 	if (adapter->port_num)
442 		drvs->jabber_events = rxf_stats->port1_jabber_events;
443 	else
444 		drvs->jabber_events = rxf_stats->port0_jabber_events;
445 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
446 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
447 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
448 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
449 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
450 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
451 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
452 }
453 
454 static void populate_be_v1_stats(struct be_adapter *adapter)
455 {
456 	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
457 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
458 	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
459 	struct be_port_rxf_stats_v1 *port_stats =
460 					&rxf_stats->port[adapter->port_num];
461 	struct be_drv_stats *drvs = &adapter->drv_stats;
462 
463 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
464 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
465 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
466 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
467 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
468 	drvs->rx_control_frames = port_stats->rx_control_frames;
469 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
470 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
471 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
472 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
473 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
474 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
475 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
476 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
477 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
478 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
479 	drvs->rx_dropped_header_too_small =
480 		port_stats->rx_dropped_header_too_small;
481 	drvs->rx_input_fifo_overflow_drop =
482 		port_stats->rx_input_fifo_overflow_drop;
483 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
484 	drvs->rx_alignment_symbol_errors =
485 		port_stats->rx_alignment_symbol_errors;
486 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
487 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
488 	drvs->tx_controlframes = port_stats->tx_controlframes;
489 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
490 	drvs->jabber_events = port_stats->jabber_events;
491 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
492 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
493 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
494 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
495 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
496 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
497 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
498 }
499 
500 static void populate_be_v2_stats(struct be_adapter *adapter)
501 {
502 	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
503 	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
504 	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
505 	struct be_port_rxf_stats_v2 *port_stats =
506 					&rxf_stats->port[adapter->port_num];
507 	struct be_drv_stats *drvs = &adapter->drv_stats;
508 
509 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
510 	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
511 	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
512 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
513 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
514 	drvs->rx_control_frames = port_stats->rx_control_frames;
515 	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
516 	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
517 	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
518 	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
519 	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
520 	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
521 	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
522 	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
523 	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
524 	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
525 	drvs->rx_dropped_header_too_small =
526 		port_stats->rx_dropped_header_too_small;
527 	drvs->rx_input_fifo_overflow_drop =
528 		port_stats->rx_input_fifo_overflow_drop;
529 	drvs->rx_address_filtered = port_stats->rx_address_filtered;
530 	drvs->rx_alignment_symbol_errors =
531 		port_stats->rx_alignment_symbol_errors;
532 	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
533 	drvs->tx_pauseframes = port_stats->tx_pauseframes;
534 	drvs->tx_controlframes = port_stats->tx_controlframes;
535 	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
536 	drvs->jabber_events = port_stats->jabber_events;
537 	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
538 	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
539 	drvs->forwarded_packets = rxf_stats->forwarded_packets;
540 	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
541 	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
542 	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
543 	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
544 	if (be_roce_supported(adapter)) {
545 		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
546 		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
547 		drvs->rx_roce_frames = port_stats->roce_frames_received;
548 		drvs->roce_drops_crc = port_stats->roce_drops_crc;
549 		drvs->roce_drops_payload_len =
550 			port_stats->roce_drops_payload_len;
551 	}
552 }
553 
554 static void populate_lancer_stats(struct be_adapter *adapter)
555 {
556 	struct be_drv_stats *drvs = &adapter->drv_stats;
557 	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
558 
559 	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
560 	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
561 	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
562 	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
563 	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
564 	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
565 	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
566 	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
567 	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
568 	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
569 	drvs->rx_dropped_tcp_length =
570 				pport_stats->rx_dropped_invalid_tcp_length;
571 	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
572 	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
573 	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
574 	drvs->rx_dropped_header_too_small =
575 				pport_stats->rx_dropped_header_too_small;
576 	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
577 	drvs->rx_address_filtered =
578 					pport_stats->rx_address_filtered +
579 					pport_stats->rx_vlan_filtered;
580 	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
581 	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
582 	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
583 	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
584 	drvs->jabber_events = pport_stats->rx_jabbers;
585 	drvs->forwarded_packets = pport_stats->num_forwards_lo;
586 	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
587 	drvs->rx_drops_too_many_frags =
588 				pport_stats->rx_drops_too_many_frags_lo;
589 }
590 
591 static void accumulate_16bit_val(u32 *acc, u16 val)
592 {
593 #define lo(x)			(x & 0xFFFF)
594 #define hi(x)			(x & 0xFFFF0000)
595 	bool wrapped = val < lo(*acc);
596 	u32 newacc = hi(*acc) + val;
597 
598 	if (wrapped)
599 		newacc += 65536;
600 	ACCESS_ONCE(*acc) = newacc;
601 }
602 
603 static void populate_erx_stats(struct be_adapter *adapter,
604 			       struct be_rx_obj *rxo, u32 erx_stat)
605 {
606 	if (!BEx_chip(adapter))
607 		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
608 	else
609 		/* below erx HW counter can actually wrap around after
610 		 * 65535. Driver accumulates a 32-bit value
611 		 */
612 		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
613 				     (u16)erx_stat);
614 }
615 
616 void be_parse_stats(struct be_adapter *adapter)
617 {
618 	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
619 	struct be_rx_obj *rxo;
620 	int i;
621 	u32 erx_stat;
622 
623 	if (lancer_chip(adapter)) {
624 		populate_lancer_stats(adapter);
625 	} else {
626 		if (BE2_chip(adapter))
627 			populate_be_v0_stats(adapter);
628 		else if (BE3_chip(adapter))
629 			/* for BE3 */
630 			populate_be_v1_stats(adapter);
631 		else
632 			populate_be_v2_stats(adapter);
633 
634 		/* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
635 		for_all_rx_queues(adapter, rxo, i) {
636 			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
637 			populate_erx_stats(adapter, rxo, erx_stat);
638 		}
639 	}
640 }
641 
642 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
643 						struct rtnl_link_stats64 *stats)
644 {
645 	struct be_adapter *adapter = netdev_priv(netdev);
646 	struct be_drv_stats *drvs = &adapter->drv_stats;
647 	struct be_rx_obj *rxo;
648 	struct be_tx_obj *txo;
649 	u64 pkts, bytes;
650 	unsigned int start;
651 	int i;
652 
653 	for_all_rx_queues(adapter, rxo, i) {
654 		const struct be_rx_stats *rx_stats = rx_stats(rxo);
655 
656 		do {
657 			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
658 			pkts = rx_stats(rxo)->rx_pkts;
659 			bytes = rx_stats(rxo)->rx_bytes;
660 		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
661 		stats->rx_packets += pkts;
662 		stats->rx_bytes += bytes;
663 		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
664 		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
665 					rx_stats(rxo)->rx_drops_no_frags;
666 	}
667 
668 	for_all_tx_queues(adapter, txo, i) {
669 		const struct be_tx_stats *tx_stats = tx_stats(txo);
670 
671 		do {
672 			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
673 			pkts = tx_stats(txo)->tx_pkts;
674 			bytes = tx_stats(txo)->tx_bytes;
675 		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
676 		stats->tx_packets += pkts;
677 		stats->tx_bytes += bytes;
678 	}
679 
680 	/* bad pkts received */
681 	stats->rx_errors = drvs->rx_crc_errors +
682 		drvs->rx_alignment_symbol_errors +
683 		drvs->rx_in_range_errors +
684 		drvs->rx_out_range_errors +
685 		drvs->rx_frame_too_long +
686 		drvs->rx_dropped_too_small +
687 		drvs->rx_dropped_too_short +
688 		drvs->rx_dropped_header_too_small +
689 		drvs->rx_dropped_tcp_length +
690 		drvs->rx_dropped_runt;
691 
692 	/* detailed rx errors */
693 	stats->rx_length_errors = drvs->rx_in_range_errors +
694 		drvs->rx_out_range_errors +
695 		drvs->rx_frame_too_long;
696 
697 	stats->rx_crc_errors = drvs->rx_crc_errors;
698 
699 	/* frame alignment errors */
700 	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
701 
702 	/* receiver fifo overrun */
703 	/* drops_no_pbuf is no per i/f, it's per BE card */
704 	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
705 				drvs->rx_input_fifo_overflow_drop +
706 				drvs->rx_drops_no_pbuf;
707 	return stats;
708 }
709 
710 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
711 {
712 	struct net_device *netdev = adapter->netdev;
713 
714 	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
715 		netif_carrier_off(netdev);
716 		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
717 	}
718 
719 	if (link_status)
720 		netif_carrier_on(netdev);
721 	else
722 		netif_carrier_off(netdev);
723 
724 	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
725 }
726 
727 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
728 {
729 	struct be_tx_stats *stats = tx_stats(txo);
730 	u64 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
731 
732 	u64_stats_update_begin(&stats->sync);
733 	stats->tx_reqs++;
734 	stats->tx_bytes += skb->len;
735 	stats->tx_pkts += tx_pkts;
736 	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
737 		stats->tx_vxlan_offload_pkts += tx_pkts;
738 	u64_stats_update_end(&stats->sync);
739 }
740 
741 /* Returns number of WRBs needed for the skb */
742 static u32 skb_wrb_cnt(struct sk_buff *skb)
743 {
744 	/* +1 for the header wrb */
745 	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
746 }
747 
748 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
749 {
750 	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
751 	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
752 	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
753 	wrb->rsvd0 = 0;
754 }
755 
756 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
757  * to avoid the swap and shift/mask operations in wrb_fill().
758  */
759 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
760 {
761 	wrb->frag_pa_hi = 0;
762 	wrb->frag_pa_lo = 0;
763 	wrb->frag_len = 0;
764 	wrb->rsvd0 = 0;
765 }
766 
767 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
768 				     struct sk_buff *skb)
769 {
770 	u8 vlan_prio;
771 	u16 vlan_tag;
772 
773 	vlan_tag = skb_vlan_tag_get(skb);
774 	vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
775 	/* If vlan priority provided by OS is NOT in available bmap */
776 	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
777 		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
778 				adapter->recommended_prio_bits;
779 
780 	return vlan_tag;
781 }
782 
783 /* Used only for IP tunnel packets */
784 static u16 skb_inner_ip_proto(struct sk_buff *skb)
785 {
786 	return (inner_ip_hdr(skb)->version == 4) ?
787 		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
788 }
789 
790 static u16 skb_ip_proto(struct sk_buff *skb)
791 {
792 	return (ip_hdr(skb)->version == 4) ?
793 		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
794 }
795 
796 static inline bool be_is_txq_full(struct be_tx_obj *txo)
797 {
798 	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
799 }
800 
801 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
802 {
803 	return atomic_read(&txo->q.used) < txo->q.len / 2;
804 }
805 
806 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
807 {
808 	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
809 }
810 
811 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
812 				       struct sk_buff *skb,
813 				       struct be_wrb_params *wrb_params)
814 {
815 	u16 proto;
816 
817 	if (skb_is_gso(skb)) {
818 		BE_WRB_F_SET(wrb_params->features, LSO, 1);
819 		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
820 		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
821 			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
822 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
823 		if (skb->encapsulation) {
824 			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
825 			proto = skb_inner_ip_proto(skb);
826 		} else {
827 			proto = skb_ip_proto(skb);
828 		}
829 		if (proto == IPPROTO_TCP)
830 			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
831 		else if (proto == IPPROTO_UDP)
832 			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
833 	}
834 
835 	if (skb_vlan_tag_present(skb)) {
836 		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
837 		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
838 	}
839 
840 	BE_WRB_F_SET(wrb_params->features, CRC, 1);
841 }
842 
843 static void wrb_fill_hdr(struct be_adapter *adapter,
844 			 struct be_eth_hdr_wrb *hdr,
845 			 struct be_wrb_params *wrb_params,
846 			 struct sk_buff *skb)
847 {
848 	memset(hdr, 0, sizeof(*hdr));
849 
850 	SET_TX_WRB_HDR_BITS(crc, hdr,
851 			    BE_WRB_F_GET(wrb_params->features, CRC));
852 	SET_TX_WRB_HDR_BITS(ipcs, hdr,
853 			    BE_WRB_F_GET(wrb_params->features, IPCS));
854 	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
855 			    BE_WRB_F_GET(wrb_params->features, TCPCS));
856 	SET_TX_WRB_HDR_BITS(udpcs, hdr,
857 			    BE_WRB_F_GET(wrb_params->features, UDPCS));
858 
859 	SET_TX_WRB_HDR_BITS(lso, hdr,
860 			    BE_WRB_F_GET(wrb_params->features, LSO));
861 	SET_TX_WRB_HDR_BITS(lso6, hdr,
862 			    BE_WRB_F_GET(wrb_params->features, LSO6));
863 	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
864 
865 	/* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
866 	 * hack is not needed, the evt bit is set while ringing DB.
867 	 */
868 	SET_TX_WRB_HDR_BITS(event, hdr,
869 			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
870 	SET_TX_WRB_HDR_BITS(vlan, hdr,
871 			    BE_WRB_F_GET(wrb_params->features, VLAN));
872 	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
873 
874 	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
875 	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
876 	SET_TX_WRB_HDR_BITS(mgmt, hdr,
877 			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
878 }
879 
880 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
881 			  bool unmap_single)
882 {
883 	dma_addr_t dma;
884 	u32 frag_len = le32_to_cpu(wrb->frag_len);
885 
886 
887 	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
888 		(u64)le32_to_cpu(wrb->frag_pa_lo);
889 	if (frag_len) {
890 		if (unmap_single)
891 			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
892 		else
893 			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
894 	}
895 }
896 
897 /* Grab a WRB header for xmit */
898 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
899 {
900 	u32 head = txo->q.head;
901 
902 	queue_head_inc(&txo->q);
903 	return head;
904 }
905 
906 /* Set up the WRB header for xmit */
907 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
908 				struct be_tx_obj *txo,
909 				struct be_wrb_params *wrb_params,
910 				struct sk_buff *skb, u16 head)
911 {
912 	u32 num_frags = skb_wrb_cnt(skb);
913 	struct be_queue_info *txq = &txo->q;
914 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
915 
916 	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
917 	be_dws_cpu_to_le(hdr, sizeof(*hdr));
918 
919 	BUG_ON(txo->sent_skb_list[head]);
920 	txo->sent_skb_list[head] = skb;
921 	txo->last_req_hdr = head;
922 	atomic_add(num_frags, &txq->used);
923 	txo->last_req_wrb_cnt = num_frags;
924 	txo->pend_wrb_cnt += num_frags;
925 }
926 
927 /* Setup a WRB fragment (buffer descriptor) for xmit */
928 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
929 				 int len)
930 {
931 	struct be_eth_wrb *wrb;
932 	struct be_queue_info *txq = &txo->q;
933 
934 	wrb = queue_head_node(txq);
935 	wrb_fill(wrb, busaddr, len);
936 	queue_head_inc(txq);
937 }
938 
939 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
940  * was invoked. The producer index is restored to the previous packet and the
941  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
942  */
943 static void be_xmit_restore(struct be_adapter *adapter,
944 			    struct be_tx_obj *txo, u32 head, bool map_single,
945 			    u32 copied)
946 {
947 	struct device *dev;
948 	struct be_eth_wrb *wrb;
949 	struct be_queue_info *txq = &txo->q;
950 
951 	dev = &adapter->pdev->dev;
952 	txq->head = head;
953 
954 	/* skip the first wrb (hdr); it's not mapped */
955 	queue_head_inc(txq);
956 	while (copied) {
957 		wrb = queue_head_node(txq);
958 		unmap_tx_frag(dev, wrb, map_single);
959 		map_single = false;
960 		copied -= le32_to_cpu(wrb->frag_len);
961 		queue_head_inc(txq);
962 	}
963 
964 	txq->head = head;
965 }
966 
967 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
968  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
969  * of WRBs used up by the packet.
970  */
971 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
972 			   struct sk_buff *skb,
973 			   struct be_wrb_params *wrb_params)
974 {
975 	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
976 	struct device *dev = &adapter->pdev->dev;
977 	struct be_queue_info *txq = &txo->q;
978 	bool map_single = false;
979 	u32 head = txq->head;
980 	dma_addr_t busaddr;
981 	int len;
982 
983 	head = be_tx_get_wrb_hdr(txo);
984 
985 	if (skb->len > skb->data_len) {
986 		len = skb_headlen(skb);
987 
988 		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
989 		if (dma_mapping_error(dev, busaddr))
990 			goto dma_err;
991 		map_single = true;
992 		be_tx_setup_wrb_frag(txo, busaddr, len);
993 		copied += len;
994 	}
995 
996 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
997 		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
998 		len = skb_frag_size(frag);
999 
1000 		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1001 		if (dma_mapping_error(dev, busaddr))
1002 			goto dma_err;
1003 		be_tx_setup_wrb_frag(txo, busaddr, len);
1004 		copied += len;
1005 	}
1006 
1007 	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1008 
1009 	be_tx_stats_update(txo, skb);
1010 	return wrb_cnt;
1011 
1012 dma_err:
1013 	adapter->drv_stats.dma_map_errors++;
1014 	be_xmit_restore(adapter, txo, head, map_single, copied);
1015 	return 0;
1016 }
1017 
1018 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1019 {
1020 	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1021 }
1022 
1023 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1024 					     struct sk_buff *skb,
1025 					     struct be_wrb_params
1026 					     *wrb_params)
1027 {
1028 	u16 vlan_tag = 0;
1029 
1030 	skb = skb_share_check(skb, GFP_ATOMIC);
1031 	if (unlikely(!skb))
1032 		return skb;
1033 
1034 	if (skb_vlan_tag_present(skb))
1035 		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1036 
1037 	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1038 		if (!vlan_tag)
1039 			vlan_tag = adapter->pvid;
1040 		/* f/w workaround to set skip_hw_vlan = 1, informs the F/W to
1041 		 * skip VLAN insertion
1042 		 */
1043 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1044 	}
1045 
1046 	if (vlan_tag) {
1047 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1048 						vlan_tag);
1049 		if (unlikely(!skb))
1050 			return skb;
1051 		skb->vlan_tci = 0;
1052 	}
1053 
1054 	/* Insert the outer VLAN, if any */
1055 	if (adapter->qnq_vid) {
1056 		vlan_tag = adapter->qnq_vid;
1057 		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1058 						vlan_tag);
1059 		if (unlikely(!skb))
1060 			return skb;
1061 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1062 	}
1063 
1064 	return skb;
1065 }
1066 
1067 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1068 {
1069 	struct ethhdr *eh = (struct ethhdr *)skb->data;
1070 	u16 offset = ETH_HLEN;
1071 
1072 	if (eh->h_proto == htons(ETH_P_IPV6)) {
1073 		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1074 
1075 		offset += sizeof(struct ipv6hdr);
1076 		if (ip6h->nexthdr != NEXTHDR_TCP &&
1077 		    ip6h->nexthdr != NEXTHDR_UDP) {
1078 			struct ipv6_opt_hdr *ehdr =
1079 				(struct ipv6_opt_hdr *)(skb->data + offset);
1080 
1081 			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1082 			if (ehdr->hdrlen == 0xff)
1083 				return true;
1084 		}
1085 	}
1086 	return false;
1087 }
1088 
1089 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1090 {
1091 	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1092 }
1093 
1094 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1095 {
1096 	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1097 }
1098 
1099 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1100 						  struct sk_buff *skb,
1101 						  struct be_wrb_params
1102 						  *wrb_params)
1103 {
1104 	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1105 	unsigned int eth_hdr_len;
1106 	struct iphdr *ip;
1107 
1108 	/* For padded packets, BE HW modifies tot_len field in IP header
1109 	 * incorrecly when VLAN tag is inserted by HW.
1110 	 * For padded packets, Lancer computes incorrect checksum.
1111 	 */
1112 	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1113 						VLAN_ETH_HLEN : ETH_HLEN;
1114 	if (skb->len <= 60 &&
1115 	    (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1116 	    is_ipv4_pkt(skb)) {
1117 		ip = (struct iphdr *)ip_hdr(skb);
1118 		pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1119 	}
1120 
1121 	/* If vlan tag is already inlined in the packet, skip HW VLAN
1122 	 * tagging in pvid-tagging mode
1123 	 */
1124 	if (be_pvid_tagging_enabled(adapter) &&
1125 	    veh->h_vlan_proto == htons(ETH_P_8021Q))
1126 		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1127 
1128 	/* HW has a bug wherein it will calculate CSUM for VLAN
1129 	 * pkts even though it is disabled.
1130 	 * Manually insert VLAN in pkt.
1131 	 */
1132 	if (skb->ip_summed != CHECKSUM_PARTIAL &&
1133 	    skb_vlan_tag_present(skb)) {
1134 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1135 		if (unlikely(!skb))
1136 			goto err;
1137 	}
1138 
1139 	/* HW may lockup when VLAN HW tagging is requested on
1140 	 * certain ipv6 packets. Drop such pkts if the HW workaround to
1141 	 * skip HW tagging is not enabled by FW.
1142 	 */
1143 	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1144 		     (adapter->pvid || adapter->qnq_vid) &&
1145 		     !qnq_async_evt_rcvd(adapter)))
1146 		goto tx_drop;
1147 
1148 	/* Manual VLAN tag insertion to prevent:
1149 	 * ASIC lockup when the ASIC inserts VLAN tag into
1150 	 * certain ipv6 packets. Insert VLAN tags in driver,
1151 	 * and set event, completion, vlan bits accordingly
1152 	 * in the Tx WRB.
1153 	 */
1154 	if (be_ipv6_tx_stall_chk(adapter, skb) &&
1155 	    be_vlan_tag_tx_chk(adapter, skb)) {
1156 		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1157 		if (unlikely(!skb))
1158 			goto err;
1159 	}
1160 
1161 	return skb;
1162 tx_drop:
1163 	dev_kfree_skb_any(skb);
1164 err:
1165 	return NULL;
1166 }
1167 
1168 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1169 					   struct sk_buff *skb,
1170 					   struct be_wrb_params *wrb_params)
1171 {
1172 	int err;
1173 
1174 	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1175 	 * packets that are 32b or less may cause a transmit stall
1176 	 * on that port. The workaround is to pad such packets
1177 	 * (len <= 32 bytes) to a minimum length of 36b.
1178 	 */
1179 	if (skb->len <= 32) {
1180 		if (skb_put_padto(skb, 36))
1181 			return NULL;
1182 	}
1183 
1184 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
1185 		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1186 		if (!skb)
1187 			return NULL;
1188 	}
1189 
1190 	/* The stack can send us skbs with length greater than
1191 	 * what the HW can handle. Trim the extra bytes.
1192 	 */
1193 	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1194 	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1195 	WARN_ON(err);
1196 
1197 	return skb;
1198 }
1199 
1200 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1201 {
1202 	struct be_queue_info *txq = &txo->q;
1203 	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1204 
1205 	/* Mark the last request eventable if it hasn't been marked already */
1206 	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1207 		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1208 
1209 	/* compose a dummy wrb if there are odd set of wrbs to notify */
1210 	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1211 		wrb_fill_dummy(queue_head_node(txq));
1212 		queue_head_inc(txq);
1213 		atomic_inc(&txq->used);
1214 		txo->pend_wrb_cnt++;
1215 		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1216 					   TX_HDR_WRB_NUM_SHIFT);
1217 		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1218 					  TX_HDR_WRB_NUM_SHIFT);
1219 	}
1220 	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1221 	txo->pend_wrb_cnt = 0;
1222 }
1223 
1224 /* OS2BMC related */
1225 
1226 #define DHCP_CLIENT_PORT	68
1227 #define DHCP_SERVER_PORT	67
1228 #define NET_BIOS_PORT1		137
1229 #define NET_BIOS_PORT2		138
1230 #define DHCPV6_RAS_PORT		547
1231 
1232 #define is_mc_allowed_on_bmc(adapter, eh)	\
1233 	(!is_multicast_filt_enabled(adapter) &&	\
1234 	 is_multicast_ether_addr(eh->h_dest) &&	\
1235 	 !is_broadcast_ether_addr(eh->h_dest))
1236 
1237 #define is_bc_allowed_on_bmc(adapter, eh)	\
1238 	(!is_broadcast_filt_enabled(adapter) &&	\
1239 	 is_broadcast_ether_addr(eh->h_dest))
1240 
1241 #define is_arp_allowed_on_bmc(adapter, skb)	\
1242 	(is_arp(skb) && is_arp_filt_enabled(adapter))
1243 
1244 #define is_broadcast_packet(eh, adapter)	\
1245 		(is_multicast_ether_addr(eh->h_dest) && \
1246 		!compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1247 
1248 #define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
1249 
1250 #define is_arp_filt_enabled(adapter)	\
1251 		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1252 
1253 #define is_dhcp_client_filt_enabled(adapter)	\
1254 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1255 
1256 #define is_dhcp_srvr_filt_enabled(adapter)	\
1257 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1258 
1259 #define is_nbios_filt_enabled(adapter)	\
1260 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1261 
1262 #define is_ipv6_na_filt_enabled(adapter)	\
1263 		(adapter->bmc_filt_mask &	\
1264 			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1265 
1266 #define is_ipv6_ra_filt_enabled(adapter)	\
1267 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1268 
1269 #define is_ipv6_ras_filt_enabled(adapter)	\
1270 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1271 
1272 #define is_broadcast_filt_enabled(adapter)	\
1273 		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1274 
1275 #define is_multicast_filt_enabled(adapter)	\
1276 		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1277 
1278 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1279 			       struct sk_buff **skb)
1280 {
1281 	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1282 	bool os2bmc = false;
1283 
1284 	if (!be_is_os2bmc_enabled(adapter))
1285 		goto done;
1286 
1287 	if (!is_multicast_ether_addr(eh->h_dest))
1288 		goto done;
1289 
1290 	if (is_mc_allowed_on_bmc(adapter, eh) ||
1291 	    is_bc_allowed_on_bmc(adapter, eh) ||
1292 	    is_arp_allowed_on_bmc(adapter, (*skb))) {
1293 		os2bmc = true;
1294 		goto done;
1295 	}
1296 
1297 	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1298 		struct ipv6hdr *hdr = ipv6_hdr((*skb));
1299 		u8 nexthdr = hdr->nexthdr;
1300 
1301 		if (nexthdr == IPPROTO_ICMPV6) {
1302 			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1303 
1304 			switch (icmp6->icmp6_type) {
1305 			case NDISC_ROUTER_ADVERTISEMENT:
1306 				os2bmc = is_ipv6_ra_filt_enabled(adapter);
1307 				goto done;
1308 			case NDISC_NEIGHBOUR_ADVERTISEMENT:
1309 				os2bmc = is_ipv6_na_filt_enabled(adapter);
1310 				goto done;
1311 			default:
1312 				break;
1313 			}
1314 		}
1315 	}
1316 
1317 	if (is_udp_pkt((*skb))) {
1318 		struct udphdr *udp = udp_hdr((*skb));
1319 
1320 		switch (ntohs(udp->dest)) {
1321 		case DHCP_CLIENT_PORT:
1322 			os2bmc = is_dhcp_client_filt_enabled(adapter);
1323 			goto done;
1324 		case DHCP_SERVER_PORT:
1325 			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1326 			goto done;
1327 		case NET_BIOS_PORT1:
1328 		case NET_BIOS_PORT2:
1329 			os2bmc = is_nbios_filt_enabled(adapter);
1330 			goto done;
1331 		case DHCPV6_RAS_PORT:
1332 			os2bmc = is_ipv6_ras_filt_enabled(adapter);
1333 			goto done;
1334 		default:
1335 			break;
1336 		}
1337 	}
1338 done:
1339 	/* For packets over a vlan, which are destined
1340 	 * to BMC, asic expects the vlan to be inline in the packet.
1341 	 */
1342 	if (os2bmc)
1343 		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1344 
1345 	return os2bmc;
1346 }
1347 
1348 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1349 {
1350 	struct be_adapter *adapter = netdev_priv(netdev);
1351 	u16 q_idx = skb_get_queue_mapping(skb);
1352 	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1353 	struct be_wrb_params wrb_params = { 0 };
1354 	bool flush = !skb->xmit_more;
1355 	u16 wrb_cnt;
1356 
1357 	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1358 	if (unlikely(!skb))
1359 		goto drop;
1360 
1361 	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1362 
1363 	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1364 	if (unlikely(!wrb_cnt)) {
1365 		dev_kfree_skb_any(skb);
1366 		goto drop;
1367 	}
1368 
1369 	/* if os2bmc is enabled and if the pkt is destined to bmc,
1370 	 * enqueue the pkt a 2nd time with mgmt bit set.
1371 	 */
1372 	if (be_send_pkt_to_bmc(adapter, &skb)) {
1373 		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1374 		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1375 		if (unlikely(!wrb_cnt))
1376 			goto drop;
1377 		else
1378 			skb_get(skb);
1379 	}
1380 
1381 	if (be_is_txq_full(txo)) {
1382 		netif_stop_subqueue(netdev, q_idx);
1383 		tx_stats(txo)->tx_stops++;
1384 	}
1385 
1386 	if (flush || __netif_subqueue_stopped(netdev, q_idx))
1387 		be_xmit_flush(adapter, txo);
1388 
1389 	return NETDEV_TX_OK;
1390 drop:
1391 	tx_stats(txo)->tx_drv_drops++;
1392 	/* Flush the already enqueued tx requests */
1393 	if (flush && txo->pend_wrb_cnt)
1394 		be_xmit_flush(adapter, txo);
1395 
1396 	return NETDEV_TX_OK;
1397 }
1398 
1399 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1400 {
1401 	struct be_adapter *adapter = netdev_priv(netdev);
1402 	struct device *dev = &adapter->pdev->dev;
1403 
1404 	if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1405 		dev_info(dev, "MTU must be between %d and %d bytes\n",
1406 			 BE_MIN_MTU, BE_MAX_MTU);
1407 		return -EINVAL;
1408 	}
1409 
1410 	dev_info(dev, "MTU changed from %d to %d bytes\n",
1411 		 netdev->mtu, new_mtu);
1412 	netdev->mtu = new_mtu;
1413 	return 0;
1414 }
1415 
1416 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1417 {
1418 	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1419 			BE_IF_FLAGS_ALL_PROMISCUOUS;
1420 }
1421 
1422 static int be_set_vlan_promisc(struct be_adapter *adapter)
1423 {
1424 	struct device *dev = &adapter->pdev->dev;
1425 	int status;
1426 
1427 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1428 		return 0;
1429 
1430 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1431 	if (!status) {
1432 		dev_info(dev, "Enabled VLAN promiscuous mode\n");
1433 		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1434 	} else {
1435 		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1436 	}
1437 	return status;
1438 }
1439 
1440 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1441 {
1442 	struct device *dev = &adapter->pdev->dev;
1443 	int status;
1444 
1445 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1446 	if (!status) {
1447 		dev_info(dev, "Disabling VLAN promiscuous mode\n");
1448 		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1449 	}
1450 	return status;
1451 }
1452 
1453 /*
1454  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1455  * If the user configures more, place BE in vlan promiscuous mode.
1456  */
1457 static int be_vid_config(struct be_adapter *adapter)
1458 {
1459 	struct device *dev = &adapter->pdev->dev;
1460 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1461 	u16 num = 0, i = 0;
1462 	int status = 0;
1463 
1464 	/* No need to change the VLAN state if the I/F is in promiscuous */
1465 	if (adapter->netdev->flags & IFF_PROMISC)
1466 		return 0;
1467 
1468 	if (adapter->vlans_added > be_max_vlans(adapter))
1469 		return be_set_vlan_promisc(adapter);
1470 
1471 	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1472 		status = be_clear_vlan_promisc(adapter);
1473 		if (status)
1474 			return status;
1475 	}
1476 	/* Construct VLAN Table to give to HW */
1477 	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1478 		vids[num++] = cpu_to_le16(i);
1479 
1480 	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1481 	if (status) {
1482 		dev_err(dev, "Setting HW VLAN filtering failed\n");
1483 		/* Set to VLAN promisc mode as setting VLAN filter failed */
1484 		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1485 		    addl_status(status) ==
1486 				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1487 			return be_set_vlan_promisc(adapter);
1488 	}
1489 	return status;
1490 }
1491 
1492 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1493 {
1494 	struct be_adapter *adapter = netdev_priv(netdev);
1495 	int status = 0;
1496 
1497 	mutex_lock(&adapter->rx_filter_lock);
1498 
1499 	/* Packets with VID 0 are always received by Lancer by default */
1500 	if (lancer_chip(adapter) && vid == 0)
1501 		goto done;
1502 
1503 	if (test_bit(vid, adapter->vids))
1504 		goto done;
1505 
1506 	set_bit(vid, adapter->vids);
1507 	adapter->vlans_added++;
1508 
1509 	status = be_vid_config(adapter);
1510 done:
1511 	mutex_unlock(&adapter->rx_filter_lock);
1512 	return status;
1513 }
1514 
1515 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1516 {
1517 	struct be_adapter *adapter = netdev_priv(netdev);
1518 	int status = 0;
1519 
1520 	mutex_lock(&adapter->rx_filter_lock);
1521 
1522 	/* Packets with VID 0 are always received by Lancer by default */
1523 	if (lancer_chip(adapter) && vid == 0)
1524 		goto done;
1525 
1526 	if (!test_bit(vid, adapter->vids))
1527 		goto done;
1528 
1529 	clear_bit(vid, adapter->vids);
1530 	adapter->vlans_added--;
1531 
1532 	status = be_vid_config(adapter);
1533 done:
1534 	mutex_unlock(&adapter->rx_filter_lock);
1535 	return status;
1536 }
1537 
1538 static void be_set_all_promisc(struct be_adapter *adapter)
1539 {
1540 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1541 	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1542 }
1543 
1544 static void be_set_mc_promisc(struct be_adapter *adapter)
1545 {
1546 	int status;
1547 
1548 	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1549 		return;
1550 
1551 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1552 	if (!status)
1553 		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1554 }
1555 
1556 static void be_set_uc_promisc(struct be_adapter *adapter)
1557 {
1558 	int status;
1559 
1560 	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1561 		return;
1562 
1563 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1564 	if (!status)
1565 		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1566 }
1567 
1568 static void be_clear_uc_promisc(struct be_adapter *adapter)
1569 {
1570 	int status;
1571 
1572 	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1573 		return;
1574 
1575 	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1576 	if (!status)
1577 		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1578 }
1579 
1580 /* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1581  * We use a single callback function for both sync and unsync. We really don't
1582  * add/remove addresses through this callback. But, we use it to detect changes
1583  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1584  */
1585 static int be_uc_list_update(struct net_device *netdev,
1586 			     const unsigned char *addr)
1587 {
1588 	struct be_adapter *adapter = netdev_priv(netdev);
1589 
1590 	adapter->update_uc_list = true;
1591 	return 0;
1592 }
1593 
1594 static int be_mc_list_update(struct net_device *netdev,
1595 			     const unsigned char *addr)
1596 {
1597 	struct be_adapter *adapter = netdev_priv(netdev);
1598 
1599 	adapter->update_mc_list = true;
1600 	return 0;
1601 }
1602 
1603 static void be_set_mc_list(struct be_adapter *adapter)
1604 {
1605 	struct net_device *netdev = adapter->netdev;
1606 	struct netdev_hw_addr *ha;
1607 	bool mc_promisc = false;
1608 	int status;
1609 
1610 	netif_addr_lock_bh(netdev);
1611 	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1612 
1613 	if (netdev->flags & IFF_PROMISC) {
1614 		adapter->update_mc_list = false;
1615 	} else if (netdev->flags & IFF_ALLMULTI ||
1616 		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1617 		/* Enable multicast promisc if num configured exceeds
1618 		 * what we support
1619 		 */
1620 		mc_promisc = true;
1621 		adapter->update_mc_list = false;
1622 	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1623 		/* Update mc-list unconditionally if the iface was previously
1624 		 * in mc-promisc mode and now is out of that mode.
1625 		 */
1626 		adapter->update_mc_list = true;
1627 	}
1628 
1629 	if (adapter->update_mc_list) {
1630 		int i = 0;
1631 
1632 		/* cache the mc-list in adapter */
1633 		netdev_for_each_mc_addr(ha, netdev) {
1634 			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1635 			i++;
1636 		}
1637 		adapter->mc_count = netdev_mc_count(netdev);
1638 	}
1639 	netif_addr_unlock_bh(netdev);
1640 
1641 	if (mc_promisc) {
1642 		be_set_mc_promisc(adapter);
1643 	} else if (adapter->update_mc_list) {
1644 		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1645 		if (!status)
1646 			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1647 		else
1648 			be_set_mc_promisc(adapter);
1649 
1650 		adapter->update_mc_list = false;
1651 	}
1652 }
1653 
1654 static void be_clear_mc_list(struct be_adapter *adapter)
1655 {
1656 	struct net_device *netdev = adapter->netdev;
1657 
1658 	__dev_mc_unsync(netdev, NULL);
1659 	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1660 	adapter->mc_count = 0;
1661 }
1662 
1663 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1664 {
1665 	if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1666 			     adapter->dev_mac)) {
1667 		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1668 		return 0;
1669 	}
1670 
1671 	return be_cmd_pmac_add(adapter,
1672 			       (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1673 			       adapter->if_handle,
1674 			       &adapter->pmac_id[uc_idx + 1], 0);
1675 }
1676 
1677 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1678 {
1679 	if (pmac_id == adapter->pmac_id[0])
1680 		return;
1681 
1682 	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1683 }
1684 
1685 static void be_set_uc_list(struct be_adapter *adapter)
1686 {
1687 	struct net_device *netdev = adapter->netdev;
1688 	struct netdev_hw_addr *ha;
1689 	bool uc_promisc = false;
1690 	int curr_uc_macs = 0, i;
1691 
1692 	netif_addr_lock_bh(netdev);
1693 	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1694 
1695 	if (netdev->flags & IFF_PROMISC) {
1696 		adapter->update_uc_list = false;
1697 	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1698 		uc_promisc = true;
1699 		adapter->update_uc_list = false;
1700 	}  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1701 		/* Update uc-list unconditionally if the iface was previously
1702 		 * in uc-promisc mode and now is out of that mode.
1703 		 */
1704 		adapter->update_uc_list = true;
1705 	}
1706 
1707 	if (adapter->update_uc_list) {
1708 		i = 1; /* First slot is claimed by the Primary MAC */
1709 
1710 		/* cache the uc-list in adapter array */
1711 		netdev_for_each_uc_addr(ha, netdev) {
1712 			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1713 			i++;
1714 		}
1715 		curr_uc_macs = netdev_uc_count(netdev);
1716 	}
1717 	netif_addr_unlock_bh(netdev);
1718 
1719 	if (uc_promisc) {
1720 		be_set_uc_promisc(adapter);
1721 	} else if (adapter->update_uc_list) {
1722 		be_clear_uc_promisc(adapter);
1723 
1724 		for (i = 0; i < adapter->uc_macs; i++)
1725 			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1726 
1727 		for (i = 0; i < curr_uc_macs; i++)
1728 			be_uc_mac_add(adapter, i);
1729 		adapter->uc_macs = curr_uc_macs;
1730 		adapter->update_uc_list = false;
1731 	}
1732 }
1733 
1734 static void be_clear_uc_list(struct be_adapter *adapter)
1735 {
1736 	struct net_device *netdev = adapter->netdev;
1737 	int i;
1738 
1739 	__dev_uc_unsync(netdev, NULL);
1740 	for (i = 0; i < adapter->uc_macs; i++)
1741 		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1742 
1743 	adapter->uc_macs = 0;
1744 }
1745 
1746 static void __be_set_rx_mode(struct be_adapter *adapter)
1747 {
1748 	struct net_device *netdev = adapter->netdev;
1749 
1750 	mutex_lock(&adapter->rx_filter_lock);
1751 
1752 	if (netdev->flags & IFF_PROMISC) {
1753 		if (!be_in_all_promisc(adapter))
1754 			be_set_all_promisc(adapter);
1755 	} else if (be_in_all_promisc(adapter)) {
1756 		/* We need to re-program the vlan-list or clear
1757 		 * vlan-promisc mode (if needed) when the interface
1758 		 * comes out of promisc mode.
1759 		 */
1760 		be_vid_config(adapter);
1761 	}
1762 
1763 	be_set_uc_list(adapter);
1764 	be_set_mc_list(adapter);
1765 
1766 	mutex_unlock(&adapter->rx_filter_lock);
1767 }
1768 
1769 static void be_work_set_rx_mode(struct work_struct *work)
1770 {
1771 	struct be_cmd_work *cmd_work =
1772 				container_of(work, struct be_cmd_work, work);
1773 
1774 	__be_set_rx_mode(cmd_work->adapter);
1775 	kfree(cmd_work);
1776 }
1777 
1778 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1779 {
1780 	struct be_adapter *adapter = netdev_priv(netdev);
1781 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1782 	int status;
1783 
1784 	if (!sriov_enabled(adapter))
1785 		return -EPERM;
1786 
1787 	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1788 		return -EINVAL;
1789 
1790 	/* Proceed further only if user provided MAC is different
1791 	 * from active MAC
1792 	 */
1793 	if (ether_addr_equal(mac, vf_cfg->mac_addr))
1794 		return 0;
1795 
1796 	if (BEx_chip(adapter)) {
1797 		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1798 				vf + 1);
1799 
1800 		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1801 					 &vf_cfg->pmac_id, vf + 1);
1802 	} else {
1803 		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1804 					vf + 1);
1805 	}
1806 
1807 	if (status) {
1808 		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1809 			mac, vf, status);
1810 		return be_cmd_status(status);
1811 	}
1812 
1813 	ether_addr_copy(vf_cfg->mac_addr, mac);
1814 
1815 	return 0;
1816 }
1817 
1818 static int be_get_vf_config(struct net_device *netdev, int vf,
1819 			    struct ifla_vf_info *vi)
1820 {
1821 	struct be_adapter *adapter = netdev_priv(netdev);
1822 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1823 
1824 	if (!sriov_enabled(adapter))
1825 		return -EPERM;
1826 
1827 	if (vf >= adapter->num_vfs)
1828 		return -EINVAL;
1829 
1830 	vi->vf = vf;
1831 	vi->max_tx_rate = vf_cfg->tx_rate;
1832 	vi->min_tx_rate = 0;
1833 	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1834 	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1835 	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1836 	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1837 	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1838 
1839 	return 0;
1840 }
1841 
1842 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1843 {
1844 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1845 	u16 vids[BE_NUM_VLANS_SUPPORTED];
1846 	int vf_if_id = vf_cfg->if_handle;
1847 	int status;
1848 
1849 	/* Enable Transparent VLAN Tagging */
1850 	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1851 	if (status)
1852 		return status;
1853 
1854 	/* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1855 	vids[0] = 0;
1856 	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1857 	if (!status)
1858 		dev_info(&adapter->pdev->dev,
1859 			 "Cleared guest VLANs on VF%d", vf);
1860 
1861 	/* After TVT is enabled, disallow VFs to program VLAN filters */
1862 	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1863 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1864 						  ~BE_PRIV_FILTMGMT, vf + 1);
1865 		if (!status)
1866 			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1867 	}
1868 	return 0;
1869 }
1870 
1871 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1872 {
1873 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1874 	struct device *dev = &adapter->pdev->dev;
1875 	int status;
1876 
1877 	/* Reset Transparent VLAN Tagging. */
1878 	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1879 				       vf_cfg->if_handle, 0, 0);
1880 	if (status)
1881 		return status;
1882 
1883 	/* Allow VFs to program VLAN filtering */
1884 	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1885 		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1886 						  BE_PRIV_FILTMGMT, vf + 1);
1887 		if (!status) {
1888 			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1889 			dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1890 		}
1891 	}
1892 
1893 	dev_info(dev,
1894 		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1895 	return 0;
1896 }
1897 
1898 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1899 			  __be16 vlan_proto)
1900 {
1901 	struct be_adapter *adapter = netdev_priv(netdev);
1902 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1903 	int status;
1904 
1905 	if (!sriov_enabled(adapter))
1906 		return -EPERM;
1907 
1908 	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1909 		return -EINVAL;
1910 
1911 	if (vlan_proto != htons(ETH_P_8021Q))
1912 		return -EPROTONOSUPPORT;
1913 
1914 	if (vlan || qos) {
1915 		vlan |= qos << VLAN_PRIO_SHIFT;
1916 		status = be_set_vf_tvt(adapter, vf, vlan);
1917 	} else {
1918 		status = be_clear_vf_tvt(adapter, vf);
1919 	}
1920 
1921 	if (status) {
1922 		dev_err(&adapter->pdev->dev,
1923 			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1924 			status);
1925 		return be_cmd_status(status);
1926 	}
1927 
1928 	vf_cfg->vlan_tag = vlan;
1929 	return 0;
1930 }
1931 
1932 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1933 			     int min_tx_rate, int max_tx_rate)
1934 {
1935 	struct be_adapter *adapter = netdev_priv(netdev);
1936 	struct device *dev = &adapter->pdev->dev;
1937 	int percent_rate, status = 0;
1938 	u16 link_speed = 0;
1939 	u8 link_status;
1940 
1941 	if (!sriov_enabled(adapter))
1942 		return -EPERM;
1943 
1944 	if (vf >= adapter->num_vfs)
1945 		return -EINVAL;
1946 
1947 	if (min_tx_rate)
1948 		return -EINVAL;
1949 
1950 	if (!max_tx_rate)
1951 		goto config_qos;
1952 
1953 	status = be_cmd_link_status_query(adapter, &link_speed,
1954 					  &link_status, 0);
1955 	if (status)
1956 		goto err;
1957 
1958 	if (!link_status) {
1959 		dev_err(dev, "TX-rate setting not allowed when link is down\n");
1960 		status = -ENETDOWN;
1961 		goto err;
1962 	}
1963 
1964 	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1965 		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1966 			link_speed);
1967 		status = -EINVAL;
1968 		goto err;
1969 	}
1970 
1971 	/* On Skyhawk the QOS setting must be done only as a % value */
1972 	percent_rate = link_speed / 100;
1973 	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1974 		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1975 			percent_rate);
1976 		status = -EINVAL;
1977 		goto err;
1978 	}
1979 
1980 config_qos:
1981 	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1982 	if (status)
1983 		goto err;
1984 
1985 	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1986 	return 0;
1987 
1988 err:
1989 	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1990 		max_tx_rate, vf);
1991 	return be_cmd_status(status);
1992 }
1993 
1994 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1995 				int link_state)
1996 {
1997 	struct be_adapter *adapter = netdev_priv(netdev);
1998 	int status;
1999 
2000 	if (!sriov_enabled(adapter))
2001 		return -EPERM;
2002 
2003 	if (vf >= adapter->num_vfs)
2004 		return -EINVAL;
2005 
2006 	status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2007 	if (status) {
2008 		dev_err(&adapter->pdev->dev,
2009 			"Link state change on VF %d failed: %#x\n", vf, status);
2010 		return be_cmd_status(status);
2011 	}
2012 
2013 	adapter->vf_cfg[vf].plink_tracking = link_state;
2014 
2015 	return 0;
2016 }
2017 
2018 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2019 {
2020 	struct be_adapter *adapter = netdev_priv(netdev);
2021 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2022 	u8 spoofchk;
2023 	int status;
2024 
2025 	if (!sriov_enabled(adapter))
2026 		return -EPERM;
2027 
2028 	if (vf >= adapter->num_vfs)
2029 		return -EINVAL;
2030 
2031 	if (BEx_chip(adapter))
2032 		return -EOPNOTSUPP;
2033 
2034 	if (enable == vf_cfg->spoofchk)
2035 		return 0;
2036 
2037 	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2038 
2039 	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2040 				       0, spoofchk);
2041 	if (status) {
2042 		dev_err(&adapter->pdev->dev,
2043 			"Spoofchk change on VF %d failed: %#x\n", vf, status);
2044 		return be_cmd_status(status);
2045 	}
2046 
2047 	vf_cfg->spoofchk = enable;
2048 	return 0;
2049 }
2050 
2051 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2052 			  ulong now)
2053 {
2054 	aic->rx_pkts_prev = rx_pkts;
2055 	aic->tx_reqs_prev = tx_pkts;
2056 	aic->jiffies = now;
2057 }
2058 
2059 static int be_get_new_eqd(struct be_eq_obj *eqo)
2060 {
2061 	struct be_adapter *adapter = eqo->adapter;
2062 	int eqd, start;
2063 	struct be_aic_obj *aic;
2064 	struct be_rx_obj *rxo;
2065 	struct be_tx_obj *txo;
2066 	u64 rx_pkts = 0, tx_pkts = 0;
2067 	ulong now;
2068 	u32 pps, delta;
2069 	int i;
2070 
2071 	aic = &adapter->aic_obj[eqo->idx];
2072 	if (!aic->enable) {
2073 		if (aic->jiffies)
2074 			aic->jiffies = 0;
2075 		eqd = aic->et_eqd;
2076 		return eqd;
2077 	}
2078 
2079 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2080 		do {
2081 			start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2082 			rx_pkts += rxo->stats.rx_pkts;
2083 		} while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2084 	}
2085 
2086 	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2087 		do {
2088 			start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2089 			tx_pkts += txo->stats.tx_reqs;
2090 		} while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2091 	}
2092 
2093 	/* Skip, if wrapped around or first calculation */
2094 	now = jiffies;
2095 	if (!aic->jiffies || time_before(now, aic->jiffies) ||
2096 	    rx_pkts < aic->rx_pkts_prev ||
2097 	    tx_pkts < aic->tx_reqs_prev) {
2098 		be_aic_update(aic, rx_pkts, tx_pkts, now);
2099 		return aic->prev_eqd;
2100 	}
2101 
2102 	delta = jiffies_to_msecs(now - aic->jiffies);
2103 	if (delta == 0)
2104 		return aic->prev_eqd;
2105 
2106 	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2107 		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2108 	eqd = (pps / 15000) << 2;
2109 
2110 	if (eqd < 8)
2111 		eqd = 0;
2112 	eqd = min_t(u32, eqd, aic->max_eqd);
2113 	eqd = max_t(u32, eqd, aic->min_eqd);
2114 
2115 	be_aic_update(aic, rx_pkts, tx_pkts, now);
2116 
2117 	return eqd;
2118 }
2119 
2120 /* For Skyhawk-R only */
2121 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2122 {
2123 	struct be_adapter *adapter = eqo->adapter;
2124 	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2125 	ulong now = jiffies;
2126 	int eqd;
2127 	u32 mult_enc;
2128 
2129 	if (!aic->enable)
2130 		return 0;
2131 
2132 	if (jiffies_to_msecs(now - aic->jiffies) < 1)
2133 		eqd = aic->prev_eqd;
2134 	else
2135 		eqd = be_get_new_eqd(eqo);
2136 
2137 	if (eqd > 100)
2138 		mult_enc = R2I_DLY_ENC_1;
2139 	else if (eqd > 60)
2140 		mult_enc = R2I_DLY_ENC_2;
2141 	else if (eqd > 20)
2142 		mult_enc = R2I_DLY_ENC_3;
2143 	else
2144 		mult_enc = R2I_DLY_ENC_0;
2145 
2146 	aic->prev_eqd = eqd;
2147 
2148 	return mult_enc;
2149 }
2150 
2151 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2152 {
2153 	struct be_set_eqd set_eqd[MAX_EVT_QS];
2154 	struct be_aic_obj *aic;
2155 	struct be_eq_obj *eqo;
2156 	int i, num = 0, eqd;
2157 
2158 	for_all_evt_queues(adapter, eqo, i) {
2159 		aic = &adapter->aic_obj[eqo->idx];
2160 		eqd = be_get_new_eqd(eqo);
2161 		if (force_update || eqd != aic->prev_eqd) {
2162 			set_eqd[num].delay_multiplier = (eqd * 65)/100;
2163 			set_eqd[num].eq_id = eqo->q.id;
2164 			aic->prev_eqd = eqd;
2165 			num++;
2166 		}
2167 	}
2168 
2169 	if (num)
2170 		be_cmd_modify_eqd(adapter, set_eqd, num);
2171 }
2172 
2173 static void be_rx_stats_update(struct be_rx_obj *rxo,
2174 			       struct be_rx_compl_info *rxcp)
2175 {
2176 	struct be_rx_stats *stats = rx_stats(rxo);
2177 
2178 	u64_stats_update_begin(&stats->sync);
2179 	stats->rx_compl++;
2180 	stats->rx_bytes += rxcp->pkt_size;
2181 	stats->rx_pkts++;
2182 	if (rxcp->tunneled)
2183 		stats->rx_vxlan_offload_pkts++;
2184 	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2185 		stats->rx_mcast_pkts++;
2186 	if (rxcp->err)
2187 		stats->rx_compl_err++;
2188 	u64_stats_update_end(&stats->sync);
2189 }
2190 
2191 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2192 {
2193 	/* L4 checksum is not reliable for non TCP/UDP packets.
2194 	 * Also ignore ipcksm for ipv6 pkts
2195 	 */
2196 	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2197 		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2198 }
2199 
2200 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2201 {
2202 	struct be_adapter *adapter = rxo->adapter;
2203 	struct be_rx_page_info *rx_page_info;
2204 	struct be_queue_info *rxq = &rxo->q;
2205 	u32 frag_idx = rxq->tail;
2206 
2207 	rx_page_info = &rxo->page_info_tbl[frag_idx];
2208 	BUG_ON(!rx_page_info->page);
2209 
2210 	if (rx_page_info->last_frag) {
2211 		dma_unmap_page(&adapter->pdev->dev,
2212 			       dma_unmap_addr(rx_page_info, bus),
2213 			       adapter->big_page_size, DMA_FROM_DEVICE);
2214 		rx_page_info->last_frag = false;
2215 	} else {
2216 		dma_sync_single_for_cpu(&adapter->pdev->dev,
2217 					dma_unmap_addr(rx_page_info, bus),
2218 					rx_frag_size, DMA_FROM_DEVICE);
2219 	}
2220 
2221 	queue_tail_inc(rxq);
2222 	atomic_dec(&rxq->used);
2223 	return rx_page_info;
2224 }
2225 
2226 /* Throwaway the data in the Rx completion */
2227 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2228 				struct be_rx_compl_info *rxcp)
2229 {
2230 	struct be_rx_page_info *page_info;
2231 	u16 i, num_rcvd = rxcp->num_rcvd;
2232 
2233 	for (i = 0; i < num_rcvd; i++) {
2234 		page_info = get_rx_page_info(rxo);
2235 		put_page(page_info->page);
2236 		memset(page_info, 0, sizeof(*page_info));
2237 	}
2238 }
2239 
2240 /*
2241  * skb_fill_rx_data forms a complete skb for an ether frame
2242  * indicated by rxcp.
2243  */
2244 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2245 			     struct be_rx_compl_info *rxcp)
2246 {
2247 	struct be_rx_page_info *page_info;
2248 	u16 i, j;
2249 	u16 hdr_len, curr_frag_len, remaining;
2250 	u8 *start;
2251 
2252 	page_info = get_rx_page_info(rxo);
2253 	start = page_address(page_info->page) + page_info->page_offset;
2254 	prefetch(start);
2255 
2256 	/* Copy data in the first descriptor of this completion */
2257 	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2258 
2259 	skb->len = curr_frag_len;
2260 	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2261 		memcpy(skb->data, start, curr_frag_len);
2262 		/* Complete packet has now been moved to data */
2263 		put_page(page_info->page);
2264 		skb->data_len = 0;
2265 		skb->tail += curr_frag_len;
2266 	} else {
2267 		hdr_len = ETH_HLEN;
2268 		memcpy(skb->data, start, hdr_len);
2269 		skb_shinfo(skb)->nr_frags = 1;
2270 		skb_frag_set_page(skb, 0, page_info->page);
2271 		skb_shinfo(skb)->frags[0].page_offset =
2272 					page_info->page_offset + hdr_len;
2273 		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2274 				  curr_frag_len - hdr_len);
2275 		skb->data_len = curr_frag_len - hdr_len;
2276 		skb->truesize += rx_frag_size;
2277 		skb->tail += hdr_len;
2278 	}
2279 	page_info->page = NULL;
2280 
2281 	if (rxcp->pkt_size <= rx_frag_size) {
2282 		BUG_ON(rxcp->num_rcvd != 1);
2283 		return;
2284 	}
2285 
2286 	/* More frags present for this completion */
2287 	remaining = rxcp->pkt_size - curr_frag_len;
2288 	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2289 		page_info = get_rx_page_info(rxo);
2290 		curr_frag_len = min(remaining, rx_frag_size);
2291 
2292 		/* Coalesce all frags from the same physical page in one slot */
2293 		if (page_info->page_offset == 0) {
2294 			/* Fresh page */
2295 			j++;
2296 			skb_frag_set_page(skb, j, page_info->page);
2297 			skb_shinfo(skb)->frags[j].page_offset =
2298 							page_info->page_offset;
2299 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2300 			skb_shinfo(skb)->nr_frags++;
2301 		} else {
2302 			put_page(page_info->page);
2303 		}
2304 
2305 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2306 		skb->len += curr_frag_len;
2307 		skb->data_len += curr_frag_len;
2308 		skb->truesize += rx_frag_size;
2309 		remaining -= curr_frag_len;
2310 		page_info->page = NULL;
2311 	}
2312 	BUG_ON(j > MAX_SKB_FRAGS);
2313 }
2314 
2315 /* Process the RX completion indicated by rxcp when GRO is disabled */
2316 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2317 				struct be_rx_compl_info *rxcp)
2318 {
2319 	struct be_adapter *adapter = rxo->adapter;
2320 	struct net_device *netdev = adapter->netdev;
2321 	struct sk_buff *skb;
2322 
2323 	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2324 	if (unlikely(!skb)) {
2325 		rx_stats(rxo)->rx_drops_no_skbs++;
2326 		be_rx_compl_discard(rxo, rxcp);
2327 		return;
2328 	}
2329 
2330 	skb_fill_rx_data(rxo, skb, rxcp);
2331 
2332 	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2333 		skb->ip_summed = CHECKSUM_UNNECESSARY;
2334 	else
2335 		skb_checksum_none_assert(skb);
2336 
2337 	skb->protocol = eth_type_trans(skb, netdev);
2338 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2339 	if (netdev->features & NETIF_F_RXHASH)
2340 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2341 
2342 	skb->csum_level = rxcp->tunneled;
2343 	skb_mark_napi_id(skb, napi);
2344 
2345 	if (rxcp->vlanf)
2346 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2347 
2348 	netif_receive_skb(skb);
2349 }
2350 
2351 /* Process the RX completion indicated by rxcp when GRO is enabled */
2352 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2353 				    struct napi_struct *napi,
2354 				    struct be_rx_compl_info *rxcp)
2355 {
2356 	struct be_adapter *adapter = rxo->adapter;
2357 	struct be_rx_page_info *page_info;
2358 	struct sk_buff *skb = NULL;
2359 	u16 remaining, curr_frag_len;
2360 	u16 i, j;
2361 
2362 	skb = napi_get_frags(napi);
2363 	if (!skb) {
2364 		be_rx_compl_discard(rxo, rxcp);
2365 		return;
2366 	}
2367 
2368 	remaining = rxcp->pkt_size;
2369 	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2370 		page_info = get_rx_page_info(rxo);
2371 
2372 		curr_frag_len = min(remaining, rx_frag_size);
2373 
2374 		/* Coalesce all frags from the same physical page in one slot */
2375 		if (i == 0 || page_info->page_offset == 0) {
2376 			/* First frag or Fresh page */
2377 			j++;
2378 			skb_frag_set_page(skb, j, page_info->page);
2379 			skb_shinfo(skb)->frags[j].page_offset =
2380 							page_info->page_offset;
2381 			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2382 		} else {
2383 			put_page(page_info->page);
2384 		}
2385 		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2386 		skb->truesize += rx_frag_size;
2387 		remaining -= curr_frag_len;
2388 		memset(page_info, 0, sizeof(*page_info));
2389 	}
2390 	BUG_ON(j > MAX_SKB_FRAGS);
2391 
2392 	skb_shinfo(skb)->nr_frags = j + 1;
2393 	skb->len = rxcp->pkt_size;
2394 	skb->data_len = rxcp->pkt_size;
2395 	skb->ip_summed = CHECKSUM_UNNECESSARY;
2396 	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2397 	if (adapter->netdev->features & NETIF_F_RXHASH)
2398 		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2399 
2400 	skb->csum_level = rxcp->tunneled;
2401 
2402 	if (rxcp->vlanf)
2403 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2404 
2405 	napi_gro_frags(napi);
2406 }
2407 
2408 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2409 				 struct be_rx_compl_info *rxcp)
2410 {
2411 	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2412 	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2413 	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2414 	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2415 	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2416 	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2417 	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2418 	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2419 	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2420 	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2421 	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2422 	if (rxcp->vlanf) {
2423 		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2424 		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2425 	}
2426 	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2427 	rxcp->tunneled =
2428 		GET_RX_COMPL_V1_BITS(tunneled, compl);
2429 }
2430 
2431 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2432 				 struct be_rx_compl_info *rxcp)
2433 {
2434 	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2435 	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2436 	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2437 	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2438 	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2439 	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2440 	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2441 	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2442 	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2443 	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2444 	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2445 	if (rxcp->vlanf) {
2446 		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2447 		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2448 	}
2449 	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2450 	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2451 }
2452 
2453 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2454 {
2455 	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2456 	struct be_rx_compl_info *rxcp = &rxo->rxcp;
2457 	struct be_adapter *adapter = rxo->adapter;
2458 
2459 	/* For checking the valid bit it is Ok to use either definition as the
2460 	 * valid bit is at the same position in both v0 and v1 Rx compl */
2461 	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2462 		return NULL;
2463 
2464 	rmb();
2465 	be_dws_le_to_cpu(compl, sizeof(*compl));
2466 
2467 	if (adapter->be3_native)
2468 		be_parse_rx_compl_v1(compl, rxcp);
2469 	else
2470 		be_parse_rx_compl_v0(compl, rxcp);
2471 
2472 	if (rxcp->ip_frag)
2473 		rxcp->l4_csum = 0;
2474 
2475 	if (rxcp->vlanf) {
2476 		/* In QNQ modes, if qnq bit is not set, then the packet was
2477 		 * tagged only with the transparent outer vlan-tag and must
2478 		 * not be treated as a vlan packet by host
2479 		 */
2480 		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2481 			rxcp->vlanf = 0;
2482 
2483 		if (!lancer_chip(adapter))
2484 			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2485 
2486 		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2487 		    !test_bit(rxcp->vlan_tag, adapter->vids))
2488 			rxcp->vlanf = 0;
2489 	}
2490 
2491 	/* As the compl has been parsed, reset it; we wont touch it again */
2492 	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2493 
2494 	queue_tail_inc(&rxo->cq);
2495 	return rxcp;
2496 }
2497 
2498 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2499 {
2500 	u32 order = get_order(size);
2501 
2502 	if (order > 0)
2503 		gfp |= __GFP_COMP;
2504 	return  alloc_pages(gfp, order);
2505 }
2506 
2507 /*
2508  * Allocate a page, split it to fragments of size rx_frag_size and post as
2509  * receive buffers to BE
2510  */
2511 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2512 {
2513 	struct be_adapter *adapter = rxo->adapter;
2514 	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2515 	struct be_queue_info *rxq = &rxo->q;
2516 	struct page *pagep = NULL;
2517 	struct device *dev = &adapter->pdev->dev;
2518 	struct be_eth_rx_d *rxd;
2519 	u64 page_dmaaddr = 0, frag_dmaaddr;
2520 	u32 posted, page_offset = 0, notify = 0;
2521 
2522 	page_info = &rxo->page_info_tbl[rxq->head];
2523 	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2524 		if (!pagep) {
2525 			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2526 			if (unlikely(!pagep)) {
2527 				rx_stats(rxo)->rx_post_fail++;
2528 				break;
2529 			}
2530 			page_dmaaddr = dma_map_page(dev, pagep, 0,
2531 						    adapter->big_page_size,
2532 						    DMA_FROM_DEVICE);
2533 			if (dma_mapping_error(dev, page_dmaaddr)) {
2534 				put_page(pagep);
2535 				pagep = NULL;
2536 				adapter->drv_stats.dma_map_errors++;
2537 				break;
2538 			}
2539 			page_offset = 0;
2540 		} else {
2541 			get_page(pagep);
2542 			page_offset += rx_frag_size;
2543 		}
2544 		page_info->page_offset = page_offset;
2545 		page_info->page = pagep;
2546 
2547 		rxd = queue_head_node(rxq);
2548 		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2549 		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2550 		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2551 
2552 		/* Any space left in the current big page for another frag? */
2553 		if ((page_offset + rx_frag_size + rx_frag_size) >
2554 					adapter->big_page_size) {
2555 			pagep = NULL;
2556 			page_info->last_frag = true;
2557 			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2558 		} else {
2559 			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2560 		}
2561 
2562 		prev_page_info = page_info;
2563 		queue_head_inc(rxq);
2564 		page_info = &rxo->page_info_tbl[rxq->head];
2565 	}
2566 
2567 	/* Mark the last frag of a page when we break out of the above loop
2568 	 * with no more slots available in the RXQ
2569 	 */
2570 	if (pagep) {
2571 		prev_page_info->last_frag = true;
2572 		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2573 	}
2574 
2575 	if (posted) {
2576 		atomic_add(posted, &rxq->used);
2577 		if (rxo->rx_post_starved)
2578 			rxo->rx_post_starved = false;
2579 		do {
2580 			notify = min(MAX_NUM_POST_ERX_DB, posted);
2581 			be_rxq_notify(adapter, rxq->id, notify);
2582 			posted -= notify;
2583 		} while (posted);
2584 	} else if (atomic_read(&rxq->used) == 0) {
2585 		/* Let be_worker replenish when memory is available */
2586 		rxo->rx_post_starved = true;
2587 	}
2588 }
2589 
2590 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2591 {
2592 	struct be_queue_info *tx_cq = &txo->cq;
2593 	struct be_tx_compl_info *txcp = &txo->txcp;
2594 	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2595 
2596 	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2597 		return NULL;
2598 
2599 	/* Ensure load ordering of valid bit dword and other dwords below */
2600 	rmb();
2601 	be_dws_le_to_cpu(compl, sizeof(*compl));
2602 
2603 	txcp->status = GET_TX_COMPL_BITS(status, compl);
2604 	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2605 
2606 	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2607 	queue_tail_inc(tx_cq);
2608 	return txcp;
2609 }
2610 
2611 static u16 be_tx_compl_process(struct be_adapter *adapter,
2612 			       struct be_tx_obj *txo, u16 last_index)
2613 {
2614 	struct sk_buff **sent_skbs = txo->sent_skb_list;
2615 	struct be_queue_info *txq = &txo->q;
2616 	struct sk_buff *skb = NULL;
2617 	bool unmap_skb_hdr = false;
2618 	struct be_eth_wrb *wrb;
2619 	u16 num_wrbs = 0;
2620 	u32 frag_index;
2621 
2622 	do {
2623 		if (sent_skbs[txq->tail]) {
2624 			/* Free skb from prev req */
2625 			if (skb)
2626 				dev_consume_skb_any(skb);
2627 			skb = sent_skbs[txq->tail];
2628 			sent_skbs[txq->tail] = NULL;
2629 			queue_tail_inc(txq);  /* skip hdr wrb */
2630 			num_wrbs++;
2631 			unmap_skb_hdr = true;
2632 		}
2633 		wrb = queue_tail_node(txq);
2634 		frag_index = txq->tail;
2635 		unmap_tx_frag(&adapter->pdev->dev, wrb,
2636 			      (unmap_skb_hdr && skb_headlen(skb)));
2637 		unmap_skb_hdr = false;
2638 		queue_tail_inc(txq);
2639 		num_wrbs++;
2640 	} while (frag_index != last_index);
2641 	dev_consume_skb_any(skb);
2642 
2643 	return num_wrbs;
2644 }
2645 
2646 /* Return the number of events in the event queue */
2647 static inline int events_get(struct be_eq_obj *eqo)
2648 {
2649 	struct be_eq_entry *eqe;
2650 	int num = 0;
2651 
2652 	do {
2653 		eqe = queue_tail_node(&eqo->q);
2654 		if (eqe->evt == 0)
2655 			break;
2656 
2657 		rmb();
2658 		eqe->evt = 0;
2659 		num++;
2660 		queue_tail_inc(&eqo->q);
2661 	} while (true);
2662 
2663 	return num;
2664 }
2665 
2666 /* Leaves the EQ is disarmed state */
2667 static void be_eq_clean(struct be_eq_obj *eqo)
2668 {
2669 	int num = events_get(eqo);
2670 
2671 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2672 }
2673 
2674 /* Free posted rx buffers that were not used */
2675 static void be_rxq_clean(struct be_rx_obj *rxo)
2676 {
2677 	struct be_queue_info *rxq = &rxo->q;
2678 	struct be_rx_page_info *page_info;
2679 
2680 	while (atomic_read(&rxq->used) > 0) {
2681 		page_info = get_rx_page_info(rxo);
2682 		put_page(page_info->page);
2683 		memset(page_info, 0, sizeof(*page_info));
2684 	}
2685 	BUG_ON(atomic_read(&rxq->used));
2686 	rxq->tail = 0;
2687 	rxq->head = 0;
2688 }
2689 
2690 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2691 {
2692 	struct be_queue_info *rx_cq = &rxo->cq;
2693 	struct be_rx_compl_info *rxcp;
2694 	struct be_adapter *adapter = rxo->adapter;
2695 	int flush_wait = 0;
2696 
2697 	/* Consume pending rx completions.
2698 	 * Wait for the flush completion (identified by zero num_rcvd)
2699 	 * to arrive. Notify CQ even when there are no more CQ entries
2700 	 * for HW to flush partially coalesced CQ entries.
2701 	 * In Lancer, there is no need to wait for flush compl.
2702 	 */
2703 	for (;;) {
2704 		rxcp = be_rx_compl_get(rxo);
2705 		if (!rxcp) {
2706 			if (lancer_chip(adapter))
2707 				break;
2708 
2709 			if (flush_wait++ > 50 ||
2710 			    be_check_error(adapter,
2711 					   BE_ERROR_HW)) {
2712 				dev_warn(&adapter->pdev->dev,
2713 					 "did not receive flush compl\n");
2714 				break;
2715 			}
2716 			be_cq_notify(adapter, rx_cq->id, true, 0);
2717 			mdelay(1);
2718 		} else {
2719 			be_rx_compl_discard(rxo, rxcp);
2720 			be_cq_notify(adapter, rx_cq->id, false, 1);
2721 			if (rxcp->num_rcvd == 0)
2722 				break;
2723 		}
2724 	}
2725 
2726 	/* After cleanup, leave the CQ in unarmed state */
2727 	be_cq_notify(adapter, rx_cq->id, false, 0);
2728 }
2729 
2730 static void be_tx_compl_clean(struct be_adapter *adapter)
2731 {
2732 	struct device *dev = &adapter->pdev->dev;
2733 	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2734 	struct be_tx_compl_info *txcp;
2735 	struct be_queue_info *txq;
2736 	u32 end_idx, notified_idx;
2737 	struct be_tx_obj *txo;
2738 	int i, pending_txqs;
2739 
2740 	/* Stop polling for compls when HW has been silent for 10ms */
2741 	do {
2742 		pending_txqs = adapter->num_tx_qs;
2743 
2744 		for_all_tx_queues(adapter, txo, i) {
2745 			cmpl = 0;
2746 			num_wrbs = 0;
2747 			txq = &txo->q;
2748 			while ((txcp = be_tx_compl_get(txo))) {
2749 				num_wrbs +=
2750 					be_tx_compl_process(adapter, txo,
2751 							    txcp->end_index);
2752 				cmpl++;
2753 			}
2754 			if (cmpl) {
2755 				be_cq_notify(adapter, txo->cq.id, false, cmpl);
2756 				atomic_sub(num_wrbs, &txq->used);
2757 				timeo = 0;
2758 			}
2759 			if (!be_is_tx_compl_pending(txo))
2760 				pending_txqs--;
2761 		}
2762 
2763 		if (pending_txqs == 0 || ++timeo > 10 ||
2764 		    be_check_error(adapter, BE_ERROR_HW))
2765 			break;
2766 
2767 		mdelay(1);
2768 	} while (true);
2769 
2770 	/* Free enqueued TX that was never notified to HW */
2771 	for_all_tx_queues(adapter, txo, i) {
2772 		txq = &txo->q;
2773 
2774 		if (atomic_read(&txq->used)) {
2775 			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2776 				 i, atomic_read(&txq->used));
2777 			notified_idx = txq->tail;
2778 			end_idx = txq->tail;
2779 			index_adv(&end_idx, atomic_read(&txq->used) - 1,
2780 				  txq->len);
2781 			/* Use the tx-compl process logic to handle requests
2782 			 * that were not sent to the HW.
2783 			 */
2784 			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2785 			atomic_sub(num_wrbs, &txq->used);
2786 			BUG_ON(atomic_read(&txq->used));
2787 			txo->pend_wrb_cnt = 0;
2788 			/* Since hw was never notified of these requests,
2789 			 * reset TXQ indices
2790 			 */
2791 			txq->head = notified_idx;
2792 			txq->tail = notified_idx;
2793 		}
2794 	}
2795 }
2796 
2797 static void be_evt_queues_destroy(struct be_adapter *adapter)
2798 {
2799 	struct be_eq_obj *eqo;
2800 	int i;
2801 
2802 	for_all_evt_queues(adapter, eqo, i) {
2803 		if (eqo->q.created) {
2804 			be_eq_clean(eqo);
2805 			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2806 			napi_hash_del(&eqo->napi);
2807 			netif_napi_del(&eqo->napi);
2808 			free_cpumask_var(eqo->affinity_mask);
2809 		}
2810 		be_queue_free(adapter, &eqo->q);
2811 	}
2812 }
2813 
2814 static int be_evt_queues_create(struct be_adapter *adapter)
2815 {
2816 	struct be_queue_info *eq;
2817 	struct be_eq_obj *eqo;
2818 	struct be_aic_obj *aic;
2819 	int i, rc;
2820 
2821 	/* need enough EQs to service both RX and TX queues */
2822 	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2823 				    max(adapter->cfg_num_rx_irqs,
2824 					adapter->cfg_num_tx_irqs));
2825 
2826 	for_all_evt_queues(adapter, eqo, i) {
2827 		int numa_node = dev_to_node(&adapter->pdev->dev);
2828 
2829 		aic = &adapter->aic_obj[i];
2830 		eqo->adapter = adapter;
2831 		eqo->idx = i;
2832 		aic->max_eqd = BE_MAX_EQD;
2833 		aic->enable = true;
2834 
2835 		eq = &eqo->q;
2836 		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2837 				    sizeof(struct be_eq_entry));
2838 		if (rc)
2839 			return rc;
2840 
2841 		rc = be_cmd_eq_create(adapter, eqo);
2842 		if (rc)
2843 			return rc;
2844 
2845 		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2846 			return -ENOMEM;
2847 		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2848 				eqo->affinity_mask);
2849 		netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2850 			       BE_NAPI_WEIGHT);
2851 	}
2852 	return 0;
2853 }
2854 
2855 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2856 {
2857 	struct be_queue_info *q;
2858 
2859 	q = &adapter->mcc_obj.q;
2860 	if (q->created)
2861 		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2862 	be_queue_free(adapter, q);
2863 
2864 	q = &adapter->mcc_obj.cq;
2865 	if (q->created)
2866 		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2867 	be_queue_free(adapter, q);
2868 }
2869 
2870 /* Must be called only after TX qs are created as MCC shares TX EQ */
2871 static int be_mcc_queues_create(struct be_adapter *adapter)
2872 {
2873 	struct be_queue_info *q, *cq;
2874 
2875 	cq = &adapter->mcc_obj.cq;
2876 	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2877 			   sizeof(struct be_mcc_compl)))
2878 		goto err;
2879 
2880 	/* Use the default EQ for MCC completions */
2881 	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2882 		goto mcc_cq_free;
2883 
2884 	q = &adapter->mcc_obj.q;
2885 	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2886 		goto mcc_cq_destroy;
2887 
2888 	if (be_cmd_mccq_create(adapter, q, cq))
2889 		goto mcc_q_free;
2890 
2891 	return 0;
2892 
2893 mcc_q_free:
2894 	be_queue_free(adapter, q);
2895 mcc_cq_destroy:
2896 	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2897 mcc_cq_free:
2898 	be_queue_free(adapter, cq);
2899 err:
2900 	return -1;
2901 }
2902 
2903 static void be_tx_queues_destroy(struct be_adapter *adapter)
2904 {
2905 	struct be_queue_info *q;
2906 	struct be_tx_obj *txo;
2907 	u8 i;
2908 
2909 	for_all_tx_queues(adapter, txo, i) {
2910 		q = &txo->q;
2911 		if (q->created)
2912 			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2913 		be_queue_free(adapter, q);
2914 
2915 		q = &txo->cq;
2916 		if (q->created)
2917 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2918 		be_queue_free(adapter, q);
2919 	}
2920 }
2921 
2922 static int be_tx_qs_create(struct be_adapter *adapter)
2923 {
2924 	struct be_queue_info *cq;
2925 	struct be_tx_obj *txo;
2926 	struct be_eq_obj *eqo;
2927 	int status, i;
2928 
2929 	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2930 
2931 	for_all_tx_queues(adapter, txo, i) {
2932 		cq = &txo->cq;
2933 		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2934 					sizeof(struct be_eth_tx_compl));
2935 		if (status)
2936 			return status;
2937 
2938 		u64_stats_init(&txo->stats.sync);
2939 		u64_stats_init(&txo->stats.sync_compl);
2940 
2941 		/* If num_evt_qs is less than num_tx_qs, then more than
2942 		 * one txq share an eq
2943 		 */
2944 		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2945 		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2946 		if (status)
2947 			return status;
2948 
2949 		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2950 					sizeof(struct be_eth_wrb));
2951 		if (status)
2952 			return status;
2953 
2954 		status = be_cmd_txq_create(adapter, txo);
2955 		if (status)
2956 			return status;
2957 
2958 		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2959 				    eqo->idx);
2960 	}
2961 
2962 	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2963 		 adapter->num_tx_qs);
2964 	return 0;
2965 }
2966 
2967 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2968 {
2969 	struct be_queue_info *q;
2970 	struct be_rx_obj *rxo;
2971 	int i;
2972 
2973 	for_all_rx_queues(adapter, rxo, i) {
2974 		q = &rxo->cq;
2975 		if (q->created)
2976 			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2977 		be_queue_free(adapter, q);
2978 	}
2979 }
2980 
2981 static int be_rx_cqs_create(struct be_adapter *adapter)
2982 {
2983 	struct be_queue_info *eq, *cq;
2984 	struct be_rx_obj *rxo;
2985 	int rc, i;
2986 
2987 	adapter->num_rss_qs =
2988 			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2989 
2990 	/* We'll use RSS only if atleast 2 RSS rings are supported. */
2991 	if (adapter->num_rss_qs < 2)
2992 		adapter->num_rss_qs = 0;
2993 
2994 	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2995 
2996 	/* When the interface is not capable of RSS rings (and there is no
2997 	 * need to create a default RXQ) we'll still need one RXQ
2998 	 */
2999 	if (adapter->num_rx_qs == 0)
3000 		adapter->num_rx_qs = 1;
3001 
3002 	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3003 	for_all_rx_queues(adapter, rxo, i) {
3004 		rxo->adapter = adapter;
3005 		cq = &rxo->cq;
3006 		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3007 				    sizeof(struct be_eth_rx_compl));
3008 		if (rc)
3009 			return rc;
3010 
3011 		u64_stats_init(&rxo->stats.sync);
3012 		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3013 		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3014 		if (rc)
3015 			return rc;
3016 	}
3017 
3018 	dev_info(&adapter->pdev->dev,
3019 		 "created %d RX queue(s)\n", adapter->num_rx_qs);
3020 	return 0;
3021 }
3022 
3023 static irqreturn_t be_intx(int irq, void *dev)
3024 {
3025 	struct be_eq_obj *eqo = dev;
3026 	struct be_adapter *adapter = eqo->adapter;
3027 	int num_evts = 0;
3028 
3029 	/* IRQ is not expected when NAPI is scheduled as the EQ
3030 	 * will not be armed.
3031 	 * But, this can happen on Lancer INTx where it takes
3032 	 * a while to de-assert INTx or in BE2 where occasionaly
3033 	 * an interrupt may be raised even when EQ is unarmed.
3034 	 * If NAPI is already scheduled, then counting & notifying
3035 	 * events will orphan them.
3036 	 */
3037 	if (napi_schedule_prep(&eqo->napi)) {
3038 		num_evts = events_get(eqo);
3039 		__napi_schedule(&eqo->napi);
3040 		if (num_evts)
3041 			eqo->spurious_intr = 0;
3042 	}
3043 	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3044 
3045 	/* Return IRQ_HANDLED only for the the first spurious intr
3046 	 * after a valid intr to stop the kernel from branding
3047 	 * this irq as a bad one!
3048 	 */
3049 	if (num_evts || eqo->spurious_intr++ == 0)
3050 		return IRQ_HANDLED;
3051 	else
3052 		return IRQ_NONE;
3053 }
3054 
3055 static irqreturn_t be_msix(int irq, void *dev)
3056 {
3057 	struct be_eq_obj *eqo = dev;
3058 
3059 	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3060 	napi_schedule(&eqo->napi);
3061 	return IRQ_HANDLED;
3062 }
3063 
3064 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3065 {
3066 	return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3067 }
3068 
3069 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3070 			 int budget, int polling)
3071 {
3072 	struct be_adapter *adapter = rxo->adapter;
3073 	struct be_queue_info *rx_cq = &rxo->cq;
3074 	struct be_rx_compl_info *rxcp;
3075 	u32 work_done;
3076 	u32 frags_consumed = 0;
3077 
3078 	for (work_done = 0; work_done < budget; work_done++) {
3079 		rxcp = be_rx_compl_get(rxo);
3080 		if (!rxcp)
3081 			break;
3082 
3083 		/* Is it a flush compl that has no data */
3084 		if (unlikely(rxcp->num_rcvd == 0))
3085 			goto loop_continue;
3086 
3087 		/* Discard compl with partial DMA Lancer B0 */
3088 		if (unlikely(!rxcp->pkt_size)) {
3089 			be_rx_compl_discard(rxo, rxcp);
3090 			goto loop_continue;
3091 		}
3092 
3093 		/* On BE drop pkts that arrive due to imperfect filtering in
3094 		 * promiscuous mode on some skews
3095 		 */
3096 		if (unlikely(rxcp->port != adapter->port_num &&
3097 			     !lancer_chip(adapter))) {
3098 			be_rx_compl_discard(rxo, rxcp);
3099 			goto loop_continue;
3100 		}
3101 
3102 		/* Don't do gro when we're busy_polling */
3103 		if (do_gro(rxcp) && polling != BUSY_POLLING)
3104 			be_rx_compl_process_gro(rxo, napi, rxcp);
3105 		else
3106 			be_rx_compl_process(rxo, napi, rxcp);
3107 
3108 loop_continue:
3109 		frags_consumed += rxcp->num_rcvd;
3110 		be_rx_stats_update(rxo, rxcp);
3111 	}
3112 
3113 	if (work_done) {
3114 		be_cq_notify(adapter, rx_cq->id, true, work_done);
3115 
3116 		/* When an rx-obj gets into post_starved state, just
3117 		 * let be_worker do the posting.
3118 		 */
3119 		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3120 		    !rxo->rx_post_starved)
3121 			be_post_rx_frags(rxo, GFP_ATOMIC,
3122 					 max_t(u32, MAX_RX_POST,
3123 					       frags_consumed));
3124 	}
3125 
3126 	return work_done;
3127 }
3128 
3129 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3130 {
3131 	switch (status) {
3132 	case BE_TX_COMP_HDR_PARSE_ERR:
3133 		tx_stats(txo)->tx_hdr_parse_err++;
3134 		break;
3135 	case BE_TX_COMP_NDMA_ERR:
3136 		tx_stats(txo)->tx_dma_err++;
3137 		break;
3138 	case BE_TX_COMP_ACL_ERR:
3139 		tx_stats(txo)->tx_spoof_check_err++;
3140 		break;
3141 	}
3142 }
3143 
3144 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3145 {
3146 	switch (status) {
3147 	case LANCER_TX_COMP_LSO_ERR:
3148 		tx_stats(txo)->tx_tso_err++;
3149 		break;
3150 	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3151 	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3152 		tx_stats(txo)->tx_spoof_check_err++;
3153 		break;
3154 	case LANCER_TX_COMP_QINQ_ERR:
3155 		tx_stats(txo)->tx_qinq_err++;
3156 		break;
3157 	case LANCER_TX_COMP_PARITY_ERR:
3158 		tx_stats(txo)->tx_internal_parity_err++;
3159 		break;
3160 	case LANCER_TX_COMP_DMA_ERR:
3161 		tx_stats(txo)->tx_dma_err++;
3162 		break;
3163 	}
3164 }
3165 
3166 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3167 			  int idx)
3168 {
3169 	int num_wrbs = 0, work_done = 0;
3170 	struct be_tx_compl_info *txcp;
3171 
3172 	while ((txcp = be_tx_compl_get(txo))) {
3173 		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3174 		work_done++;
3175 
3176 		if (txcp->status) {
3177 			if (lancer_chip(adapter))
3178 				lancer_update_tx_err(txo, txcp->status);
3179 			else
3180 				be_update_tx_err(txo, txcp->status);
3181 		}
3182 	}
3183 
3184 	if (work_done) {
3185 		be_cq_notify(adapter, txo->cq.id, true, work_done);
3186 		atomic_sub(num_wrbs, &txo->q.used);
3187 
3188 		/* As Tx wrbs have been freed up, wake up netdev queue
3189 		 * if it was stopped due to lack of tx wrbs.  */
3190 		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3191 		    be_can_txq_wake(txo)) {
3192 			netif_wake_subqueue(adapter->netdev, idx);
3193 		}
3194 
3195 		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3196 		tx_stats(txo)->tx_compl += work_done;
3197 		u64_stats_update_end(&tx_stats(txo)->sync_compl);
3198 	}
3199 }
3200 
3201 #ifdef CONFIG_NET_RX_BUSY_POLL
3202 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3203 {
3204 	bool status = true;
3205 
3206 	spin_lock(&eqo->lock); /* BH is already disabled */
3207 	if (eqo->state & BE_EQ_LOCKED) {
3208 		WARN_ON(eqo->state & BE_EQ_NAPI);
3209 		eqo->state |= BE_EQ_NAPI_YIELD;
3210 		status = false;
3211 	} else {
3212 		eqo->state = BE_EQ_NAPI;
3213 	}
3214 	spin_unlock(&eqo->lock);
3215 	return status;
3216 }
3217 
3218 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3219 {
3220 	spin_lock(&eqo->lock); /* BH is already disabled */
3221 
3222 	WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3223 	eqo->state = BE_EQ_IDLE;
3224 
3225 	spin_unlock(&eqo->lock);
3226 }
3227 
3228 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3229 {
3230 	bool status = true;
3231 
3232 	spin_lock_bh(&eqo->lock);
3233 	if (eqo->state & BE_EQ_LOCKED) {
3234 		eqo->state |= BE_EQ_POLL_YIELD;
3235 		status = false;
3236 	} else {
3237 		eqo->state |= BE_EQ_POLL;
3238 	}
3239 	spin_unlock_bh(&eqo->lock);
3240 	return status;
3241 }
3242 
3243 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3244 {
3245 	spin_lock_bh(&eqo->lock);
3246 
3247 	WARN_ON(eqo->state & (BE_EQ_NAPI));
3248 	eqo->state = BE_EQ_IDLE;
3249 
3250 	spin_unlock_bh(&eqo->lock);
3251 }
3252 
3253 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3254 {
3255 	spin_lock_init(&eqo->lock);
3256 	eqo->state = BE_EQ_IDLE;
3257 }
3258 
3259 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3260 {
3261 	local_bh_disable();
3262 
3263 	/* It's enough to just acquire napi lock on the eqo to stop
3264 	 * be_busy_poll() from processing any queueus.
3265 	 */
3266 	while (!be_lock_napi(eqo))
3267 		mdelay(1);
3268 
3269 	local_bh_enable();
3270 }
3271 
3272 #else /* CONFIG_NET_RX_BUSY_POLL */
3273 
3274 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3275 {
3276 	return true;
3277 }
3278 
3279 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3280 {
3281 }
3282 
3283 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3284 {
3285 	return false;
3286 }
3287 
3288 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3289 {
3290 }
3291 
3292 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3293 {
3294 }
3295 
3296 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3297 {
3298 }
3299 #endif /* CONFIG_NET_RX_BUSY_POLL */
3300 
3301 int be_poll(struct napi_struct *napi, int budget)
3302 {
3303 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3304 	struct be_adapter *adapter = eqo->adapter;
3305 	int max_work = 0, work, i, num_evts;
3306 	struct be_rx_obj *rxo;
3307 	struct be_tx_obj *txo;
3308 	u32 mult_enc = 0;
3309 
3310 	num_evts = events_get(eqo);
3311 
3312 	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3313 		be_process_tx(adapter, txo, i);
3314 
3315 	if (be_lock_napi(eqo)) {
3316 		/* This loop will iterate twice for EQ0 in which
3317 		 * completions of the last RXQ (default one) are also processed
3318 		 * For other EQs the loop iterates only once
3319 		 */
3320 		for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3321 			work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3322 			max_work = max(work, max_work);
3323 		}
3324 		be_unlock_napi(eqo);
3325 	} else {
3326 		max_work = budget;
3327 	}
3328 
3329 	if (is_mcc_eqo(eqo))
3330 		be_process_mcc(adapter);
3331 
3332 	if (max_work < budget) {
3333 		napi_complete(napi);
3334 
3335 		/* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3336 		 * delay via a delay multiplier encoding value
3337 		 */
3338 		if (skyhawk_chip(adapter))
3339 			mult_enc = be_get_eq_delay_mult_enc(eqo);
3340 
3341 		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3342 			     mult_enc);
3343 	} else {
3344 		/* As we'll continue in polling mode, count and clear events */
3345 		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3346 	}
3347 	return max_work;
3348 }
3349 
3350 #ifdef CONFIG_NET_RX_BUSY_POLL
3351 static int be_busy_poll(struct napi_struct *napi)
3352 {
3353 	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3354 	struct be_adapter *adapter = eqo->adapter;
3355 	struct be_rx_obj *rxo;
3356 	int i, work = 0;
3357 
3358 	if (!be_lock_busy_poll(eqo))
3359 		return LL_FLUSH_BUSY;
3360 
3361 	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3362 		work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3363 		if (work)
3364 			break;
3365 	}
3366 
3367 	be_unlock_busy_poll(eqo);
3368 	return work;
3369 }
3370 #endif
3371 
3372 void be_detect_error(struct be_adapter *adapter)
3373 {
3374 	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3375 	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3376 	u32 i;
3377 	struct device *dev = &adapter->pdev->dev;
3378 
3379 	if (be_check_error(adapter, BE_ERROR_HW))
3380 		return;
3381 
3382 	if (lancer_chip(adapter)) {
3383 		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3384 		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3385 			be_set_error(adapter, BE_ERROR_UE);
3386 			sliport_err1 = ioread32(adapter->db +
3387 						SLIPORT_ERROR1_OFFSET);
3388 			sliport_err2 = ioread32(adapter->db +
3389 						SLIPORT_ERROR2_OFFSET);
3390 			/* Do not log error messages if its a FW reset */
3391 			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3392 			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3393 				dev_info(dev, "Firmware update in progress\n");
3394 			} else {
3395 				dev_err(dev, "Error detected in the card\n");
3396 				dev_err(dev, "ERR: sliport status 0x%x\n",
3397 					sliport_status);
3398 				dev_err(dev, "ERR: sliport error1 0x%x\n",
3399 					sliport_err1);
3400 				dev_err(dev, "ERR: sliport error2 0x%x\n",
3401 					sliport_err2);
3402 			}
3403 		}
3404 	} else {
3405 		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3406 		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3407 		ue_lo_mask = ioread32(adapter->pcicfg +
3408 				      PCICFG_UE_STATUS_LOW_MASK);
3409 		ue_hi_mask = ioread32(adapter->pcicfg +
3410 				      PCICFG_UE_STATUS_HI_MASK);
3411 
3412 		ue_lo = (ue_lo & ~ue_lo_mask);
3413 		ue_hi = (ue_hi & ~ue_hi_mask);
3414 
3415 		/* On certain platforms BE hardware can indicate spurious UEs.
3416 		 * Allow HW to stop working completely in case of a real UE.
3417 		 * Hence not setting the hw_error for UE detection.
3418 		 */
3419 
3420 		if (ue_lo || ue_hi) {
3421 			dev_err(dev, "Error detected in the adapter");
3422 			if (skyhawk_chip(adapter))
3423 				be_set_error(adapter, BE_ERROR_UE);
3424 
3425 			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3426 				if (ue_lo & 1)
3427 					dev_err(dev, "UE: %s bit set\n",
3428 						ue_status_low_desc[i]);
3429 			}
3430 			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3431 				if (ue_hi & 1)
3432 					dev_err(dev, "UE: %s bit set\n",
3433 						ue_status_hi_desc[i]);
3434 			}
3435 		}
3436 	}
3437 }
3438 
3439 static void be_msix_disable(struct be_adapter *adapter)
3440 {
3441 	if (msix_enabled(adapter)) {
3442 		pci_disable_msix(adapter->pdev);
3443 		adapter->num_msix_vec = 0;
3444 		adapter->num_msix_roce_vec = 0;
3445 	}
3446 }
3447 
3448 static int be_msix_enable(struct be_adapter *adapter)
3449 {
3450 	unsigned int i, max_roce_eqs;
3451 	struct device *dev = &adapter->pdev->dev;
3452 	int num_vec;
3453 
3454 	/* If RoCE is supported, program the max number of vectors that
3455 	 * could be used for NIC and RoCE, else, just program the number
3456 	 * we'll use initially.
3457 	 */
3458 	if (be_roce_supported(adapter)) {
3459 		max_roce_eqs =
3460 			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3461 		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3462 		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3463 	} else {
3464 		num_vec = max(adapter->cfg_num_rx_irqs,
3465 			      adapter->cfg_num_tx_irqs);
3466 	}
3467 
3468 	for (i = 0; i < num_vec; i++)
3469 		adapter->msix_entries[i].entry = i;
3470 
3471 	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3472 					MIN_MSIX_VECTORS, num_vec);
3473 	if (num_vec < 0)
3474 		goto fail;
3475 
3476 	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3477 		adapter->num_msix_roce_vec = num_vec / 2;
3478 		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3479 			 adapter->num_msix_roce_vec);
3480 	}
3481 
3482 	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3483 
3484 	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3485 		 adapter->num_msix_vec);
3486 	return 0;
3487 
3488 fail:
3489 	dev_warn(dev, "MSIx enable failed\n");
3490 
3491 	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
3492 	if (be_virtfn(adapter))
3493 		return num_vec;
3494 	return 0;
3495 }
3496 
3497 static inline int be_msix_vec_get(struct be_adapter *adapter,
3498 				  struct be_eq_obj *eqo)
3499 {
3500 	return adapter->msix_entries[eqo->msix_idx].vector;
3501 }
3502 
3503 static int be_msix_register(struct be_adapter *adapter)
3504 {
3505 	struct net_device *netdev = adapter->netdev;
3506 	struct be_eq_obj *eqo;
3507 	int status, i, vec;
3508 
3509 	for_all_evt_queues(adapter, eqo, i) {
3510 		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3511 		vec = be_msix_vec_get(adapter, eqo);
3512 		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3513 		if (status)
3514 			goto err_msix;
3515 
3516 		irq_set_affinity_hint(vec, eqo->affinity_mask);
3517 	}
3518 
3519 	return 0;
3520 err_msix:
3521 	for (i--; i >= 0; i--) {
3522 		eqo = &adapter->eq_obj[i];
3523 		free_irq(be_msix_vec_get(adapter, eqo), eqo);
3524 	}
3525 	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3526 		 status);
3527 	be_msix_disable(adapter);
3528 	return status;
3529 }
3530 
3531 static int be_irq_register(struct be_adapter *adapter)
3532 {
3533 	struct net_device *netdev = adapter->netdev;
3534 	int status;
3535 
3536 	if (msix_enabled(adapter)) {
3537 		status = be_msix_register(adapter);
3538 		if (status == 0)
3539 			goto done;
3540 		/* INTx is not supported for VF */
3541 		if (be_virtfn(adapter))
3542 			return status;
3543 	}
3544 
3545 	/* INTx: only the first EQ is used */
3546 	netdev->irq = adapter->pdev->irq;
3547 	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3548 			     &adapter->eq_obj[0]);
3549 	if (status) {
3550 		dev_err(&adapter->pdev->dev,
3551 			"INTx request IRQ failed - err %d\n", status);
3552 		return status;
3553 	}
3554 done:
3555 	adapter->isr_registered = true;
3556 	return 0;
3557 }
3558 
3559 static void be_irq_unregister(struct be_adapter *adapter)
3560 {
3561 	struct net_device *netdev = adapter->netdev;
3562 	struct be_eq_obj *eqo;
3563 	int i, vec;
3564 
3565 	if (!adapter->isr_registered)
3566 		return;
3567 
3568 	/* INTx */
3569 	if (!msix_enabled(adapter)) {
3570 		free_irq(netdev->irq, &adapter->eq_obj[0]);
3571 		goto done;
3572 	}
3573 
3574 	/* MSIx */
3575 	for_all_evt_queues(adapter, eqo, i) {
3576 		vec = be_msix_vec_get(adapter, eqo);
3577 		irq_set_affinity_hint(vec, NULL);
3578 		free_irq(vec, eqo);
3579 	}
3580 
3581 done:
3582 	adapter->isr_registered = false;
3583 }
3584 
3585 static void be_rx_qs_destroy(struct be_adapter *adapter)
3586 {
3587 	struct rss_info *rss = &adapter->rss_info;
3588 	struct be_queue_info *q;
3589 	struct be_rx_obj *rxo;
3590 	int i;
3591 
3592 	for_all_rx_queues(adapter, rxo, i) {
3593 		q = &rxo->q;
3594 		if (q->created) {
3595 			/* If RXQs are destroyed while in an "out of buffer"
3596 			 * state, there is a possibility of an HW stall on
3597 			 * Lancer. So, post 64 buffers to each queue to relieve
3598 			 * the "out of buffer" condition.
3599 			 * Make sure there's space in the RXQ before posting.
3600 			 */
3601 			if (lancer_chip(adapter)) {
3602 				be_rx_cq_clean(rxo);
3603 				if (atomic_read(&q->used) == 0)
3604 					be_post_rx_frags(rxo, GFP_KERNEL,
3605 							 MAX_RX_POST);
3606 			}
3607 
3608 			be_cmd_rxq_destroy(adapter, q);
3609 			be_rx_cq_clean(rxo);
3610 			be_rxq_clean(rxo);
3611 		}
3612 		be_queue_free(adapter, q);
3613 	}
3614 
3615 	if (rss->rss_flags) {
3616 		rss->rss_flags = RSS_ENABLE_NONE;
3617 		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3618 				  128, rss->rss_hkey);
3619 	}
3620 }
3621 
3622 static void be_disable_if_filters(struct be_adapter *adapter)
3623 {
3624 	be_dev_mac_del(adapter, adapter->pmac_id[0]);
3625 	be_clear_uc_list(adapter);
3626 	be_clear_mc_list(adapter);
3627 
3628 	/* The IFACE flags are enabled in the open path and cleared
3629 	 * in the close path. When a VF gets detached from the host and
3630 	 * assigned to a VM the following happens:
3631 	 *	- VF's IFACE flags get cleared in the detach path
3632 	 *	- IFACE create is issued by the VF in the attach path
3633 	 * Due to a bug in the BE3/Skyhawk-R FW
3634 	 * (Lancer FW doesn't have the bug), the IFACE capability flags
3635 	 * specified along with the IFACE create cmd issued by a VF are not
3636 	 * honoured by FW.  As a consequence, if a *new* driver
3637 	 * (that enables/disables IFACE flags in open/close)
3638 	 * is loaded in the host and an *old* driver is * used by a VM/VF,
3639 	 * the IFACE gets created *without* the needed flags.
3640 	 * To avoid this, disable RX-filter flags only for Lancer.
3641 	 */
3642 	if (lancer_chip(adapter)) {
3643 		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3644 		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3645 	}
3646 }
3647 
3648 static int be_close(struct net_device *netdev)
3649 {
3650 	struct be_adapter *adapter = netdev_priv(netdev);
3651 	struct be_eq_obj *eqo;
3652 	int i;
3653 
3654 	/* This protection is needed as be_close() may be called even when the
3655 	 * adapter is in cleared state (after eeh perm failure)
3656 	 */
3657 	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3658 		return 0;
3659 
3660 	/* Before attempting cleanup ensure all the pending cmds in the
3661 	 * config_wq have finished execution
3662 	 */
3663 	flush_workqueue(be_wq);
3664 
3665 	be_disable_if_filters(adapter);
3666 
3667 	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3668 		for_all_evt_queues(adapter, eqo, i) {
3669 			napi_disable(&eqo->napi);
3670 			be_disable_busy_poll(eqo);
3671 		}
3672 		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3673 	}
3674 
3675 	be_async_mcc_disable(adapter);
3676 
3677 	/* Wait for all pending tx completions to arrive so that
3678 	 * all tx skbs are freed.
3679 	 */
3680 	netif_tx_disable(netdev);
3681 	be_tx_compl_clean(adapter);
3682 
3683 	be_rx_qs_destroy(adapter);
3684 
3685 	for_all_evt_queues(adapter, eqo, i) {
3686 		if (msix_enabled(adapter))
3687 			synchronize_irq(be_msix_vec_get(adapter, eqo));
3688 		else
3689 			synchronize_irq(netdev->irq);
3690 		be_eq_clean(eqo);
3691 	}
3692 
3693 	be_irq_unregister(adapter);
3694 
3695 	return 0;
3696 }
3697 
3698 static int be_rx_qs_create(struct be_adapter *adapter)
3699 {
3700 	struct rss_info *rss = &adapter->rss_info;
3701 	u8 rss_key[RSS_HASH_KEY_LEN];
3702 	struct be_rx_obj *rxo;
3703 	int rc, i, j;
3704 
3705 	for_all_rx_queues(adapter, rxo, i) {
3706 		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3707 				    sizeof(struct be_eth_rx_d));
3708 		if (rc)
3709 			return rc;
3710 	}
3711 
3712 	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3713 		rxo = default_rxo(adapter);
3714 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3715 				       rx_frag_size, adapter->if_handle,
3716 				       false, &rxo->rss_id);
3717 		if (rc)
3718 			return rc;
3719 	}
3720 
3721 	for_all_rss_queues(adapter, rxo, i) {
3722 		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3723 				       rx_frag_size, adapter->if_handle,
3724 				       true, &rxo->rss_id);
3725 		if (rc)
3726 			return rc;
3727 	}
3728 
3729 	if (be_multi_rxq(adapter)) {
3730 		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3731 			for_all_rss_queues(adapter, rxo, i) {
3732 				if ((j + i) >= RSS_INDIR_TABLE_LEN)
3733 					break;
3734 				rss->rsstable[j + i] = rxo->rss_id;
3735 				rss->rss_queue[j + i] = i;
3736 			}
3737 		}
3738 		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3739 			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3740 
3741 		if (!BEx_chip(adapter))
3742 			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3743 				RSS_ENABLE_UDP_IPV6;
3744 
3745 		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3746 		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3747 				       RSS_INDIR_TABLE_LEN, rss_key);
3748 		if (rc) {
3749 			rss->rss_flags = RSS_ENABLE_NONE;
3750 			return rc;
3751 		}
3752 
3753 		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3754 	} else {
3755 		/* Disable RSS, if only default RX Q is created */
3756 		rss->rss_flags = RSS_ENABLE_NONE;
3757 	}
3758 
3759 
3760 	/* Post 1 less than RXQ-len to avoid head being equal to tail,
3761 	 * which is a queue empty condition
3762 	 */
3763 	for_all_rx_queues(adapter, rxo, i)
3764 		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3765 
3766 	return 0;
3767 }
3768 
3769 static int be_enable_if_filters(struct be_adapter *adapter)
3770 {
3771 	int status;
3772 
3773 	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3774 	if (status)
3775 		return status;
3776 
3777 	/* For BE3 VFs, the PF programs the initial MAC address */
3778 	if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3779 		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3780 		if (status)
3781 			return status;
3782 		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3783 	}
3784 
3785 	if (adapter->vlans_added)
3786 		be_vid_config(adapter);
3787 
3788 	__be_set_rx_mode(adapter);
3789 
3790 	return 0;
3791 }
3792 
3793 static int be_open(struct net_device *netdev)
3794 {
3795 	struct be_adapter *adapter = netdev_priv(netdev);
3796 	struct be_eq_obj *eqo;
3797 	struct be_rx_obj *rxo;
3798 	struct be_tx_obj *txo;
3799 	u8 link_status;
3800 	int status, i;
3801 
3802 	status = be_rx_qs_create(adapter);
3803 	if (status)
3804 		goto err;
3805 
3806 	status = be_enable_if_filters(adapter);
3807 	if (status)
3808 		goto err;
3809 
3810 	status = be_irq_register(adapter);
3811 	if (status)
3812 		goto err;
3813 
3814 	for_all_rx_queues(adapter, rxo, i)
3815 		be_cq_notify(adapter, rxo->cq.id, true, 0);
3816 
3817 	for_all_tx_queues(adapter, txo, i)
3818 		be_cq_notify(adapter, txo->cq.id, true, 0);
3819 
3820 	be_async_mcc_enable(adapter);
3821 
3822 	for_all_evt_queues(adapter, eqo, i) {
3823 		napi_enable(&eqo->napi);
3824 		be_enable_busy_poll(eqo);
3825 		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3826 	}
3827 	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3828 
3829 	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3830 	if (!status)
3831 		be_link_status_update(adapter, link_status);
3832 
3833 	netif_tx_start_all_queues(netdev);
3834 	if (skyhawk_chip(adapter))
3835 		udp_tunnel_get_rx_info(netdev);
3836 
3837 	return 0;
3838 err:
3839 	be_close(adapter->netdev);
3840 	return -EIO;
3841 }
3842 
3843 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3844 {
3845 	u32 addr;
3846 
3847 	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3848 
3849 	mac[5] = (u8)(addr & 0xFF);
3850 	mac[4] = (u8)((addr >> 8) & 0xFF);
3851 	mac[3] = (u8)((addr >> 16) & 0xFF);
3852 	/* Use the OUI from the current MAC address */
3853 	memcpy(mac, adapter->netdev->dev_addr, 3);
3854 }
3855 
3856 /*
3857  * Generate a seed MAC address from the PF MAC Address using jhash.
3858  * MAC Address for VFs are assigned incrementally starting from the seed.
3859  * These addresses are programmed in the ASIC by the PF and the VF driver
3860  * queries for the MAC address during its probe.
3861  */
3862 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3863 {
3864 	u32 vf;
3865 	int status = 0;
3866 	u8 mac[ETH_ALEN];
3867 	struct be_vf_cfg *vf_cfg;
3868 
3869 	be_vf_eth_addr_generate(adapter, mac);
3870 
3871 	for_all_vfs(adapter, vf_cfg, vf) {
3872 		if (BEx_chip(adapter))
3873 			status = be_cmd_pmac_add(adapter, mac,
3874 						 vf_cfg->if_handle,
3875 						 &vf_cfg->pmac_id, vf + 1);
3876 		else
3877 			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3878 						vf + 1);
3879 
3880 		if (status)
3881 			dev_err(&adapter->pdev->dev,
3882 				"Mac address assignment failed for VF %d\n",
3883 				vf);
3884 		else
3885 			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3886 
3887 		mac[5] += 1;
3888 	}
3889 	return status;
3890 }
3891 
3892 static int be_vfs_mac_query(struct be_adapter *adapter)
3893 {
3894 	int status, vf;
3895 	u8 mac[ETH_ALEN];
3896 	struct be_vf_cfg *vf_cfg;
3897 
3898 	for_all_vfs(adapter, vf_cfg, vf) {
3899 		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3900 					       mac, vf_cfg->if_handle,
3901 					       false, vf+1);
3902 		if (status)
3903 			return status;
3904 		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3905 	}
3906 	return 0;
3907 }
3908 
3909 static void be_vf_clear(struct be_adapter *adapter)
3910 {
3911 	struct be_vf_cfg *vf_cfg;
3912 	u32 vf;
3913 
3914 	if (pci_vfs_assigned(adapter->pdev)) {
3915 		dev_warn(&adapter->pdev->dev,
3916 			 "VFs are assigned to VMs: not disabling VFs\n");
3917 		goto done;
3918 	}
3919 
3920 	pci_disable_sriov(adapter->pdev);
3921 
3922 	for_all_vfs(adapter, vf_cfg, vf) {
3923 		if (BEx_chip(adapter))
3924 			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3925 					vf_cfg->pmac_id, vf + 1);
3926 		else
3927 			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3928 				       vf + 1);
3929 
3930 		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3931 	}
3932 
3933 	if (BE3_chip(adapter))
3934 		be_cmd_set_hsw_config(adapter, 0, 0,
3935 				      adapter->if_handle,
3936 				      PORT_FWD_TYPE_PASSTHRU, 0);
3937 done:
3938 	kfree(adapter->vf_cfg);
3939 	adapter->num_vfs = 0;
3940 	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3941 }
3942 
3943 static void be_clear_queues(struct be_adapter *adapter)
3944 {
3945 	be_mcc_queues_destroy(adapter);
3946 	be_rx_cqs_destroy(adapter);
3947 	be_tx_queues_destroy(adapter);
3948 	be_evt_queues_destroy(adapter);
3949 }
3950 
3951 static void be_cancel_worker(struct be_adapter *adapter)
3952 {
3953 	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3954 		cancel_delayed_work_sync(&adapter->work);
3955 		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3956 	}
3957 }
3958 
3959 static void be_cancel_err_detection(struct be_adapter *adapter)
3960 {
3961 	struct be_error_recovery *err_rec = &adapter->error_recovery;
3962 
3963 	if (!be_err_recovery_workq)
3964 		return;
3965 
3966 	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3967 		cancel_delayed_work_sync(&err_rec->err_detection_work);
3968 		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3969 	}
3970 }
3971 
3972 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3973 {
3974 	struct net_device *netdev = adapter->netdev;
3975 
3976 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3977 		be_cmd_manage_iface(adapter, adapter->if_handle,
3978 				    OP_CONVERT_TUNNEL_TO_NORMAL);
3979 
3980 	if (adapter->vxlan_port)
3981 		be_cmd_set_vxlan_port(adapter, 0);
3982 
3983 	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3984 	adapter->vxlan_port = 0;
3985 
3986 	netdev->hw_enc_features = 0;
3987 	netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3988 	netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3989 }
3990 
3991 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3992 				struct be_resources *vft_res)
3993 {
3994 	struct be_resources res = adapter->pool_res;
3995 	u32 vf_if_cap_flags = res.vf_if_cap_flags;
3996 	struct be_resources res_mod = {0};
3997 	u16 num_vf_qs = 1;
3998 
3999 	/* Distribute the queue resources among the PF and it's VFs */
4000 	if (num_vfs) {
4001 		/* Divide the rx queues evenly among the VFs and the PF, capped
4002 		 * at VF-EQ-count. Any remainder queues belong to the PF.
4003 		 */
4004 		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4005 				res.max_rss_qs / (num_vfs + 1));
4006 
4007 		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4008 		 * RSS Tables per port. Provide RSS on VFs, only if number of
4009 		 * VFs requested is less than it's PF Pool's RSS Tables limit.
4010 		 */
4011 		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4012 			num_vf_qs = 1;
4013 	}
4014 
4015 	/* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4016 	 * which are modifiable using SET_PROFILE_CONFIG cmd.
4017 	 */
4018 	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4019 				  RESOURCE_MODIFIABLE, 0);
4020 
4021 	/* If RSS IFACE capability flags are modifiable for a VF, set the
4022 	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4023 	 * more than 1 RSSQ is available for a VF.
4024 	 * Otherwise, provision only 1 queue pair for VF.
4025 	 */
4026 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4027 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4028 		if (num_vf_qs > 1) {
4029 			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4030 			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4031 				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4032 		} else {
4033 			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4034 					     BE_IF_FLAGS_DEFQ_RSS);
4035 		}
4036 	} else {
4037 		num_vf_qs = 1;
4038 	}
4039 
4040 	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4041 		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4042 		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4043 	}
4044 
4045 	vft_res->vf_if_cap_flags = vf_if_cap_flags;
4046 	vft_res->max_rx_qs = num_vf_qs;
4047 	vft_res->max_rss_qs = num_vf_qs;
4048 	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4049 	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4050 
4051 	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4052 	 * among the PF and it's VFs, if the fields are changeable
4053 	 */
4054 	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4055 		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4056 
4057 	if (res_mod.max_vlans == FIELD_MODIFIABLE)
4058 		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4059 
4060 	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4061 		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4062 
4063 	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4064 		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4065 }
4066 
4067 static void be_if_destroy(struct be_adapter *adapter)
4068 {
4069 	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4070 
4071 	kfree(adapter->pmac_id);
4072 	adapter->pmac_id = NULL;
4073 
4074 	kfree(adapter->mc_list);
4075 	adapter->mc_list = NULL;
4076 
4077 	kfree(adapter->uc_list);
4078 	adapter->uc_list = NULL;
4079 }
4080 
4081 static int be_clear(struct be_adapter *adapter)
4082 {
4083 	struct pci_dev *pdev = adapter->pdev;
4084 	struct  be_resources vft_res = {0};
4085 
4086 	be_cancel_worker(adapter);
4087 
4088 	flush_workqueue(be_wq);
4089 
4090 	if (sriov_enabled(adapter))
4091 		be_vf_clear(adapter);
4092 
4093 	/* Re-configure FW to distribute resources evenly across max-supported
4094 	 * number of VFs, only when VFs are not already enabled.
4095 	 */
4096 	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4097 	    !pci_vfs_assigned(pdev)) {
4098 		be_calculate_vf_res(adapter,
4099 				    pci_sriov_get_totalvfs(pdev),
4100 				    &vft_res);
4101 		be_cmd_set_sriov_config(adapter, adapter->pool_res,
4102 					pci_sriov_get_totalvfs(pdev),
4103 					&vft_res);
4104 	}
4105 
4106 	be_disable_vxlan_offloads(adapter);
4107 
4108 	be_if_destroy(adapter);
4109 
4110 	be_clear_queues(adapter);
4111 
4112 	be_msix_disable(adapter);
4113 	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4114 	return 0;
4115 }
4116 
4117 static int be_vfs_if_create(struct be_adapter *adapter)
4118 {
4119 	struct be_resources res = {0};
4120 	u32 cap_flags, en_flags, vf;
4121 	struct be_vf_cfg *vf_cfg;
4122 	int status;
4123 
4124 	/* If a FW profile exists, then cap_flags are updated */
4125 	cap_flags = BE_VF_IF_EN_FLAGS;
4126 
4127 	for_all_vfs(adapter, vf_cfg, vf) {
4128 		if (!BE3_chip(adapter)) {
4129 			status = be_cmd_get_profile_config(adapter, &res, NULL,
4130 							   ACTIVE_PROFILE_TYPE,
4131 							   RESOURCE_LIMITS,
4132 							   vf + 1);
4133 			if (!status) {
4134 				cap_flags = res.if_cap_flags;
4135 				/* Prevent VFs from enabling VLAN promiscuous
4136 				 * mode
4137 				 */
4138 				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4139 			}
4140 		}
4141 
4142 		/* PF should enable IF flags during proxy if_create call */
4143 		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4144 		status = be_cmd_if_create(adapter, cap_flags, en_flags,
4145 					  &vf_cfg->if_handle, vf + 1);
4146 		if (status)
4147 			return status;
4148 	}
4149 
4150 	return 0;
4151 }
4152 
4153 static int be_vf_setup_init(struct be_adapter *adapter)
4154 {
4155 	struct be_vf_cfg *vf_cfg;
4156 	int vf;
4157 
4158 	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4159 				  GFP_KERNEL);
4160 	if (!adapter->vf_cfg)
4161 		return -ENOMEM;
4162 
4163 	for_all_vfs(adapter, vf_cfg, vf) {
4164 		vf_cfg->if_handle = -1;
4165 		vf_cfg->pmac_id = -1;
4166 	}
4167 	return 0;
4168 }
4169 
4170 static int be_vf_setup(struct be_adapter *adapter)
4171 {
4172 	struct device *dev = &adapter->pdev->dev;
4173 	struct be_vf_cfg *vf_cfg;
4174 	int status, old_vfs, vf;
4175 	bool spoofchk;
4176 
4177 	old_vfs = pci_num_vf(adapter->pdev);
4178 
4179 	status = be_vf_setup_init(adapter);
4180 	if (status)
4181 		goto err;
4182 
4183 	if (old_vfs) {
4184 		for_all_vfs(adapter, vf_cfg, vf) {
4185 			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4186 			if (status)
4187 				goto err;
4188 		}
4189 
4190 		status = be_vfs_mac_query(adapter);
4191 		if (status)
4192 			goto err;
4193 	} else {
4194 		status = be_vfs_if_create(adapter);
4195 		if (status)
4196 			goto err;
4197 
4198 		status = be_vf_eth_addr_config(adapter);
4199 		if (status)
4200 			goto err;
4201 	}
4202 
4203 	for_all_vfs(adapter, vf_cfg, vf) {
4204 		/* Allow VFs to programs MAC/VLAN filters */
4205 		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4206 						  vf + 1);
4207 		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4208 			status = be_cmd_set_fn_privileges(adapter,
4209 							  vf_cfg->privileges |
4210 							  BE_PRIV_FILTMGMT,
4211 							  vf + 1);
4212 			if (!status) {
4213 				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4214 				dev_info(dev, "VF%d has FILTMGMT privilege\n",
4215 					 vf);
4216 			}
4217 		}
4218 
4219 		/* Allow full available bandwidth */
4220 		if (!old_vfs)
4221 			be_cmd_config_qos(adapter, 0, 0, vf + 1);
4222 
4223 		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4224 					       vf_cfg->if_handle, NULL,
4225 					       &spoofchk);
4226 		if (!status)
4227 			vf_cfg->spoofchk = spoofchk;
4228 
4229 		if (!old_vfs) {
4230 			be_cmd_enable_vf(adapter, vf + 1);
4231 			be_cmd_set_logical_link_config(adapter,
4232 						       IFLA_VF_LINK_STATE_AUTO,
4233 						       vf+1);
4234 		}
4235 	}
4236 
4237 	if (!old_vfs) {
4238 		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4239 		if (status) {
4240 			dev_err(dev, "SRIOV enable failed\n");
4241 			adapter->num_vfs = 0;
4242 			goto err;
4243 		}
4244 	}
4245 
4246 	if (BE3_chip(adapter)) {
4247 		/* On BE3, enable VEB only when SRIOV is enabled */
4248 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4249 					       adapter->if_handle,
4250 					       PORT_FWD_TYPE_VEB, 0);
4251 		if (status)
4252 			goto err;
4253 	}
4254 
4255 	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4256 	return 0;
4257 err:
4258 	dev_err(dev, "VF setup failed\n");
4259 	be_vf_clear(adapter);
4260 	return status;
4261 }
4262 
4263 /* Converting function_mode bits on BE3 to SH mc_type enums */
4264 
4265 static u8 be_convert_mc_type(u32 function_mode)
4266 {
4267 	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4268 		return vNIC1;
4269 	else if (function_mode & QNQ_MODE)
4270 		return FLEX10;
4271 	else if (function_mode & VNIC_MODE)
4272 		return vNIC2;
4273 	else if (function_mode & UMC_ENABLED)
4274 		return UMC;
4275 	else
4276 		return MC_NONE;
4277 }
4278 
4279 /* On BE2/BE3 FW does not suggest the supported limits */
4280 static void BEx_get_resources(struct be_adapter *adapter,
4281 			      struct be_resources *res)
4282 {
4283 	bool use_sriov = adapter->num_vfs ? 1 : 0;
4284 
4285 	if (be_physfn(adapter))
4286 		res->max_uc_mac = BE_UC_PMAC_COUNT;
4287 	else
4288 		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4289 
4290 	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4291 
4292 	if (be_is_mc(adapter)) {
4293 		/* Assuming that there are 4 channels per port,
4294 		 * when multi-channel is enabled
4295 		 */
4296 		if (be_is_qnq_mode(adapter))
4297 			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4298 		else
4299 			/* In a non-qnq multichannel mode, the pvid
4300 			 * takes up one vlan entry
4301 			 */
4302 			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4303 	} else {
4304 		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4305 	}
4306 
4307 	res->max_mcast_mac = BE_MAX_MC;
4308 
4309 	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4310 	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4311 	 *    *only* if it is RSS-capable.
4312 	 */
4313 	if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4314 	    be_virtfn(adapter) ||
4315 	    (be_is_mc(adapter) &&
4316 	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4317 		res->max_tx_qs = 1;
4318 	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4319 		struct be_resources super_nic_res = {0};
4320 
4321 		/* On a SuperNIC profile, the driver needs to use the
4322 		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4323 		 */
4324 		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4325 					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4326 					  0);
4327 		/* Some old versions of BE3 FW don't report max_tx_qs value */
4328 		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4329 	} else {
4330 		res->max_tx_qs = BE3_MAX_TX_QS;
4331 	}
4332 
4333 	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4334 	    !use_sriov && be_physfn(adapter))
4335 		res->max_rss_qs = (adapter->be3_native) ?
4336 					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4337 	res->max_rx_qs = res->max_rss_qs + 1;
4338 
4339 	if (be_physfn(adapter))
4340 		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4341 					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4342 	else
4343 		res->max_evt_qs = 1;
4344 
4345 	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4346 	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4347 	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4348 		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4349 }
4350 
4351 static void be_setup_init(struct be_adapter *adapter)
4352 {
4353 	adapter->vlan_prio_bmap = 0xff;
4354 	adapter->phy.link_speed = -1;
4355 	adapter->if_handle = -1;
4356 	adapter->be3_native = false;
4357 	adapter->if_flags = 0;
4358 	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4359 	if (be_physfn(adapter))
4360 		adapter->cmd_privileges = MAX_PRIVILEGES;
4361 	else
4362 		adapter->cmd_privileges = MIN_PRIVILEGES;
4363 }
4364 
4365 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4366  * However, this HW limitation is not exposed to the host via any SLI cmd.
4367  * As a result, in the case of SRIOV and in particular multi-partition configs
4368  * the driver needs to calcuate a proportional share of RSS Tables per PF-pool
4369  * for distribution between the VFs. This self-imposed limit will determine the
4370  * no: of VFs for which RSS can be enabled.
4371  */
4372 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4373 {
4374 	struct be_port_resources port_res = {0};
4375 	u8 rss_tables_on_port;
4376 	u16 max_vfs = be_max_vfs(adapter);
4377 
4378 	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4379 				  RESOURCE_LIMITS, 0);
4380 
4381 	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4382 
4383 	/* Each PF Pool's RSS Tables limit =
4384 	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4385 	 */
4386 	adapter->pool_res.max_rss_tables =
4387 		max_vfs * rss_tables_on_port / port_res.max_vfs;
4388 }
4389 
4390 static int be_get_sriov_config(struct be_adapter *adapter)
4391 {
4392 	struct be_resources res = {0};
4393 	int max_vfs, old_vfs;
4394 
4395 	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4396 				  RESOURCE_LIMITS, 0);
4397 
4398 	/* Some old versions of BE3 FW don't report max_vfs value */
4399 	if (BE3_chip(adapter) && !res.max_vfs) {
4400 		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4401 		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4402 	}
4403 
4404 	adapter->pool_res = res;
4405 
4406 	/* If during previous unload of the driver, the VFs were not disabled,
4407 	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4408 	 * Instead use the TotalVFs value stored in the pci-dev struct.
4409 	 */
4410 	old_vfs = pci_num_vf(adapter->pdev);
4411 	if (old_vfs) {
4412 		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4413 			 old_vfs);
4414 
4415 		adapter->pool_res.max_vfs =
4416 			pci_sriov_get_totalvfs(adapter->pdev);
4417 		adapter->num_vfs = old_vfs;
4418 	}
4419 
4420 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4421 		be_calculate_pf_pool_rss_tables(adapter);
4422 		dev_info(&adapter->pdev->dev,
4423 			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4424 			 be_max_pf_pool_rss_tables(adapter));
4425 	}
4426 	return 0;
4427 }
4428 
4429 static void be_alloc_sriov_res(struct be_adapter *adapter)
4430 {
4431 	int old_vfs = pci_num_vf(adapter->pdev);
4432 	struct  be_resources vft_res = {0};
4433 	int status;
4434 
4435 	be_get_sriov_config(adapter);
4436 
4437 	if (!old_vfs)
4438 		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4439 
4440 	/* When the HW is in SRIOV capable configuration, the PF-pool
4441 	 * resources are given to PF during driver load, if there are no
4442 	 * old VFs. This facility is not available in BE3 FW.
4443 	 * Also, this is done by FW in Lancer chip.
4444 	 */
4445 	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4446 		be_calculate_vf_res(adapter, 0, &vft_res);
4447 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4448 						 &vft_res);
4449 		if (status)
4450 			dev_err(&adapter->pdev->dev,
4451 				"Failed to optimize SRIOV resources\n");
4452 	}
4453 }
4454 
4455 static int be_get_resources(struct be_adapter *adapter)
4456 {
4457 	struct device *dev = &adapter->pdev->dev;
4458 	struct be_resources res = {0};
4459 	int status;
4460 
4461 	/* For Lancer, SH etc read per-function resource limits from FW.
4462 	 * GET_FUNC_CONFIG returns per function guaranteed limits.
4463 	 * GET_PROFILE_CONFIG returns PCI-E related limits PF-pool limits
4464 	 */
4465 	if (BEx_chip(adapter)) {
4466 		BEx_get_resources(adapter, &res);
4467 	} else {
4468 		status = be_cmd_get_func_config(adapter, &res);
4469 		if (status)
4470 			return status;
4471 
4472 		/* If a deafault RXQ must be created, we'll use up one RSSQ*/
4473 		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4474 		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4475 			res.max_rss_qs -= 1;
4476 	}
4477 
4478 	/* If RoCE is supported stash away half the EQs for RoCE */
4479 	res.max_nic_evt_qs = be_roce_supported(adapter) ?
4480 				res.max_evt_qs / 2 : res.max_evt_qs;
4481 	adapter->res = res;
4482 
4483 	/* If FW supports RSS default queue, then skip creating non-RSS
4484 	 * queue for non-IP traffic.
4485 	 */
4486 	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4487 				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4488 
4489 	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4490 		 be_max_txqs(adapter), be_max_rxqs(adapter),
4491 		 be_max_rss(adapter), be_max_nic_eqs(adapter),
4492 		 be_max_vfs(adapter));
4493 	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4494 		 be_max_uc(adapter), be_max_mc(adapter),
4495 		 be_max_vlans(adapter));
4496 
4497 	/* Ensure RX and TX queues are created in pairs at init time */
4498 	adapter->cfg_num_rx_irqs =
4499 				min_t(u16, netif_get_num_default_rss_queues(),
4500 				      be_max_qp_irqs(adapter));
4501 	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4502 	return 0;
4503 }
4504 
4505 static int be_get_config(struct be_adapter *adapter)
4506 {
4507 	int status, level;
4508 	u16 profile_id;
4509 
4510 	status = be_cmd_get_cntl_attributes(adapter);
4511 	if (status)
4512 		return status;
4513 
4514 	status = be_cmd_query_fw_cfg(adapter);
4515 	if (status)
4516 		return status;
4517 
4518 	if (!lancer_chip(adapter) && be_physfn(adapter))
4519 		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4520 
4521 	if (BEx_chip(adapter)) {
4522 		level = be_cmd_get_fw_log_level(adapter);
4523 		adapter->msg_enable =
4524 			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4525 	}
4526 
4527 	be_cmd_get_acpi_wol_cap(adapter);
4528 	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4529 	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4530 
4531 	be_cmd_query_port_name(adapter);
4532 
4533 	if (be_physfn(adapter)) {
4534 		status = be_cmd_get_active_profile(adapter, &profile_id);
4535 		if (!status)
4536 			dev_info(&adapter->pdev->dev,
4537 				 "Using profile 0x%x\n", profile_id);
4538 	}
4539 
4540 	return 0;
4541 }
4542 
4543 static int be_mac_setup(struct be_adapter *adapter)
4544 {
4545 	u8 mac[ETH_ALEN];
4546 	int status;
4547 
4548 	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4549 		status = be_cmd_get_perm_mac(adapter, mac);
4550 		if (status)
4551 			return status;
4552 
4553 		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4554 		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4555 	}
4556 
4557 	return 0;
4558 }
4559 
4560 static void be_schedule_worker(struct be_adapter *adapter)
4561 {
4562 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4563 	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4564 }
4565 
4566 static void be_destroy_err_recovery_workq(void)
4567 {
4568 	if (!be_err_recovery_workq)
4569 		return;
4570 
4571 	flush_workqueue(be_err_recovery_workq);
4572 	destroy_workqueue(be_err_recovery_workq);
4573 	be_err_recovery_workq = NULL;
4574 }
4575 
4576 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4577 {
4578 	struct be_error_recovery *err_rec = &adapter->error_recovery;
4579 
4580 	if (!be_err_recovery_workq)
4581 		return;
4582 
4583 	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4584 			   msecs_to_jiffies(delay));
4585 	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4586 }
4587 
4588 static int be_setup_queues(struct be_adapter *adapter)
4589 {
4590 	struct net_device *netdev = adapter->netdev;
4591 	int status;
4592 
4593 	status = be_evt_queues_create(adapter);
4594 	if (status)
4595 		goto err;
4596 
4597 	status = be_tx_qs_create(adapter);
4598 	if (status)
4599 		goto err;
4600 
4601 	status = be_rx_cqs_create(adapter);
4602 	if (status)
4603 		goto err;
4604 
4605 	status = be_mcc_queues_create(adapter);
4606 	if (status)
4607 		goto err;
4608 
4609 	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4610 	if (status)
4611 		goto err;
4612 
4613 	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4614 	if (status)
4615 		goto err;
4616 
4617 	return 0;
4618 err:
4619 	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4620 	return status;
4621 }
4622 
4623 static int be_if_create(struct be_adapter *adapter)
4624 {
4625 	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4626 	u32 cap_flags = be_if_cap_flags(adapter);
4627 	int status;
4628 
4629 	/* alloc required memory for other filtering fields */
4630 	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4631 				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4632 	if (!adapter->pmac_id)
4633 		return -ENOMEM;
4634 
4635 	adapter->mc_list = kcalloc(be_max_mc(adapter),
4636 				   sizeof(*adapter->mc_list), GFP_KERNEL);
4637 	if (!adapter->mc_list)
4638 		return -ENOMEM;
4639 
4640 	adapter->uc_list = kcalloc(be_max_uc(adapter),
4641 				   sizeof(*adapter->uc_list), GFP_KERNEL);
4642 	if (!adapter->uc_list)
4643 		return -ENOMEM;
4644 
4645 	if (adapter->cfg_num_rx_irqs == 1)
4646 		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4647 
4648 	en_flags &= cap_flags;
4649 	/* will enable all the needed filter flags in be_open() */
4650 	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4651 				  &adapter->if_handle, 0);
4652 
4653 	if (status)
4654 		return status;
4655 
4656 	return 0;
4657 }
4658 
4659 int be_update_queues(struct be_adapter *adapter)
4660 {
4661 	struct net_device *netdev = adapter->netdev;
4662 	int status;
4663 
4664 	if (netif_running(netdev))
4665 		be_close(netdev);
4666 
4667 	be_cancel_worker(adapter);
4668 
4669 	/* If any vectors have been shared with RoCE we cannot re-program
4670 	 * the MSIx table.
4671 	 */
4672 	if (!adapter->num_msix_roce_vec)
4673 		be_msix_disable(adapter);
4674 
4675 	be_clear_queues(adapter);
4676 	status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4677 	if (status)
4678 		return status;
4679 
4680 	if (!msix_enabled(adapter)) {
4681 		status = be_msix_enable(adapter);
4682 		if (status)
4683 			return status;
4684 	}
4685 
4686 	status = be_if_create(adapter);
4687 	if (status)
4688 		return status;
4689 
4690 	status = be_setup_queues(adapter);
4691 	if (status)
4692 		return status;
4693 
4694 	be_schedule_worker(adapter);
4695 
4696 	if (netif_running(netdev))
4697 		status = be_open(netdev);
4698 
4699 	return status;
4700 }
4701 
4702 static inline int fw_major_num(const char *fw_ver)
4703 {
4704 	int fw_major = 0, i;
4705 
4706 	i = sscanf(fw_ver, "%d.", &fw_major);
4707 	if (i != 1)
4708 		return 0;
4709 
4710 	return fw_major;
4711 }
4712 
4713 /* If it is error recovery, FLR the PF
4714  * Else if any VFs are already enabled don't FLR the PF
4715  */
4716 static bool be_reset_required(struct be_adapter *adapter)
4717 {
4718 	if (be_error_recovering(adapter))
4719 		return true;
4720 	else
4721 		return pci_num_vf(adapter->pdev) == 0;
4722 }
4723 
4724 /* Wait for the FW to be ready and perform the required initialization */
4725 static int be_func_init(struct be_adapter *adapter)
4726 {
4727 	int status;
4728 
4729 	status = be_fw_wait_ready(adapter);
4730 	if (status)
4731 		return status;
4732 
4733 	/* FW is now ready; clear errors to allow cmds/doorbell */
4734 	be_clear_error(adapter, BE_CLEAR_ALL);
4735 
4736 	if (be_reset_required(adapter)) {
4737 		status = be_cmd_reset_function(adapter);
4738 		if (status)
4739 			return status;
4740 
4741 		/* Wait for interrupts to quiesce after an FLR */
4742 		msleep(100);
4743 	}
4744 
4745 	/* Tell FW we're ready to fire cmds */
4746 	status = be_cmd_fw_init(adapter);
4747 	if (status)
4748 		return status;
4749 
4750 	/* Allow interrupts for other ULPs running on NIC function */
4751 	be_intr_set(adapter, true);
4752 
4753 	return 0;
4754 }
4755 
4756 static int be_setup(struct be_adapter *adapter)
4757 {
4758 	struct device *dev = &adapter->pdev->dev;
4759 	int status;
4760 
4761 	status = be_func_init(adapter);
4762 	if (status)
4763 		return status;
4764 
4765 	be_setup_init(adapter);
4766 
4767 	if (!lancer_chip(adapter))
4768 		be_cmd_req_native_mode(adapter);
4769 
4770 	/* invoke this cmd first to get pf_num and vf_num which are needed
4771 	 * for issuing profile related cmds
4772 	 */
4773 	if (!BEx_chip(adapter)) {
4774 		status = be_cmd_get_func_config(adapter, NULL);
4775 		if (status)
4776 			return status;
4777 	}
4778 
4779 	status = be_get_config(adapter);
4780 	if (status)
4781 		goto err;
4782 
4783 	if (!BE2_chip(adapter) && be_physfn(adapter))
4784 		be_alloc_sriov_res(adapter);
4785 
4786 	status = be_get_resources(adapter);
4787 	if (status)
4788 		goto err;
4789 
4790 	status = be_msix_enable(adapter);
4791 	if (status)
4792 		goto err;
4793 
4794 	/* will enable all the needed filter flags in be_open() */
4795 	status = be_if_create(adapter);
4796 	if (status)
4797 		goto err;
4798 
4799 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4800 	rtnl_lock();
4801 	status = be_setup_queues(adapter);
4802 	rtnl_unlock();
4803 	if (status)
4804 		goto err;
4805 
4806 	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4807 
4808 	status = be_mac_setup(adapter);
4809 	if (status)
4810 		goto err;
4811 
4812 	be_cmd_get_fw_ver(adapter);
4813 	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4814 
4815 	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4816 		dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4817 			adapter->fw_ver);
4818 		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4819 	}
4820 
4821 	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4822 					 adapter->rx_fc);
4823 	if (status)
4824 		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4825 					&adapter->rx_fc);
4826 
4827 	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4828 		 adapter->tx_fc, adapter->rx_fc);
4829 
4830 	if (be_physfn(adapter))
4831 		be_cmd_set_logical_link_config(adapter,
4832 					       IFLA_VF_LINK_STATE_AUTO, 0);
4833 
4834 	/* BE3 EVB echoes broadcast/multicast packets back to PF's vport
4835 	 * confusing a linux bridge or OVS that it might be connected to.
4836 	 * Set the EVB to PASSTHRU mode which effectively disables the EVB
4837 	 * when SRIOV is not enabled.
4838 	 */
4839 	if (BE3_chip(adapter))
4840 		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4841 				      PORT_FWD_TYPE_PASSTHRU, 0);
4842 
4843 	if (adapter->num_vfs)
4844 		be_vf_setup(adapter);
4845 
4846 	status = be_cmd_get_phy_info(adapter);
4847 	if (!status && be_pause_supported(adapter))
4848 		adapter->phy.fc_autoneg = 1;
4849 
4850 	if (be_physfn(adapter) && !lancer_chip(adapter))
4851 		be_cmd_set_features(adapter);
4852 
4853 	be_schedule_worker(adapter);
4854 	adapter->flags |= BE_FLAGS_SETUP_DONE;
4855 	return 0;
4856 err:
4857 	be_clear(adapter);
4858 	return status;
4859 }
4860 
4861 #ifdef CONFIG_NET_POLL_CONTROLLER
4862 static void be_netpoll(struct net_device *netdev)
4863 {
4864 	struct be_adapter *adapter = netdev_priv(netdev);
4865 	struct be_eq_obj *eqo;
4866 	int i;
4867 
4868 	for_all_evt_queues(adapter, eqo, i) {
4869 		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4870 		napi_schedule(&eqo->napi);
4871 	}
4872 }
4873 #endif
4874 
4875 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4876 {
4877 	const struct firmware *fw;
4878 	int status;
4879 
4880 	if (!netif_running(adapter->netdev)) {
4881 		dev_err(&adapter->pdev->dev,
4882 			"Firmware load not allowed (interface is down)\n");
4883 		return -ENETDOWN;
4884 	}
4885 
4886 	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4887 	if (status)
4888 		goto fw_exit;
4889 
4890 	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4891 
4892 	if (lancer_chip(adapter))
4893 		status = lancer_fw_download(adapter, fw);
4894 	else
4895 		status = be_fw_download(adapter, fw);
4896 
4897 	if (!status)
4898 		be_cmd_get_fw_ver(adapter);
4899 
4900 fw_exit:
4901 	release_firmware(fw);
4902 	return status;
4903 }
4904 
4905 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4906 				 u16 flags)
4907 {
4908 	struct be_adapter *adapter = netdev_priv(dev);
4909 	struct nlattr *attr, *br_spec;
4910 	int rem;
4911 	int status = 0;
4912 	u16 mode = 0;
4913 
4914 	if (!sriov_enabled(adapter))
4915 		return -EOPNOTSUPP;
4916 
4917 	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4918 	if (!br_spec)
4919 		return -EINVAL;
4920 
4921 	nla_for_each_nested(attr, br_spec, rem) {
4922 		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4923 			continue;
4924 
4925 		if (nla_len(attr) < sizeof(mode))
4926 			return -EINVAL;
4927 
4928 		mode = nla_get_u16(attr);
4929 		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4930 			return -EOPNOTSUPP;
4931 
4932 		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4933 			return -EINVAL;
4934 
4935 		status = be_cmd_set_hsw_config(adapter, 0, 0,
4936 					       adapter->if_handle,
4937 					       mode == BRIDGE_MODE_VEPA ?
4938 					       PORT_FWD_TYPE_VEPA :
4939 					       PORT_FWD_TYPE_VEB, 0);
4940 		if (status)
4941 			goto err;
4942 
4943 		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4944 			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4945 
4946 		return status;
4947 	}
4948 err:
4949 	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4950 		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4951 
4952 	return status;
4953 }
4954 
4955 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4956 				 struct net_device *dev, u32 filter_mask,
4957 				 int nlflags)
4958 {
4959 	struct be_adapter *adapter = netdev_priv(dev);
4960 	int status = 0;
4961 	u8 hsw_mode;
4962 
4963 	/* BE and Lancer chips support VEB mode only */
4964 	if (BEx_chip(adapter) || lancer_chip(adapter)) {
4965 		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4966 		if (!pci_sriov_get_totalvfs(adapter->pdev))
4967 			return 0;
4968 		hsw_mode = PORT_FWD_TYPE_VEB;
4969 	} else {
4970 		status = be_cmd_get_hsw_config(adapter, NULL, 0,
4971 					       adapter->if_handle, &hsw_mode,
4972 					       NULL);
4973 		if (status)
4974 			return 0;
4975 
4976 		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4977 			return 0;
4978 	}
4979 
4980 	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4981 				       hsw_mode == PORT_FWD_TYPE_VEPA ?
4982 				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4983 				       0, 0, nlflags, filter_mask, NULL);
4984 }
4985 
4986 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4987 					 void (*func)(struct work_struct *))
4988 {
4989 	struct be_cmd_work *work;
4990 
4991 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
4992 	if (!work) {
4993 		dev_err(&adapter->pdev->dev,
4994 			"be_work memory allocation failed\n");
4995 		return NULL;
4996 	}
4997 
4998 	INIT_WORK(&work->work, func);
4999 	work->adapter = adapter;
5000 	return work;
5001 }
5002 
5003 /* VxLAN offload Notes:
5004  *
5005  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5006  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5007  * is expected to work across all types of IP tunnels once exported. Skyhawk
5008  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5009  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5010  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5011  * those other tunnels are unexported on the fly through ndo_features_check().
5012  *
5013  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5014  * adds more than one port, disable offloads and don't re-enable them again
5015  * until after all the tunnels are removed.
5016  */
5017 static void be_work_add_vxlan_port(struct work_struct *work)
5018 {
5019 	struct be_cmd_work *cmd_work =
5020 				container_of(work, struct be_cmd_work, work);
5021 	struct be_adapter *adapter = cmd_work->adapter;
5022 	struct net_device *netdev = adapter->netdev;
5023 	struct device *dev = &adapter->pdev->dev;
5024 	__be16 port = cmd_work->info.vxlan_port;
5025 	int status;
5026 
5027 	if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5028 		adapter->vxlan_port_aliases++;
5029 		goto done;
5030 	}
5031 
5032 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5033 		dev_info(dev,
5034 			 "Only one UDP port supported for VxLAN offloads\n");
5035 		dev_info(dev, "Disabling VxLAN offloads\n");
5036 		adapter->vxlan_port_count++;
5037 		goto err;
5038 	}
5039 
5040 	if (adapter->vxlan_port_count++ >= 1)
5041 		goto done;
5042 
5043 	status = be_cmd_manage_iface(adapter, adapter->if_handle,
5044 				     OP_CONVERT_NORMAL_TO_TUNNEL);
5045 	if (status) {
5046 		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5047 		goto err;
5048 	}
5049 
5050 	status = be_cmd_set_vxlan_port(adapter, port);
5051 	if (status) {
5052 		dev_warn(dev, "Failed to add VxLAN port\n");
5053 		goto err;
5054 	}
5055 	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5056 	adapter->vxlan_port = port;
5057 
5058 	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5059 				   NETIF_F_TSO | NETIF_F_TSO6 |
5060 				   NETIF_F_GSO_UDP_TUNNEL;
5061 	netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5062 	netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5063 
5064 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5065 		 be16_to_cpu(port));
5066 	goto done;
5067 err:
5068 	be_disable_vxlan_offloads(adapter);
5069 done:
5070 	kfree(cmd_work);
5071 }
5072 
5073 static void be_work_del_vxlan_port(struct work_struct *work)
5074 {
5075 	struct be_cmd_work *cmd_work =
5076 				container_of(work, struct be_cmd_work, work);
5077 	struct be_adapter *adapter = cmd_work->adapter;
5078 	__be16 port = cmd_work->info.vxlan_port;
5079 
5080 	if (adapter->vxlan_port != port)
5081 		goto done;
5082 
5083 	if (adapter->vxlan_port_aliases) {
5084 		adapter->vxlan_port_aliases--;
5085 		goto out;
5086 	}
5087 
5088 	be_disable_vxlan_offloads(adapter);
5089 
5090 	dev_info(&adapter->pdev->dev,
5091 		 "Disabled VxLAN offloads for UDP port %d\n",
5092 		 be16_to_cpu(port));
5093 done:
5094 	adapter->vxlan_port_count--;
5095 out:
5096 	kfree(cmd_work);
5097 }
5098 
5099 static void be_cfg_vxlan_port(struct net_device *netdev,
5100 			      struct udp_tunnel_info *ti,
5101 			      void (*func)(struct work_struct *))
5102 {
5103 	struct be_adapter *adapter = netdev_priv(netdev);
5104 	struct be_cmd_work *cmd_work;
5105 
5106 	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5107 		return;
5108 
5109 	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5110 		return;
5111 
5112 	cmd_work = be_alloc_work(adapter, func);
5113 	if (cmd_work) {
5114 		cmd_work->info.vxlan_port = ti->port;
5115 		queue_work(be_wq, &cmd_work->work);
5116 	}
5117 }
5118 
5119 static void be_del_vxlan_port(struct net_device *netdev,
5120 			      struct udp_tunnel_info *ti)
5121 {
5122 	be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5123 }
5124 
5125 static void be_add_vxlan_port(struct net_device *netdev,
5126 			      struct udp_tunnel_info *ti)
5127 {
5128 	be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5129 }
5130 
5131 static netdev_features_t be_features_check(struct sk_buff *skb,
5132 					   struct net_device *dev,
5133 					   netdev_features_t features)
5134 {
5135 	struct be_adapter *adapter = netdev_priv(dev);
5136 	u8 l4_hdr = 0;
5137 
5138 	/* The code below restricts offload features for some tunneled packets.
5139 	 * Offload features for normal (non tunnel) packets are unchanged.
5140 	 */
5141 	if (!skb->encapsulation ||
5142 	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5143 		return features;
5144 
5145 	/* It's an encapsulated packet and VxLAN offloads are enabled. We
5146 	 * should disable tunnel offload features if it's not a VxLAN packet,
5147 	 * as tunnel offloads have been enabled only for VxLAN. This is done to
5148 	 * allow other tunneled traffic like GRE work fine while VxLAN
5149 	 * offloads are configured in Skyhawk-R.
5150 	 */
5151 	switch (vlan_get_protocol(skb)) {
5152 	case htons(ETH_P_IP):
5153 		l4_hdr = ip_hdr(skb)->protocol;
5154 		break;
5155 	case htons(ETH_P_IPV6):
5156 		l4_hdr = ipv6_hdr(skb)->nexthdr;
5157 		break;
5158 	default:
5159 		return features;
5160 	}
5161 
5162 	if (l4_hdr != IPPROTO_UDP ||
5163 	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5164 	    skb->inner_protocol != htons(ETH_P_TEB) ||
5165 	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5166 	    sizeof(struct udphdr) + sizeof(struct vxlanhdr))
5167 		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5168 
5169 	return features;
5170 }
5171 
5172 static int be_get_phys_port_id(struct net_device *dev,
5173 			       struct netdev_phys_item_id *ppid)
5174 {
5175 	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5176 	struct be_adapter *adapter = netdev_priv(dev);
5177 	u8 *id;
5178 
5179 	if (MAX_PHYS_ITEM_ID_LEN < id_len)
5180 		return -ENOSPC;
5181 
5182 	ppid->id[0] = adapter->hba_port_num + 1;
5183 	id = &ppid->id[1];
5184 	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5185 	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5186 		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5187 
5188 	ppid->id_len = id_len;
5189 
5190 	return 0;
5191 }
5192 
5193 static void be_set_rx_mode(struct net_device *dev)
5194 {
5195 	struct be_adapter *adapter = netdev_priv(dev);
5196 	struct be_cmd_work *work;
5197 
5198 	work = be_alloc_work(adapter, be_work_set_rx_mode);
5199 	if (work)
5200 		queue_work(be_wq, &work->work);
5201 }
5202 
5203 static const struct net_device_ops be_netdev_ops = {
5204 	.ndo_open		= be_open,
5205 	.ndo_stop		= be_close,
5206 	.ndo_start_xmit		= be_xmit,
5207 	.ndo_set_rx_mode	= be_set_rx_mode,
5208 	.ndo_set_mac_address	= be_mac_addr_set,
5209 	.ndo_change_mtu		= be_change_mtu,
5210 	.ndo_get_stats64	= be_get_stats64,
5211 	.ndo_validate_addr	= eth_validate_addr,
5212 	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
5213 	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
5214 	.ndo_set_vf_mac		= be_set_vf_mac,
5215 	.ndo_set_vf_vlan	= be_set_vf_vlan,
5216 	.ndo_set_vf_rate	= be_set_vf_tx_rate,
5217 	.ndo_get_vf_config	= be_get_vf_config,
5218 	.ndo_set_vf_link_state  = be_set_vf_link_state,
5219 	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5220 #ifdef CONFIG_NET_POLL_CONTROLLER
5221 	.ndo_poll_controller	= be_netpoll,
5222 #endif
5223 	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
5224 	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
5225 #ifdef CONFIG_NET_RX_BUSY_POLL
5226 	.ndo_busy_poll		= be_busy_poll,
5227 #endif
5228 	.ndo_udp_tunnel_add	= be_add_vxlan_port,
5229 	.ndo_udp_tunnel_del	= be_del_vxlan_port,
5230 	.ndo_features_check	= be_features_check,
5231 	.ndo_get_phys_port_id   = be_get_phys_port_id,
5232 };
5233 
5234 static void be_netdev_init(struct net_device *netdev)
5235 {
5236 	struct be_adapter *adapter = netdev_priv(netdev);
5237 
5238 	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5239 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5240 		NETIF_F_HW_VLAN_CTAG_TX;
5241 	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5242 		netdev->hw_features |= NETIF_F_RXHASH;
5243 
5244 	netdev->features |= netdev->hw_features |
5245 		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5246 
5247 	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5248 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5249 
5250 	netdev->priv_flags |= IFF_UNICAST_FLT;
5251 
5252 	netdev->flags |= IFF_MULTICAST;
5253 
5254 	netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5255 
5256 	netdev->netdev_ops = &be_netdev_ops;
5257 
5258 	netdev->ethtool_ops = &be_ethtool_ops;
5259 }
5260 
5261 static void be_cleanup(struct be_adapter *adapter)
5262 {
5263 	struct net_device *netdev = adapter->netdev;
5264 
5265 	rtnl_lock();
5266 	netif_device_detach(netdev);
5267 	if (netif_running(netdev))
5268 		be_close(netdev);
5269 	rtnl_unlock();
5270 
5271 	be_clear(adapter);
5272 }
5273 
5274 static int be_resume(struct be_adapter *adapter)
5275 {
5276 	struct net_device *netdev = adapter->netdev;
5277 	int status;
5278 
5279 	status = be_setup(adapter);
5280 	if (status)
5281 		return status;
5282 
5283 	rtnl_lock();
5284 	if (netif_running(netdev))
5285 		status = be_open(netdev);
5286 	rtnl_unlock();
5287 
5288 	if (status)
5289 		return status;
5290 
5291 	netif_device_attach(netdev);
5292 
5293 	return 0;
5294 }
5295 
5296 static void be_soft_reset(struct be_adapter *adapter)
5297 {
5298 	u32 val;
5299 
5300 	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5301 	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5302 	val |= SLIPORT_SOFTRESET_SR_MASK;
5303 	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5304 }
5305 
5306 static bool be_err_is_recoverable(struct be_adapter *adapter)
5307 {
5308 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5309 	unsigned long initial_idle_time =
5310 		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5311 	unsigned long recovery_interval =
5312 		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5313 	u16 ue_err_code;
5314 	u32 val;
5315 
5316 	val = be_POST_stage_get(adapter);
5317 	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5318 		return false;
5319 	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5320 	if (ue_err_code == 0)
5321 		return false;
5322 
5323 	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5324 		ue_err_code);
5325 
5326 	if (jiffies - err_rec->probe_time <= initial_idle_time) {
5327 		dev_err(&adapter->pdev->dev,
5328 			"Cannot recover within %lu sec from driver load\n",
5329 			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5330 		return false;
5331 	}
5332 
5333 	if (err_rec->last_recovery_time &&
5334 	    (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5335 		dev_err(&adapter->pdev->dev,
5336 			"Cannot recover within %lu sec from last recovery\n",
5337 			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5338 		return false;
5339 	}
5340 
5341 	if (ue_err_code == err_rec->last_err_code) {
5342 		dev_err(&adapter->pdev->dev,
5343 			"Cannot recover from a consecutive TPE error\n");
5344 		return false;
5345 	}
5346 
5347 	err_rec->last_recovery_time = jiffies;
5348 	err_rec->last_err_code = ue_err_code;
5349 	return true;
5350 }
5351 
5352 static int be_tpe_recover(struct be_adapter *adapter)
5353 {
5354 	struct be_error_recovery *err_rec = &adapter->error_recovery;
5355 	int status = -EAGAIN;
5356 	u32 val;
5357 
5358 	switch (err_rec->recovery_state) {
5359 	case ERR_RECOVERY_ST_NONE:
5360 		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5361 		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5362 		break;
5363 
5364 	case ERR_RECOVERY_ST_DETECT:
5365 		val = be_POST_stage_get(adapter);
5366 		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5367 		    POST_STAGE_RECOVERABLE_ERR) {
5368 			dev_err(&adapter->pdev->dev,
5369 				"Unrecoverable HW error detected: 0x%x\n", val);
5370 			status = -EINVAL;
5371 			err_rec->resched_delay = 0;
5372 			break;
5373 		}
5374 
5375 		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5376 
5377 		/* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5378 		 * milliseconds before it checks for final error status in
5379 		 * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
5380 		 * If it does, then PF0 initiates a Soft Reset.
5381 		 */
5382 		if (adapter->pf_num == 0) {
5383 			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5384 			err_rec->resched_delay = err_rec->ue_to_reset_time -
5385 					ERR_RECOVERY_UE_DETECT_DURATION;
5386 			break;
5387 		}
5388 
5389 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5390 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5391 					ERR_RECOVERY_UE_DETECT_DURATION;
5392 		break;
5393 
5394 	case ERR_RECOVERY_ST_RESET:
5395 		if (!be_err_is_recoverable(adapter)) {
5396 			dev_err(&adapter->pdev->dev,
5397 				"Failed to meet recovery criteria\n");
5398 			status = -EIO;
5399 			err_rec->resched_delay = 0;
5400 			break;
5401 		}
5402 		be_soft_reset(adapter);
5403 		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5404 		err_rec->resched_delay = err_rec->ue_to_poll_time -
5405 					err_rec->ue_to_reset_time;
5406 		break;
5407 
5408 	case ERR_RECOVERY_ST_PRE_POLL:
5409 		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5410 		err_rec->resched_delay = 0;
5411 		status = 0;			/* done */
5412 		break;
5413 
5414 	default:
5415 		status = -EINVAL;
5416 		err_rec->resched_delay = 0;
5417 		break;
5418 	}
5419 
5420 	return status;
5421 }
5422 
5423 static int be_err_recover(struct be_adapter *adapter)
5424 {
5425 	int status;
5426 
5427 	if (!lancer_chip(adapter)) {
5428 		if (!adapter->error_recovery.recovery_supported ||
5429 		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5430 			return -EIO;
5431 		status = be_tpe_recover(adapter);
5432 		if (status)
5433 			goto err;
5434 	}
5435 
5436 	/* Wait for adapter to reach quiescent state before
5437 	 * destroying queues
5438 	 */
5439 	status = be_fw_wait_ready(adapter);
5440 	if (status)
5441 		goto err;
5442 
5443 	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5444 
5445 	be_cleanup(adapter);
5446 
5447 	status = be_resume(adapter);
5448 	if (status)
5449 		goto err;
5450 
5451 	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5452 
5453 err:
5454 	return status;
5455 }
5456 
5457 static void be_err_detection_task(struct work_struct *work)
5458 {
5459 	struct be_error_recovery *err_rec =
5460 			container_of(work, struct be_error_recovery,
5461 				     err_detection_work.work);
5462 	struct be_adapter *adapter =
5463 			container_of(err_rec, struct be_adapter,
5464 				     error_recovery);
5465 	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5466 	struct device *dev = &adapter->pdev->dev;
5467 	int recovery_status;
5468 
5469 	be_detect_error(adapter);
5470 	if (!be_check_error(adapter, BE_ERROR_HW))
5471 		goto reschedule_task;
5472 
5473 	recovery_status = be_err_recover(adapter);
5474 	if (!recovery_status) {
5475 		err_rec->recovery_retries = 0;
5476 		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5477 		dev_info(dev, "Adapter recovery successful\n");
5478 		goto reschedule_task;
5479 	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5480 		/* BEx/SH recovery state machine */
5481 		if (adapter->pf_num == 0 &&
5482 		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5483 			dev_err(&adapter->pdev->dev,
5484 				"Adapter recovery in progress\n");
5485 		resched_delay = err_rec->resched_delay;
5486 		goto reschedule_task;
5487 	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5488 		/* For VFs, check if PF have allocated resources
5489 		 * every second.
5490 		 */
5491 		dev_err(dev, "Re-trying adapter recovery\n");
5492 		goto reschedule_task;
5493 	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5494 		   ERR_RECOVERY_MAX_RETRY_COUNT) {
5495 		/* In case of another error during recovery, it takes 30 sec
5496 		 * for adapter to come out of error. Retry error recovery after
5497 		 * this time interval.
5498 		 */
5499 		dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5500 		resched_delay = ERR_RECOVERY_RETRY_DELAY;
5501 		goto reschedule_task;
5502 	} else {
5503 		dev_err(dev, "Adapter recovery failed\n");
5504 		dev_err(dev, "Please reboot server to recover\n");
5505 	}
5506 
5507 	return;
5508 
5509 reschedule_task:
5510 	be_schedule_err_detection(adapter, resched_delay);
5511 }
5512 
5513 static void be_log_sfp_info(struct be_adapter *adapter)
5514 {
5515 	int status;
5516 
5517 	status = be_cmd_query_sfp_info(adapter);
5518 	if (!status) {
5519 		dev_err(&adapter->pdev->dev,
5520 			"Port %c: %s Vendor: %s part no: %s",
5521 			adapter->port_name,
5522 			be_misconfig_evt_port_state[adapter->phy_state],
5523 			adapter->phy.vendor_name,
5524 			adapter->phy.vendor_pn);
5525 	}
5526 	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5527 }
5528 
5529 static void be_worker(struct work_struct *work)
5530 {
5531 	struct be_adapter *adapter =
5532 		container_of(work, struct be_adapter, work.work);
5533 	struct be_rx_obj *rxo;
5534 	int i;
5535 
5536 	if (be_physfn(adapter) &&
5537 	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5538 		be_cmd_get_die_temperature(adapter);
5539 
5540 	/* when interrupts are not yet enabled, just reap any pending
5541 	 * mcc completions
5542 	 */
5543 	if (!netif_running(adapter->netdev)) {
5544 		local_bh_disable();
5545 		be_process_mcc(adapter);
5546 		local_bh_enable();
5547 		goto reschedule;
5548 	}
5549 
5550 	if (!adapter->stats_cmd_sent) {
5551 		if (lancer_chip(adapter))
5552 			lancer_cmd_get_pport_stats(adapter,
5553 						   &adapter->stats_cmd);
5554 		else
5555 			be_cmd_get_stats(adapter, &adapter->stats_cmd);
5556 	}
5557 
5558 	for_all_rx_queues(adapter, rxo, i) {
5559 		/* Replenish RX-queues starved due to memory
5560 		 * allocation failures.
5561 		 */
5562 		if (rxo->rx_post_starved)
5563 			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5564 	}
5565 
5566 	/* EQ-delay update for Skyhawk is done while notifying EQ */
5567 	if (!skyhawk_chip(adapter))
5568 		be_eqd_update(adapter, false);
5569 
5570 	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5571 		be_log_sfp_info(adapter);
5572 
5573 reschedule:
5574 	adapter->work_counter++;
5575 	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5576 }
5577 
5578 static void be_unmap_pci_bars(struct be_adapter *adapter)
5579 {
5580 	if (adapter->csr)
5581 		pci_iounmap(adapter->pdev, adapter->csr);
5582 	if (adapter->db)
5583 		pci_iounmap(adapter->pdev, adapter->db);
5584 	if (adapter->pcicfg && adapter->pcicfg_mapped)
5585 		pci_iounmap(adapter->pdev, adapter->pcicfg);
5586 }
5587 
5588 static int db_bar(struct be_adapter *adapter)
5589 {
5590 	if (lancer_chip(adapter) || be_virtfn(adapter))
5591 		return 0;
5592 	else
5593 		return 4;
5594 }
5595 
5596 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5597 {
5598 	if (skyhawk_chip(adapter)) {
5599 		adapter->roce_db.size = 4096;
5600 		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5601 							      db_bar(adapter));
5602 		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5603 							       db_bar(adapter));
5604 	}
5605 	return 0;
5606 }
5607 
5608 static int be_map_pci_bars(struct be_adapter *adapter)
5609 {
5610 	struct pci_dev *pdev = adapter->pdev;
5611 	u8 __iomem *addr;
5612 	u32 sli_intf;
5613 
5614 	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5615 	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5616 				SLI_INTF_FAMILY_SHIFT;
5617 	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5618 
5619 	if (BEx_chip(adapter) && be_physfn(adapter)) {
5620 		adapter->csr = pci_iomap(pdev, 2, 0);
5621 		if (!adapter->csr)
5622 			return -ENOMEM;
5623 	}
5624 
5625 	addr = pci_iomap(pdev, db_bar(adapter), 0);
5626 	if (!addr)
5627 		goto pci_map_err;
5628 	adapter->db = addr;
5629 
5630 	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5631 		if (be_physfn(adapter)) {
5632 			/* PCICFG is the 2nd BAR in BE2 */
5633 			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5634 			if (!addr)
5635 				goto pci_map_err;
5636 			adapter->pcicfg = addr;
5637 			adapter->pcicfg_mapped = true;
5638 		} else {
5639 			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5640 			adapter->pcicfg_mapped = false;
5641 		}
5642 	}
5643 
5644 	be_roce_map_pci_bars(adapter);
5645 	return 0;
5646 
5647 pci_map_err:
5648 	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5649 	be_unmap_pci_bars(adapter);
5650 	return -ENOMEM;
5651 }
5652 
5653 static void be_drv_cleanup(struct be_adapter *adapter)
5654 {
5655 	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5656 	struct device *dev = &adapter->pdev->dev;
5657 
5658 	if (mem->va)
5659 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5660 
5661 	mem = &adapter->rx_filter;
5662 	if (mem->va)
5663 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5664 
5665 	mem = &adapter->stats_cmd;
5666 	if (mem->va)
5667 		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5668 }
5669 
5670 /* Allocate and initialize various fields in be_adapter struct */
5671 static int be_drv_init(struct be_adapter *adapter)
5672 {
5673 	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5674 	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5675 	struct be_dma_mem *rx_filter = &adapter->rx_filter;
5676 	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5677 	struct device *dev = &adapter->pdev->dev;
5678 	int status = 0;
5679 
5680 	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5681 	mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5682 						 &mbox_mem_alloc->dma,
5683 						 GFP_KERNEL);
5684 	if (!mbox_mem_alloc->va)
5685 		return -ENOMEM;
5686 
5687 	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5688 	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5689 	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5690 
5691 	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5692 	rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5693 					    &rx_filter->dma, GFP_KERNEL);
5694 	if (!rx_filter->va) {
5695 		status = -ENOMEM;
5696 		goto free_mbox;
5697 	}
5698 
5699 	if (lancer_chip(adapter))
5700 		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5701 	else if (BE2_chip(adapter))
5702 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5703 	else if (BE3_chip(adapter))
5704 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5705 	else
5706 		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5707 	stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5708 					    &stats_cmd->dma, GFP_KERNEL);
5709 	if (!stats_cmd->va) {
5710 		status = -ENOMEM;
5711 		goto free_rx_filter;
5712 	}
5713 
5714 	mutex_init(&adapter->mbox_lock);
5715 	mutex_init(&adapter->mcc_lock);
5716 	mutex_init(&adapter->rx_filter_lock);
5717 	spin_lock_init(&adapter->mcc_cq_lock);
5718 	init_completion(&adapter->et_cmd_compl);
5719 
5720 	pci_save_state(adapter->pdev);
5721 
5722 	INIT_DELAYED_WORK(&adapter->work, be_worker);
5723 
5724 	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5725 	adapter->error_recovery.resched_delay = 0;
5726 	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5727 			  be_err_detection_task);
5728 
5729 	adapter->rx_fc = true;
5730 	adapter->tx_fc = true;
5731 
5732 	/* Must be a power of 2 or else MODULO will BUG_ON */
5733 	adapter->be_get_temp_freq = 64;
5734 
5735 	return 0;
5736 
5737 free_rx_filter:
5738 	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5739 free_mbox:
5740 	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5741 			  mbox_mem_alloc->dma);
5742 	return status;
5743 }
5744 
5745 static void be_remove(struct pci_dev *pdev)
5746 {
5747 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5748 
5749 	if (!adapter)
5750 		return;
5751 
5752 	be_roce_dev_remove(adapter);
5753 	be_intr_set(adapter, false);
5754 
5755 	be_cancel_err_detection(adapter);
5756 
5757 	unregister_netdev(adapter->netdev);
5758 
5759 	be_clear(adapter);
5760 
5761 	if (!pci_vfs_assigned(adapter->pdev))
5762 		be_cmd_reset_function(adapter);
5763 
5764 	/* tell fw we're done with firing cmds */
5765 	be_cmd_fw_clean(adapter);
5766 
5767 	be_unmap_pci_bars(adapter);
5768 	be_drv_cleanup(adapter);
5769 
5770 	pci_disable_pcie_error_reporting(pdev);
5771 
5772 	pci_release_regions(pdev);
5773 	pci_disable_device(pdev);
5774 
5775 	free_netdev(adapter->netdev);
5776 }
5777 
5778 static ssize_t be_hwmon_show_temp(struct device *dev,
5779 				  struct device_attribute *dev_attr,
5780 				  char *buf)
5781 {
5782 	struct be_adapter *adapter = dev_get_drvdata(dev);
5783 
5784 	/* Unit: millidegree Celsius */
5785 	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5786 		return -EIO;
5787 	else
5788 		return sprintf(buf, "%u\n",
5789 			       adapter->hwmon_info.be_on_die_temp * 1000);
5790 }
5791 
5792 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5793 			  be_hwmon_show_temp, NULL, 1);
5794 
5795 static struct attribute *be_hwmon_attrs[] = {
5796 	&sensor_dev_attr_temp1_input.dev_attr.attr,
5797 	NULL
5798 };
5799 
5800 ATTRIBUTE_GROUPS(be_hwmon);
5801 
5802 static char *mc_name(struct be_adapter *adapter)
5803 {
5804 	char *str = "";	/* default */
5805 
5806 	switch (adapter->mc_type) {
5807 	case UMC:
5808 		str = "UMC";
5809 		break;
5810 	case FLEX10:
5811 		str = "FLEX10";
5812 		break;
5813 	case vNIC1:
5814 		str = "vNIC-1";
5815 		break;
5816 	case nPAR:
5817 		str = "nPAR";
5818 		break;
5819 	case UFP:
5820 		str = "UFP";
5821 		break;
5822 	case vNIC2:
5823 		str = "vNIC-2";
5824 		break;
5825 	default:
5826 		str = "";
5827 	}
5828 
5829 	return str;
5830 }
5831 
5832 static inline char *func_name(struct be_adapter *adapter)
5833 {
5834 	return be_physfn(adapter) ? "PF" : "VF";
5835 }
5836 
5837 static inline char *nic_name(struct pci_dev *pdev)
5838 {
5839 	switch (pdev->device) {
5840 	case OC_DEVICE_ID1:
5841 		return OC_NAME;
5842 	case OC_DEVICE_ID2:
5843 		return OC_NAME_BE;
5844 	case OC_DEVICE_ID3:
5845 	case OC_DEVICE_ID4:
5846 		return OC_NAME_LANCER;
5847 	case BE_DEVICE_ID2:
5848 		return BE3_NAME;
5849 	case OC_DEVICE_ID5:
5850 	case OC_DEVICE_ID6:
5851 		return OC_NAME_SH;
5852 	default:
5853 		return BE_NAME;
5854 	}
5855 }
5856 
5857 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5858 {
5859 	struct be_adapter *adapter;
5860 	struct net_device *netdev;
5861 	int status = 0;
5862 
5863 	dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5864 
5865 	status = pci_enable_device(pdev);
5866 	if (status)
5867 		goto do_none;
5868 
5869 	status = pci_request_regions(pdev, DRV_NAME);
5870 	if (status)
5871 		goto disable_dev;
5872 	pci_set_master(pdev);
5873 
5874 	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5875 	if (!netdev) {
5876 		status = -ENOMEM;
5877 		goto rel_reg;
5878 	}
5879 	adapter = netdev_priv(netdev);
5880 	adapter->pdev = pdev;
5881 	pci_set_drvdata(pdev, adapter);
5882 	adapter->netdev = netdev;
5883 	SET_NETDEV_DEV(netdev, &pdev->dev);
5884 
5885 	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5886 	if (!status) {
5887 		netdev->features |= NETIF_F_HIGHDMA;
5888 	} else {
5889 		status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5890 		if (status) {
5891 			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5892 			goto free_netdev;
5893 		}
5894 	}
5895 
5896 	status = pci_enable_pcie_error_reporting(pdev);
5897 	if (!status)
5898 		dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5899 
5900 	status = be_map_pci_bars(adapter);
5901 	if (status)
5902 		goto free_netdev;
5903 
5904 	status = be_drv_init(adapter);
5905 	if (status)
5906 		goto unmap_bars;
5907 
5908 	status = be_setup(adapter);
5909 	if (status)
5910 		goto drv_cleanup;
5911 
5912 	be_netdev_init(netdev);
5913 	status = register_netdev(netdev);
5914 	if (status != 0)
5915 		goto unsetup;
5916 
5917 	be_roce_dev_add(adapter);
5918 
5919 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5920 	adapter->error_recovery.probe_time = jiffies;
5921 
5922 	/* On Die temperature not supported for VF. */
5923 	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5924 		adapter->hwmon_info.hwmon_dev =
5925 			devm_hwmon_device_register_with_groups(&pdev->dev,
5926 							       DRV_NAME,
5927 							       adapter,
5928 							       be_hwmon_groups);
5929 		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5930 	}
5931 
5932 	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5933 		 func_name(adapter), mc_name(adapter), adapter->port_name);
5934 
5935 	return 0;
5936 
5937 unsetup:
5938 	be_clear(adapter);
5939 drv_cleanup:
5940 	be_drv_cleanup(adapter);
5941 unmap_bars:
5942 	be_unmap_pci_bars(adapter);
5943 free_netdev:
5944 	free_netdev(netdev);
5945 rel_reg:
5946 	pci_release_regions(pdev);
5947 disable_dev:
5948 	pci_disable_device(pdev);
5949 do_none:
5950 	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5951 	return status;
5952 }
5953 
5954 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5955 {
5956 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5957 
5958 	be_intr_set(adapter, false);
5959 	be_cancel_err_detection(adapter);
5960 
5961 	be_cleanup(adapter);
5962 
5963 	pci_save_state(pdev);
5964 	pci_disable_device(pdev);
5965 	pci_set_power_state(pdev, pci_choose_state(pdev, state));
5966 	return 0;
5967 }
5968 
5969 static int be_pci_resume(struct pci_dev *pdev)
5970 {
5971 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5972 	int status = 0;
5973 
5974 	status = pci_enable_device(pdev);
5975 	if (status)
5976 		return status;
5977 
5978 	pci_restore_state(pdev);
5979 
5980 	status = be_resume(adapter);
5981 	if (status)
5982 		return status;
5983 
5984 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5985 
5986 	return 0;
5987 }
5988 
5989 /*
5990  * An FLR will stop BE from DMAing any data.
5991  */
5992 static void be_shutdown(struct pci_dev *pdev)
5993 {
5994 	struct be_adapter *adapter = pci_get_drvdata(pdev);
5995 
5996 	if (!adapter)
5997 		return;
5998 
5999 	be_roce_dev_shutdown(adapter);
6000 	cancel_delayed_work_sync(&adapter->work);
6001 	be_cancel_err_detection(adapter);
6002 
6003 	netif_device_detach(adapter->netdev);
6004 
6005 	be_cmd_reset_function(adapter);
6006 
6007 	pci_disable_device(pdev);
6008 }
6009 
6010 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6011 					    pci_channel_state_t state)
6012 {
6013 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6014 
6015 	dev_err(&adapter->pdev->dev, "EEH error detected\n");
6016 
6017 	be_roce_dev_remove(adapter);
6018 
6019 	if (!be_check_error(adapter, BE_ERROR_EEH)) {
6020 		be_set_error(adapter, BE_ERROR_EEH);
6021 
6022 		be_cancel_err_detection(adapter);
6023 
6024 		be_cleanup(adapter);
6025 	}
6026 
6027 	if (state == pci_channel_io_perm_failure)
6028 		return PCI_ERS_RESULT_DISCONNECT;
6029 
6030 	pci_disable_device(pdev);
6031 
6032 	/* The error could cause the FW to trigger a flash debug dump.
6033 	 * Resetting the card while flash dump is in progress
6034 	 * can cause it not to recover; wait for it to finish.
6035 	 * Wait only for first function as it is needed only once per
6036 	 * adapter.
6037 	 */
6038 	if (pdev->devfn == 0)
6039 		ssleep(30);
6040 
6041 	return PCI_ERS_RESULT_NEED_RESET;
6042 }
6043 
6044 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6045 {
6046 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6047 	int status;
6048 
6049 	dev_info(&adapter->pdev->dev, "EEH reset\n");
6050 
6051 	status = pci_enable_device(pdev);
6052 	if (status)
6053 		return PCI_ERS_RESULT_DISCONNECT;
6054 
6055 	pci_set_master(pdev);
6056 	pci_restore_state(pdev);
6057 
6058 	/* Check if card is ok and fw is ready */
6059 	dev_info(&adapter->pdev->dev,
6060 		 "Waiting for FW to be ready after EEH reset\n");
6061 	status = be_fw_wait_ready(adapter);
6062 	if (status)
6063 		return PCI_ERS_RESULT_DISCONNECT;
6064 
6065 	pci_cleanup_aer_uncorrect_error_status(pdev);
6066 	be_clear_error(adapter, BE_CLEAR_ALL);
6067 	return PCI_ERS_RESULT_RECOVERED;
6068 }
6069 
6070 static void be_eeh_resume(struct pci_dev *pdev)
6071 {
6072 	int status = 0;
6073 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6074 
6075 	dev_info(&adapter->pdev->dev, "EEH resume\n");
6076 
6077 	pci_save_state(pdev);
6078 
6079 	status = be_resume(adapter);
6080 	if (status)
6081 		goto err;
6082 
6083 	be_roce_dev_add(adapter);
6084 
6085 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6086 	return;
6087 err:
6088 	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6089 }
6090 
6091 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6092 {
6093 	struct be_adapter *adapter = pci_get_drvdata(pdev);
6094 	struct be_resources vft_res = {0};
6095 	int status;
6096 
6097 	if (!num_vfs)
6098 		be_vf_clear(adapter);
6099 
6100 	adapter->num_vfs = num_vfs;
6101 
6102 	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6103 		dev_warn(&pdev->dev,
6104 			 "Cannot disable VFs while they are assigned\n");
6105 		return -EBUSY;
6106 	}
6107 
6108 	/* When the HW is in SRIOV capable configuration, the PF-pool resources
6109 	 * are equally distributed across the max-number of VFs. The user may
6110 	 * request only a subset of the max-vfs to be enabled.
6111 	 * Based on num_vfs, redistribute the resources across num_vfs so that
6112 	 * each VF will have access to more number of resources.
6113 	 * This facility is not available in BE3 FW.
6114 	 * Also, this is done by FW in Lancer chip.
6115 	 */
6116 	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6117 		be_calculate_vf_res(adapter, adapter->num_vfs,
6118 				    &vft_res);
6119 		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6120 						 adapter->num_vfs, &vft_res);
6121 		if (status)
6122 			dev_err(&pdev->dev,
6123 				"Failed to optimize SR-IOV resources\n");
6124 	}
6125 
6126 	status = be_get_resources(adapter);
6127 	if (status)
6128 		return be_cmd_status(status);
6129 
6130 	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6131 	rtnl_lock();
6132 	status = be_update_queues(adapter);
6133 	rtnl_unlock();
6134 	if (status)
6135 		return be_cmd_status(status);
6136 
6137 	if (adapter->num_vfs)
6138 		status = be_vf_setup(adapter);
6139 
6140 	if (!status)
6141 		return adapter->num_vfs;
6142 
6143 	return 0;
6144 }
6145 
6146 static const struct pci_error_handlers be_eeh_handlers = {
6147 	.error_detected = be_eeh_err_detected,
6148 	.slot_reset = be_eeh_reset,
6149 	.resume = be_eeh_resume,
6150 };
6151 
6152 static struct pci_driver be_driver = {
6153 	.name = DRV_NAME,
6154 	.id_table = be_dev_ids,
6155 	.probe = be_probe,
6156 	.remove = be_remove,
6157 	.suspend = be_suspend,
6158 	.resume = be_pci_resume,
6159 	.shutdown = be_shutdown,
6160 	.sriov_configure = be_pci_sriov_configure,
6161 	.err_handler = &be_eeh_handlers
6162 };
6163 
6164 static int __init be_init_module(void)
6165 {
6166 	int status;
6167 
6168 	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6169 	    rx_frag_size != 2048) {
6170 		printk(KERN_WARNING DRV_NAME
6171 			" : Module param rx_frag_size must be 2048/4096/8192."
6172 			" Using 2048\n");
6173 		rx_frag_size = 2048;
6174 	}
6175 
6176 	if (num_vfs > 0) {
6177 		pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
6178 		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6179 	}
6180 
6181 	be_wq = create_singlethread_workqueue("be_wq");
6182 	if (!be_wq) {
6183 		pr_warn(DRV_NAME "workqueue creation failed\n");
6184 		return -1;
6185 	}
6186 
6187 	be_err_recovery_workq =
6188 		create_singlethread_workqueue("be_err_recover");
6189 	if (!be_err_recovery_workq)
6190 		pr_warn(DRV_NAME "Could not create error recovery workqueue\n");
6191 
6192 	status = pci_register_driver(&be_driver);
6193 	if (status) {
6194 		destroy_workqueue(be_wq);
6195 		be_destroy_err_recovery_workq();
6196 	}
6197 	return status;
6198 }
6199 module_init(be_init_module);
6200 
6201 static void __exit be_exit_module(void)
6202 {
6203 	pci_unregister_driver(&be_driver);
6204 
6205 	be_destroy_err_recovery_workq();
6206 
6207 	if (be_wq)
6208 		destroy_workqueue(be_wq);
6209 }
6210 module_exit(be_exit_module);
6211