xref: /linux/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 /*
2  * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3  * driver for Linux.
4  *
5  * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35 
36 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
37 
38 #include <linux/module.h>
39 #include <linux/moduleparam.h>
40 #include <linux/init.h>
41 #include <linux/pci.h>
42 #include <linux/dma-mapping.h>
43 #include <linux/netdevice.h>
44 #include <linux/etherdevice.h>
45 #include <linux/debugfs.h>
46 #include <linux/ethtool.h>
47 #include <linux/mdio.h>
48 
49 #include "t4vf_common.h"
50 #include "t4vf_defs.h"
51 
52 #include "../cxgb4/t4_regs.h"
53 #include "../cxgb4/t4_msg.h"
54 
/*
 * Generic information about the driver: its version string and a short
 * human-readable description.
 */
#define DRV_VERSION "2.0.0-ko"
#define DRV_DESC "Chelsio T4/T5/T6 Virtual Function (VF) Network Driver"
60 
61 /*
62  * Module Parameters.
63  * ==================
64  */
65 
/*
 * Default ethtool "message level" for adapters: the NETIF_MSG_* categories
 * OR-ed together here form the initial value of the dflt_msg_enable module
 * parameter.
 */
#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)

/*
 * Exposed as a module parameter with mode 0644; its description marks it as
 * deprecated.
 */
static int dflt_msg_enable = DFLT_MSG_ENABLE;

module_param(dflt_msg_enable, int, 0644);
MODULE_PARM_DESC(dflt_msg_enable,
		 "default adapter ethtool message level bitmap, "
		 "deprecated parameter");
79 
/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X then MSI.  This parameter determines which of these schemes the
 * driver may consider as follows:
 *
 *     msi = 2: choose from among MSI-X and MSI
 *     msi = 1: only consider MSI interrupts
 *
 * Note that unlike the Physical Function driver, this Virtual Function driver
 * does _not_ support legacy INTx interrupts (this limitation is mandated by
 * the PCI-E SR-IOV standard).
 */
#define MSI_MSIX	2
#define MSI_MSI		1
#define MSI_DEFAULT	MSI_MSIX

/*
 * Interrupt scheme selector; starts out as MSI_DEFAULT (MSI_MSIX) and is
 * exposed as a module parameter with mode 0644.
 */
static int msi = MSI_DEFAULT;

module_param(msi, int, 0644);
MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
100 
101 /*
102  * Fundamental constants.
103  * ======================
104  */
105 
/*
 * Upper and lower bounds on queue sizes, plus the Free List granularity
 * constants derived from the Egress Queue unit size.
 * NOTE(review): the users of these limits are outside this view; presumably
 * they are enforced when ring sizes are configured -- confirm.
 */
enum {
	/* Largest permitted sizes. */
	MAX_TXQ_ENTRIES		= 16384,
	MAX_RSPQ_ENTRIES	= 16384,
	MAX_RX_BUFFERS		= 16384,

	/* Smallest permitted sizes. */
	MIN_TXQ_ENTRIES		= 32,
	MIN_RSPQ_ENTRIES	= 128,
	MIN_FL_ENTRIES		= 16,

	/*
	 * For purposes of manipulating the Free List size we need to
	 * recognize that Free Lists are actually Egress Queues (the host
	 * produces free buffers which the hardware consumes), Egress Queues
	 * indices are all in units of Egress Context Units bytes, and free
	 * list entries are 64-bit PCI DMA addresses.  And since the state of
	 * the Producer Index == the Consumer Index implies an EMPTY list, we
	 * always have at least one Egress Unit's worth of Free List entries
	 * unused.  See sge.c for more details ...
	 */
	EQ_UNIT = SGE_EQ_IDXSIZE,
	/* Number of 64-bit Free List entries that fit in one Egress Unit. */
	FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
	/* The one Egress Unit's worth of entries that always stays unused. */
	MIN_FL_RESID = FL_PER_EQ_UNIT,
};
129 
130 /*
131  * Global driver state.
132  * ====================
133  */
134 
/*
 * NOTE(review): presumably the driver's top-level debugfs directory; it is
 * created and removed outside this view -- confirm.
 */
static struct dentry *cxgb4vf_debugfs_root;
136 
137 /*
138  * OS "Callback" functions.
139  * ========================
140  */
141 
142 /*
143  * The link status has changed on the indicated "port" (Virtual Interface).
144  */
145 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
146 {
147 	struct net_device *dev = adapter->port[pidx];
148 
149 	/*
150 	 * If the port is disabled or the current recorded "link up"
151 	 * status matches the new status, just return.
152 	 */
153 	if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
154 		return;
155 
156 	/*
157 	 * Tell the OS that the link status has changed and print a short
158 	 * informative message on the console about the event.
159 	 */
160 	if (link_ok) {
161 		const char *s;
162 		const char *fc;
163 		const struct port_info *pi = netdev_priv(dev);
164 
165 		netif_carrier_on(dev);
166 
167 		switch (pi->link_cfg.speed) {
168 		case 40000:
169 			s = "40Gbps";
170 			break;
171 
172 		case 10000:
173 			s = "10Gbps";
174 			break;
175 
176 		case 1000:
177 			s = "1000Mbps";
178 			break;
179 
180 		case 100:
181 			s = "100Mbps";
182 			break;
183 
184 		default:
185 			s = "unknown";
186 			break;
187 		}
188 
189 		switch (pi->link_cfg.fc) {
190 		case PAUSE_RX:
191 			fc = "RX";
192 			break;
193 
194 		case PAUSE_TX:
195 			fc = "TX";
196 			break;
197 
198 		case PAUSE_RX|PAUSE_TX:
199 			fc = "RX/TX";
200 			break;
201 
202 		default:
203 			fc = "no";
204 			break;
205 		}
206 
207 		netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
208 	} else {
209 		netif_carrier_off(dev);
210 		netdev_info(dev, "link down\n");
211 	}
212 }
213 
214 /*
215  * THe port module type has changed on the indicated "port" (Virtual
216  * Interface).
217  */
218 void t4vf_os_portmod_changed(struct adapter *adapter, int pidx)
219 {
220 	static const char * const mod_str[] = {
221 		NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
222 	};
223 	const struct net_device *dev = adapter->port[pidx];
224 	const struct port_info *pi = netdev_priv(dev);
225 
226 	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
227 		dev_info(adapter->pdev_dev, "%s: port module unplugged\n",
228 			 dev->name);
229 	else if (pi->mod_type < ARRAY_SIZE(mod_str))
230 		dev_info(adapter->pdev_dev, "%s: %s port module inserted\n",
231 			 dev->name, mod_str[pi->mod_type]);
232 	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
233 		dev_info(adapter->pdev_dev, "%s: unsupported optical port "
234 			 "module inserted\n", dev->name);
235 	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
236 		dev_info(adapter->pdev_dev, "%s: unknown port module inserted,"
237 			 "forcing TWINAX\n", dev->name);
238 	else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
239 		dev_info(adapter->pdev_dev, "%s: transceiver module error\n",
240 			 dev->name);
241 	else
242 		dev_info(adapter->pdev_dev, "%s: unknown module type %d "
243 			 "inserted\n", dev->name, pi->mod_type);
244 }
245 
246 /*
247  * Net device operations.
248  * ======================
249  */
250 
251 
252 
253 
254 /*
255  * Perform the MAC and PHY actions needed to enable a "port" (Virtual
256  * Interface).
257  */
258 static int link_start(struct net_device *dev)
259 {
260 	int ret;
261 	struct port_info *pi = netdev_priv(dev);
262 
263 	/*
264 	 * We do not set address filters and promiscuity here, the stack does
265 	 * that step explicitly. Enable vlan accel.
266 	 */
267 	ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
268 			      true);
269 	if (ret == 0) {
270 		ret = t4vf_change_mac(pi->adapter, pi->viid,
271 				      pi->xact_addr_filt, dev->dev_addr, true);
272 		if (ret >= 0) {
273 			pi->xact_addr_filt = ret;
274 			ret = 0;
275 		}
276 	}
277 
278 	/*
279 	 * We don't need to actually "start the link" itself since the
280 	 * firmware will do that for us when the first Virtual Interface
281 	 * is enabled on a port.
282 	 */
283 	if (ret == 0)
284 		ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
285 	return ret;
286 }
287 
288 /*
289  * Name the MSI-X interrupts.
290  */
291 static void name_msix_vecs(struct adapter *adapter)
292 {
293 	int namelen = sizeof(adapter->msix_info[0].desc) - 1;
294 	int pidx;
295 
296 	/*
297 	 * Firmware events.
298 	 */
299 	snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
300 		 "%s-FWeventq", adapter->name);
301 	adapter->msix_info[MSIX_FW].desc[namelen] = 0;
302 
303 	/*
304 	 * Ethernet queues.
305 	 */
306 	for_each_port(adapter, pidx) {
307 		struct net_device *dev = adapter->port[pidx];
308 		const struct port_info *pi = netdev_priv(dev);
309 		int qs, msi;
310 
311 		for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
312 			snprintf(adapter->msix_info[msi].desc, namelen,
313 				 "%s-%d", dev->name, qs);
314 			adapter->msix_info[msi].desc[namelen] = 0;
315 		}
316 	}
317 }
318 
319 /*
320  * Request all of our MSI-X resources.
321  */
322 static int request_msix_queue_irqs(struct adapter *adapter)
323 {
324 	struct sge *s = &adapter->sge;
325 	int rxq, msi, err;
326 
327 	/*
328 	 * Firmware events.
329 	 */
330 	err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
331 			  0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
332 	if (err)
333 		return err;
334 
335 	/*
336 	 * Ethernet queues.
337 	 */
338 	msi = MSIX_IQFLINT;
339 	for_each_ethrxq(s, rxq) {
340 		err = request_irq(adapter->msix_info[msi].vec,
341 				  t4vf_sge_intr_msix, 0,
342 				  adapter->msix_info[msi].desc,
343 				  &s->ethrxq[rxq].rspq);
344 		if (err)
345 			goto err_free_irqs;
346 		msi++;
347 	}
348 	return 0;
349 
350 err_free_irqs:
351 	while (--rxq >= 0)
352 		free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
353 	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
354 	return err;
355 }
356 
357 /*
358  * Free our MSI-X resources.
359  */
360 static void free_msix_queue_irqs(struct adapter *adapter)
361 {
362 	struct sge *s = &adapter->sge;
363 	int rxq, msi;
364 
365 	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
366 	msi = MSIX_IQFLINT;
367 	for_each_ethrxq(s, rxq)
368 		free_irq(adapter->msix_info[msi++].vec,
369 			 &s->ethrxq[rxq].rspq);
370 }
371 
372 /*
373  * Turn on NAPI and start up interrupts on a response queue.
374  */
375 static void qenable(struct sge_rspq *rspq)
376 {
377 	napi_enable(&rspq->napi);
378 
379 	/*
380 	 * 0-increment the Going To Sleep register to start the timer and
381 	 * enable interrupts.
382 	 */
383 	t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
384 		     CIDXINC_V(0) |
385 		     SEINTARM_V(rspq->intr_params) |
386 		     INGRESSQID_V(rspq->cntxt_id));
387 }
388 
389 /*
390  * Enable NAPI scheduling and interrupt generation for all Receive Queues.
391  */
392 static void enable_rx(struct adapter *adapter)
393 {
394 	int rxq;
395 	struct sge *s = &adapter->sge;
396 
397 	for_each_ethrxq(s, rxq)
398 		qenable(&s->ethrxq[rxq].rspq);
399 	qenable(&s->fw_evtq);
400 
401 	/*
402 	 * The interrupt queue doesn't use NAPI so we do the 0-increment of
403 	 * its Going To Sleep register here to get it started.
404 	 */
405 	if (adapter->flags & USING_MSI)
406 		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
407 			     CIDXINC_V(0) |
408 			     SEINTARM_V(s->intrq.intr_params) |
409 			     INGRESSQID_V(s->intrq.cntxt_id));
410 
411 }
412 
413 /*
414  * Wait until all NAPI handlers are descheduled.
415  */
416 static void quiesce_rx(struct adapter *adapter)
417 {
418 	struct sge *s = &adapter->sge;
419 	int rxq;
420 
421 	for_each_ethrxq(s, rxq)
422 		napi_disable(&s->ethrxq[rxq].rspq.napi);
423 	napi_disable(&s->fw_evtq.napi);
424 }
425 
426 /*
427  * Response queue handler for the firmware event queue.
428  */
429 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
430 			  const struct pkt_gl *gl)
431 {
432 	/*
433 	 * Extract response opcode and get pointer to CPL message body.
434 	 */
435 	struct adapter *adapter = rspq->adapter;
436 	u8 opcode = ((const struct rss_header *)rsp)->opcode;
437 	void *cpl = (void *)(rsp + 1);
438 
439 	switch (opcode) {
440 	case CPL_FW6_MSG: {
441 		/*
442 		 * We've received an asynchronous message from the firmware.
443 		 */
444 		const struct cpl_fw6_msg *fw_msg = cpl;
445 		if (fw_msg->type == FW6_TYPE_CMD_RPL)
446 			t4vf_handle_fw_rpl(adapter, fw_msg->data);
447 		break;
448 	}
449 
450 	case CPL_FW4_MSG: {
451 		/* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
452 		 */
453 		const struct cpl_sge_egr_update *p = (void *)(rsp + 3);
454 		opcode = CPL_OPCODE_G(ntohl(p->opcode_qid));
455 		if (opcode != CPL_SGE_EGR_UPDATE) {
456 			dev_err(adapter->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n"
457 				, opcode);
458 			break;
459 		}
460 		cpl = (void *)p;
461 		/*FALLTHROUGH*/
462 	}
463 
464 	case CPL_SGE_EGR_UPDATE: {
465 		/*
466 		 * We've received an Egress Queue Status Update message.  We
467 		 * get these, if the SGE is configured to send these when the
468 		 * firmware passes certain points in processing our TX
469 		 * Ethernet Queue or if we make an explicit request for one.
470 		 * We use these updates to determine when we may need to
471 		 * restart a TX Ethernet Queue which was stopped for lack of
472 		 * free TX Queue Descriptors ...
473 		 */
474 		const struct cpl_sge_egr_update *p = cpl;
475 		unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid));
476 		struct sge *s = &adapter->sge;
477 		struct sge_txq *tq;
478 		struct sge_eth_txq *txq;
479 		unsigned int eq_idx;
480 
481 		/*
482 		 * Perform sanity checking on the Queue ID to make sure it
483 		 * really refers to one of our TX Ethernet Egress Queues which
484 		 * is active and matches the queue's ID.  None of these error
485 		 * conditions should ever happen so we may want to either make
486 		 * them fatal and/or conditionalized under DEBUG.
487 		 */
488 		eq_idx = EQ_IDX(s, qid);
489 		if (unlikely(eq_idx >= MAX_EGRQ)) {
490 			dev_err(adapter->pdev_dev,
491 				"Egress Update QID %d out of range\n", qid);
492 			break;
493 		}
494 		tq = s->egr_map[eq_idx];
495 		if (unlikely(tq == NULL)) {
496 			dev_err(adapter->pdev_dev,
497 				"Egress Update QID %d TXQ=NULL\n", qid);
498 			break;
499 		}
500 		txq = container_of(tq, struct sge_eth_txq, q);
501 		if (unlikely(tq->abs_id != qid)) {
502 			dev_err(adapter->pdev_dev,
503 				"Egress Update QID %d refers to TXQ %d\n",
504 				qid, tq->abs_id);
505 			break;
506 		}
507 
508 		/*
509 		 * Restart a stopped TX Queue which has less than half of its
510 		 * TX ring in use ...
511 		 */
512 		txq->q.restarts++;
513 		netif_tx_wake_queue(txq->txq);
514 		break;
515 	}
516 
517 	default:
518 		dev_err(adapter->pdev_dev,
519 			"unexpected CPL %#x on FW event queue\n", opcode);
520 	}
521 
522 	return 0;
523 }
524 
525 /*
526  * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
527  * to use and initializes them.  We support multiple "Queue Sets" per port if
528  * we have MSI-X, otherwise just one queue set per port.
529  */
530 static int setup_sge_queues(struct adapter *adapter)
531 {
532 	struct sge *s = &adapter->sge;
533 	int err, pidx, msix;
534 
535 	/*
536 	 * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
537 	 * state.
538 	 */
539 	bitmap_zero(s->starving_fl, MAX_EGRQ);
540 
541 	/*
542 	 * If we're using MSI interrupt mode we need to set up a "forwarded
543 	 * interrupt" queue which we'll set up with our MSI vector.  The rest
544 	 * of the ingress queues will be set up to forward their interrupts to
545 	 * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
546 	 * the intrq's queue ID as the interrupt forwarding queue for the
547 	 * subsequent calls ...
548 	 */
549 	if (adapter->flags & USING_MSI) {
550 		err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
551 					 adapter->port[0], 0, NULL, NULL);
552 		if (err)
553 			goto err_free_queues;
554 	}
555 
556 	/*
557 	 * Allocate our ingress queue for asynchronous firmware messages.
558 	 */
559 	err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
560 				 MSIX_FW, NULL, fwevtq_handler);
561 	if (err)
562 		goto err_free_queues;
563 
564 	/*
565 	 * Allocate each "port"'s initial Queue Sets.  These can be changed
566 	 * later on ... up to the point where any interface on the adapter is
567 	 * brought up at which point lots of things get nailed down
568 	 * permanently ...
569 	 */
570 	msix = MSIX_IQFLINT;
571 	for_each_port(adapter, pidx) {
572 		struct net_device *dev = adapter->port[pidx];
573 		struct port_info *pi = netdev_priv(dev);
574 		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
575 		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
576 		int qs;
577 
578 		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
579 			err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
580 						 dev, msix++,
581 						 &rxq->fl, t4vf_ethrx_handler);
582 			if (err)
583 				goto err_free_queues;
584 
585 			err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
586 					     netdev_get_tx_queue(dev, qs),
587 					     s->fw_evtq.cntxt_id);
588 			if (err)
589 				goto err_free_queues;
590 
591 			rxq->rspq.idx = qs;
592 			memset(&rxq->stats, 0, sizeof(rxq->stats));
593 		}
594 	}
595 
596 	/*
597 	 * Create the reverse mappings for the queues.
598 	 */
599 	s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
600 	s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
601 	IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
602 	for_each_port(adapter, pidx) {
603 		struct net_device *dev = adapter->port[pidx];
604 		struct port_info *pi = netdev_priv(dev);
605 		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
606 		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
607 		int qs;
608 
609 		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
610 			IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
611 			EQ_MAP(s, txq->q.abs_id) = &txq->q;
612 
613 			/*
614 			 * The FW_IQ_CMD doesn't return the Absolute Queue IDs
615 			 * for Free Lists but since all of the Egress Queues
616 			 * (including Free Lists) have Relative Queue IDs
617 			 * which are computed as Absolute - Base Queue ID, we
618 			 * can synthesize the Absolute Queue IDs for the Free
619 			 * Lists.  This is useful for debugging purposes when
620 			 * we want to dump Queue Contexts via the PF Driver.
621 			 */
622 			rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
623 			EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
624 		}
625 	}
626 	return 0;
627 
628 err_free_queues:
629 	t4vf_free_sge_resources(adapter);
630 	return err;
631 }
632 
633 /*
634  * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
635  * queues.  We configure the RSS CPU lookup table to distribute to the number
636  * of HW receive queues, and the response queue lookup table to narrow that
637  * down to the response queues actually configured for each "port" (Virtual
638  * Interface).  We always configure the RSS mapping for all ports since the
639  * mapping table has plenty of entries.
640  */
641 static int setup_rss(struct adapter *adapter)
642 {
643 	int pidx;
644 
645 	for_each_port(adapter, pidx) {
646 		struct port_info *pi = adap2pinfo(adapter, pidx);
647 		struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
648 		u16 rss[MAX_PORT_QSETS];
649 		int qs, err;
650 
651 		for (qs = 0; qs < pi->nqsets; qs++)
652 			rss[qs] = rxq[qs].rspq.abs_id;
653 
654 		err = t4vf_config_rss_range(adapter, pi->viid,
655 					    0, pi->rss_size, rss, pi->nqsets);
656 		if (err)
657 			return err;
658 
659 		/*
660 		 * Perform Global RSS Mode-specific initialization.
661 		 */
662 		switch (adapter->params.rss.mode) {
663 		case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
664 			/*
665 			 * If Tunnel All Lookup isn't specified in the global
666 			 * RSS Configuration, then we need to specify a
667 			 * default Ingress Queue for any ingress packets which
668 			 * aren't hashed.  We'll use our first ingress queue
669 			 * ...
670 			 */
671 			if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
672 				union rss_vi_config config;
673 				err = t4vf_read_rss_vi_config(adapter,
674 							      pi->viid,
675 							      &config);
676 				if (err)
677 					return err;
678 				config.basicvirtual.defaultq =
679 					rxq[0].rspq.abs_id;
680 				err = t4vf_write_rss_vi_config(adapter,
681 							       pi->viid,
682 							       &config);
683 				if (err)
684 					return err;
685 			}
686 			break;
687 		}
688 	}
689 
690 	return 0;
691 }
692 
693 /*
694  * Bring the adapter up.  Called whenever we go from no "ports" open to having
695  * one open.  This function performs the actions necessary to make an adapter
696  * operational, such as completing the initialization of HW modules, and
697  * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
698  * this is called "cxgb_up" in the PF Driver.)
699  */
700 static int adapter_up(struct adapter *adapter)
701 {
702 	int err;
703 
704 	/*
705 	 * If this is the first time we've been called, perform basic
706 	 * adapter setup.  Once we've done this, many of our adapter
707 	 * parameters can no longer be changed ...
708 	 */
709 	if ((adapter->flags & FULL_INIT_DONE) == 0) {
710 		err = setup_sge_queues(adapter);
711 		if (err)
712 			return err;
713 		err = setup_rss(adapter);
714 		if (err) {
715 			t4vf_free_sge_resources(adapter);
716 			return err;
717 		}
718 
719 		if (adapter->flags & USING_MSIX)
720 			name_msix_vecs(adapter);
721 		adapter->flags |= FULL_INIT_DONE;
722 	}
723 
724 	/*
725 	 * Acquire our interrupt resources.  We only support MSI-X and MSI.
726 	 */
727 	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
728 	if (adapter->flags & USING_MSIX)
729 		err = request_msix_queue_irqs(adapter);
730 	else
731 		err = request_irq(adapter->pdev->irq,
732 				  t4vf_intr_handler(adapter), 0,
733 				  adapter->name, adapter);
734 	if (err) {
735 		dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
736 			err);
737 		return err;
738 	}
739 
740 	/*
741 	 * Enable NAPI ingress processing and return success.
742 	 */
743 	enable_rx(adapter);
744 	t4vf_sge_start(adapter);
745 
746 	/* Initialize hash mac addr list*/
747 	INIT_LIST_HEAD(&adapter->mac_hlist);
748 	return 0;
749 }
750 
751 /*
752  * Bring the adapter down.  Called whenever the last "port" (Virtual
753  * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
754  * Driver.)
755  */
756 static void adapter_down(struct adapter *adapter)
757 {
758 	/*
759 	 * Free interrupt resources.
760 	 */
761 	if (adapter->flags & USING_MSIX)
762 		free_msix_queue_irqs(adapter);
763 	else
764 		free_irq(adapter->pdev->irq, adapter);
765 
766 	/*
767 	 * Wait for NAPI handlers to finish.
768 	 */
769 	quiesce_rx(adapter);
770 }
771 
772 /*
773  * Start up a net device.
774  */
775 static int cxgb4vf_open(struct net_device *dev)
776 {
777 	int err;
778 	struct port_info *pi = netdev_priv(dev);
779 	struct adapter *adapter = pi->adapter;
780 
781 	/*
782 	 * If this is the first interface that we're opening on the "adapter",
783 	 * bring the "adapter" up now.
784 	 */
785 	if (adapter->open_device_map == 0) {
786 		err = adapter_up(adapter);
787 		if (err)
788 			return err;
789 	}
790 
791 	/*
792 	 * Note that this interface is up and start everything up ...
793 	 */
794 	err = link_start(dev);
795 	if (err)
796 		goto err_unwind;
797 
798 	netif_tx_start_all_queues(dev);
799 	set_bit(pi->port_id, &adapter->open_device_map);
800 	return 0;
801 
802 err_unwind:
803 	if (adapter->open_device_map == 0)
804 		adapter_down(adapter);
805 	return err;
806 }
807 
808 /*
809  * Shut down a net device.  This routine is called "cxgb_close" in the PF
810  * Driver ...
811  */
812 static int cxgb4vf_stop(struct net_device *dev)
813 {
814 	struct port_info *pi = netdev_priv(dev);
815 	struct adapter *adapter = pi->adapter;
816 
817 	netif_tx_stop_all_queues(dev);
818 	netif_carrier_off(dev);
819 	t4vf_enable_vi(adapter, pi->viid, false, false);
820 	pi->link_cfg.link_ok = 0;
821 
822 	clear_bit(pi->port_id, &adapter->open_device_map);
823 	if (adapter->open_device_map == 0)
824 		adapter_down(adapter);
825 	return 0;
826 }
827 
828 /*
829  * Translate our basic statistics into the standard "ifconfig" statistics.
830  */
831 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
832 {
833 	struct t4vf_port_stats stats;
834 	struct port_info *pi = netdev2pinfo(dev);
835 	struct adapter *adapter = pi->adapter;
836 	struct net_device_stats *ns = &dev->stats;
837 	int err;
838 
839 	spin_lock(&adapter->stats_lock);
840 	err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
841 	spin_unlock(&adapter->stats_lock);
842 
843 	memset(ns, 0, sizeof(*ns));
844 	if (err)
845 		return ns;
846 
847 	ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
848 			stats.tx_ucast_bytes + stats.tx_offload_bytes);
849 	ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
850 			  stats.tx_ucast_frames + stats.tx_offload_frames);
851 	ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
852 			stats.rx_ucast_bytes);
853 	ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
854 			  stats.rx_ucast_frames);
855 	ns->multicast = stats.rx_mcast_frames;
856 	ns->tx_errors = stats.tx_drop_frames;
857 	ns->rx_errors = stats.rx_err_frames;
858 
859 	return ns;
860 }
861 
862 static inline int cxgb4vf_set_addr_hash(struct port_info *pi)
863 {
864 	struct adapter *adapter = pi->adapter;
865 	u64 vec = 0;
866 	bool ucast = false;
867 	struct hash_mac_addr *entry;
868 
869 	/* Calculate the hash vector for the updated list and program it */
870 	list_for_each_entry(entry, &adapter->mac_hlist, list) {
871 		ucast |= is_unicast_ether_addr(entry->addr);
872 		vec |= (1ULL << hash_mac_addr(entry->addr));
873 	}
874 	return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
875 }
876 
877 static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr)
878 {
879 	struct port_info *pi = netdev_priv(netdev);
880 	struct adapter *adapter = pi->adapter;
881 	int ret;
882 	u64 mhash = 0;
883 	u64 uhash = 0;
884 	bool free = false;
885 	bool ucast = is_unicast_ether_addr(mac_addr);
886 	const u8 *maclist[1] = {mac_addr};
887 	struct hash_mac_addr *new_entry;
888 
889 	ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist,
890 				  NULL, ucast ? &uhash : &mhash, false);
891 	if (ret < 0)
892 		goto out;
893 	/* if hash != 0, then add the addr to hash addr list
894 	 * so on the end we will calculate the hash for the
895 	 * list and program it
896 	 */
897 	if (uhash || mhash) {
898 		new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
899 		if (!new_entry)
900 			return -ENOMEM;
901 		ether_addr_copy(new_entry->addr, mac_addr);
902 		list_add_tail(&new_entry->list, &adapter->mac_hlist);
903 		ret = cxgb4vf_set_addr_hash(pi);
904 	}
905 out:
906 	return ret < 0 ? ret : 0;
907 }
908 
909 static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
910 {
911 	struct port_info *pi = netdev_priv(netdev);
912 	struct adapter *adapter = pi->adapter;
913 	int ret;
914 	const u8 *maclist[1] = {mac_addr};
915 	struct hash_mac_addr *entry, *tmp;
916 
917 	/* If the MAC address to be removed is in the hash addr
918 	 * list, delete it from the list and update hash vector
919 	 */
920 	list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) {
921 		if (ether_addr_equal(entry->addr, mac_addr)) {
922 			list_del(&entry->list);
923 			kfree(entry);
924 			return cxgb4vf_set_addr_hash(pi);
925 		}
926 	}
927 
928 	ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false);
929 	return ret < 0 ? -EINVAL : 0;
930 }
931 
932 /*
933  * Set RX properties of a port, such as promiscruity, address filters, and MTU.
934  * If @mtu is -1 it is left unchanged.
935  */
936 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
937 {
938 	struct port_info *pi = netdev_priv(dev);
939 
940 	if (!(dev->flags & IFF_PROMISC)) {
941 		__dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
942 		if (!(dev->flags & IFF_ALLMULTI))
943 			__dev_mc_sync(dev, cxgb4vf_mac_sync,
944 				      cxgb4vf_mac_unsync);
945 	}
946 	return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
947 			       (dev->flags & IFF_PROMISC) != 0,
948 			       (dev->flags & IFF_ALLMULTI) != 0,
949 			       1, -1, sleep_ok);
950 }
951 
952 /*
953  * Set the current receive modes on the device.
954  */
955 static void cxgb4vf_set_rxmode(struct net_device *dev)
956 {
957 	/* unfortunately we can't return errors to the stack */
958 	set_rxmode(dev, -1, false);
959 }
960 
961 /*
962  * Find the entry in the interrupt holdoff timer value array which comes
963  * closest to the specified interrupt holdoff value.
964  */
965 static int closest_timer(const struct sge *s, int us)
966 {
967 	int i, timer_idx = 0, min_delta = INT_MAX;
968 
969 	for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
970 		int delta = us - s->timer_val[i];
971 		if (delta < 0)
972 			delta = -delta;
973 		if (delta < min_delta) {
974 			min_delta = delta;
975 			timer_idx = i;
976 		}
977 	}
978 	return timer_idx;
979 }
980 
981 static int closest_thres(const struct sge *s, int thres)
982 {
983 	int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
984 
985 	for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
986 		delta = thres - s->counter_val[i];
987 		if (delta < 0)
988 			delta = -delta;
989 		if (delta < min_delta) {
990 			min_delta = delta;
991 			pktcnt_idx = i;
992 		}
993 	}
994 	return pktcnt_idx;
995 }
996 
997 /*
998  * Return a queue's interrupt hold-off time in us.  0 means no timer.
999  */
1000 static unsigned int qtimer_val(const struct adapter *adapter,
1001 			       const struct sge_rspq *rspq)
1002 {
1003 	unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params);
1004 
1005 	return timer_idx < SGE_NTIMERS
1006 		? adapter->sge.timer_val[timer_idx]
1007 		: 0;
1008 }
1009 
1010 /**
1011  *	set_rxq_intr_params - set a queue's interrupt holdoff parameters
1012  *	@adapter: the adapter
1013  *	@rspq: the RX response queue
1014  *	@us: the hold-off time in us, or 0 to disable timer
1015  *	@cnt: the hold-off packet count, or 0 to disable counter
1016  *
1017  *	Sets an RX response queue's interrupt hold-off time and packet count.
1018  *	At least one of the two needs to be enabled for the queue to generate
1019  *	interrupts.
1020  */
1021 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
1022 			       unsigned int us, unsigned int cnt)
1023 {
1024 	unsigned int timer_idx;
1025 
1026 	/*
1027 	 * If both the interrupt holdoff timer and count are specified as
1028 	 * zero, default to a holdoff count of 1 ...
1029 	 */
1030 	if ((us | cnt) == 0)
1031 		cnt = 1;
1032 
1033 	/*
1034 	 * If an interrupt holdoff count has been specified, then find the
1035 	 * closest configured holdoff count and use that.  If the response
1036 	 * queue has already been created, then update its queue context
1037 	 * parameters ...
1038 	 */
1039 	if (cnt) {
1040 		int err;
1041 		u32 v, pktcnt_idx;
1042 
1043 		pktcnt_idx = closest_thres(&adapter->sge, cnt);
1044 		if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1045 			v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
1046 			    FW_PARAMS_PARAM_X_V(
1047 					FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1048 			    FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id);
1049 			err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1050 			if (err)
1051 				return err;
1052 		}
1053 		rspq->pktcnt_idx = pktcnt_idx;
1054 	}
1055 
1056 	/*
1057 	 * Compute the closest holdoff timer index from the supplied holdoff
1058 	 * timer value.
1059 	 */
1060 	timer_idx = (us == 0
1061 		     ? SGE_TIMER_RSTRT_CNTR
1062 		     : closest_timer(&adapter->sge, us));
1063 
1064 	/*
1065 	 * Update the response queue's interrupt coalescing parameters and
1066 	 * return success.
1067 	 */
1068 	rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
1069 			     QINTR_CNT_EN_V(cnt > 0));
1070 	return 0;
1071 }
1072 
1073 /*
1074  * Return a version number to identify the type of adapter.  The scheme is:
1075  * - bits 0..9: chip version
1076  * - bits 10..15: chip revision
1077  */
1078 static inline unsigned int mk_adap_vers(const struct adapter *adapter)
1079 {
1080 	/*
1081 	 * Chip version 4, revision 0x3f (cxgb4vf).
1082 	 */
1083 	return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
1084 }
1085 
1086 /*
1087  * Execute the specified ioctl command.
1088  */
1089 static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1090 {
1091 	int ret = 0;
1092 
1093 	switch (cmd) {
1094 	    /*
1095 	     * The VF Driver doesn't have access to any of the other
1096 	     * common Ethernet device ioctl()'s (like reading/writing
1097 	     * PHY registers, etc.
1098 	     */
1099 
1100 	default:
1101 		ret = -EOPNOTSUPP;
1102 		break;
1103 	}
1104 	return ret;
1105 }
1106 
1107 /*
1108  * Change the device's MTU.
1109  */
1110 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1111 {
1112 	int ret;
1113 	struct port_info *pi = netdev_priv(dev);
1114 
1115 	/* accommodate SACK */
1116 	if (new_mtu < 81)
1117 		return -EINVAL;
1118 
1119 	ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1120 			      -1, -1, -1, -1, true);
1121 	if (!ret)
1122 		dev->mtu = new_mtu;
1123 	return ret;
1124 }
1125 
1126 static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
1127 	netdev_features_t features)
1128 {
1129 	/*
1130 	 * Since there is no support for separate rx/tx vlan accel
1131 	 * enable/disable make sure tx flag is always in same state as rx.
1132 	 */
1133 	if (features & NETIF_F_HW_VLAN_CTAG_RX)
1134 		features |= NETIF_F_HW_VLAN_CTAG_TX;
1135 	else
1136 		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
1137 
1138 	return features;
1139 }
1140 
1141 static int cxgb4vf_set_features(struct net_device *dev,
1142 	netdev_features_t features)
1143 {
1144 	struct port_info *pi = netdev_priv(dev);
1145 	netdev_features_t changed = dev->features ^ features;
1146 
1147 	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
1148 		t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1149 				features & NETIF_F_HW_VLAN_CTAG_TX, 0);
1150 
1151 	return 0;
1152 }
1153 
1154 /*
1155  * Change the devices MAC address.
1156  */
1157 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1158 {
1159 	int ret;
1160 	struct sockaddr *addr = _addr;
1161 	struct port_info *pi = netdev_priv(dev);
1162 
1163 	if (!is_valid_ether_addr(addr->sa_data))
1164 		return -EADDRNOTAVAIL;
1165 
1166 	ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
1167 			      addr->sa_data, true);
1168 	if (ret < 0)
1169 		return ret;
1170 
1171 	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1172 	pi->xact_addr_filt = ret;
1173 	return 0;
1174 }
1175 
1176 #ifdef CONFIG_NET_POLL_CONTROLLER
1177 /*
1178  * Poll all of our receive queues.  This is called outside of normal interrupt
1179  * context.
1180  */
1181 static void cxgb4vf_poll_controller(struct net_device *dev)
1182 {
1183 	struct port_info *pi = netdev_priv(dev);
1184 	struct adapter *adapter = pi->adapter;
1185 
1186 	if (adapter->flags & USING_MSIX) {
1187 		struct sge_eth_rxq *rxq;
1188 		int nqsets;
1189 
1190 		rxq = &adapter->sge.ethrxq[pi->first_qset];
1191 		for (nqsets = pi->nqsets; nqsets; nqsets--) {
1192 			t4vf_sge_intr_msix(0, &rxq->rspq);
1193 			rxq++;
1194 		}
1195 	} else
1196 		t4vf_intr_handler(adapter)(0, adapter);
1197 }
1198 #endif
1199 
1200 /*
1201  * Ethtool operations.
1202  * ===================
1203  *
1204  * Note that we don't support any ethtool operations which change the physical
1205  * state of the port to which we're linked.
1206  */
1207 
1208 static unsigned int t4vf_from_fw_linkcaps(enum fw_port_type type,
1209 					  unsigned int caps)
1210 {
1211 	unsigned int v = 0;
1212 
1213 	if (type == FW_PORT_TYPE_BT_SGMII || type == FW_PORT_TYPE_BT_XFI ||
1214 	    type == FW_PORT_TYPE_BT_XAUI) {
1215 		v |= SUPPORTED_TP;
1216 		if (caps & FW_PORT_CAP_SPEED_100M)
1217 			v |= SUPPORTED_100baseT_Full;
1218 		if (caps & FW_PORT_CAP_SPEED_1G)
1219 			v |= SUPPORTED_1000baseT_Full;
1220 		if (caps & FW_PORT_CAP_SPEED_10G)
1221 			v |= SUPPORTED_10000baseT_Full;
1222 	} else if (type == FW_PORT_TYPE_KX4 || type == FW_PORT_TYPE_KX) {
1223 		v |= SUPPORTED_Backplane;
1224 		if (caps & FW_PORT_CAP_SPEED_1G)
1225 			v |= SUPPORTED_1000baseKX_Full;
1226 		if (caps & FW_PORT_CAP_SPEED_10G)
1227 			v |= SUPPORTED_10000baseKX4_Full;
1228 	} else if (type == FW_PORT_TYPE_KR)
1229 		v |= SUPPORTED_Backplane | SUPPORTED_10000baseKR_Full;
1230 	else if (type == FW_PORT_TYPE_BP_AP)
1231 		v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
1232 		     SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full;
1233 	else if (type == FW_PORT_TYPE_BP4_AP)
1234 		v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
1235 		     SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full |
1236 		     SUPPORTED_10000baseKX4_Full;
1237 	else if (type == FW_PORT_TYPE_FIBER_XFI ||
1238 		 type == FW_PORT_TYPE_FIBER_XAUI ||
1239 		 type == FW_PORT_TYPE_SFP ||
1240 		 type == FW_PORT_TYPE_QSFP_10G ||
1241 		 type == FW_PORT_TYPE_QSA) {
1242 		v |= SUPPORTED_FIBRE;
1243 		if (caps & FW_PORT_CAP_SPEED_1G)
1244 			v |= SUPPORTED_1000baseT_Full;
1245 		if (caps & FW_PORT_CAP_SPEED_10G)
1246 			v |= SUPPORTED_10000baseT_Full;
1247 	} else if (type == FW_PORT_TYPE_BP40_BA ||
1248 		   type == FW_PORT_TYPE_QSFP) {
1249 		v |= SUPPORTED_40000baseSR4_Full;
1250 		v |= SUPPORTED_FIBRE;
1251 	}
1252 
1253 	if (caps & FW_PORT_CAP_ANEG)
1254 		v |= SUPPORTED_Autoneg;
1255 	return v;
1256 }
1257 
1258 static int cxgb4vf_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
1259 {
1260 	const struct port_info *p = netdev_priv(dev);
1261 
1262 	if (p->port_type == FW_PORT_TYPE_BT_SGMII ||
1263 	    p->port_type == FW_PORT_TYPE_BT_XFI ||
1264 	    p->port_type == FW_PORT_TYPE_BT_XAUI)
1265 		cmd->port = PORT_TP;
1266 	else if (p->port_type == FW_PORT_TYPE_FIBER_XFI ||
1267 		 p->port_type == FW_PORT_TYPE_FIBER_XAUI)
1268 		cmd->port = PORT_FIBRE;
1269 	else if (p->port_type == FW_PORT_TYPE_SFP ||
1270 		 p->port_type == FW_PORT_TYPE_QSFP_10G ||
1271 		 p->port_type == FW_PORT_TYPE_QSA ||
1272 		 p->port_type == FW_PORT_TYPE_QSFP) {
1273 		if (p->mod_type == FW_PORT_MOD_TYPE_LR ||
1274 		    p->mod_type == FW_PORT_MOD_TYPE_SR ||
1275 		    p->mod_type == FW_PORT_MOD_TYPE_ER ||
1276 		    p->mod_type == FW_PORT_MOD_TYPE_LRM)
1277 			cmd->port = PORT_FIBRE;
1278 		else if (p->mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
1279 			 p->mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
1280 			cmd->port = PORT_DA;
1281 		else
1282 			cmd->port = PORT_OTHER;
1283 	} else
1284 		cmd->port = PORT_OTHER;
1285 
1286 	if (p->mdio_addr >= 0) {
1287 		cmd->phy_address = p->mdio_addr;
1288 		cmd->transceiver = XCVR_EXTERNAL;
1289 		cmd->mdio_support = p->port_type == FW_PORT_TYPE_BT_SGMII ?
1290 			MDIO_SUPPORTS_C22 : MDIO_SUPPORTS_C45;
1291 	} else {
1292 		cmd->phy_address = 0;  /* not really, but no better option */
1293 		cmd->transceiver = XCVR_INTERNAL;
1294 		cmd->mdio_support = 0;
1295 	}
1296 
1297 	cmd->supported = t4vf_from_fw_linkcaps(p->port_type,
1298 					       p->link_cfg.supported);
1299 	cmd->advertising = t4vf_from_fw_linkcaps(p->port_type,
1300 					    p->link_cfg.advertising);
1301 	ethtool_cmd_speed_set(cmd,
1302 			      netif_carrier_ok(dev) ? p->link_cfg.speed : 0);
1303 	cmd->duplex = DUPLEX_FULL;
1304 	cmd->autoneg = p->link_cfg.autoneg;
1305 	cmd->maxtxpkt = 0;
1306 	cmd->maxrxpkt = 0;
1307 	return 0;
1308 }
1309 
1310 /*
1311  * Return our driver information.
1312  */
1313 static void cxgb4vf_get_drvinfo(struct net_device *dev,
1314 				struct ethtool_drvinfo *drvinfo)
1315 {
1316 	struct adapter *adapter = netdev2adap(dev);
1317 
1318 	strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1319 	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
1320 	strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1321 		sizeof(drvinfo->bus_info));
1322 	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1323 		 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1324 		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev),
1325 		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev),
1326 		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev),
1327 		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev),
1328 		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev),
1329 		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev),
1330 		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev),
1331 		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev));
1332 }
1333 
1334 /*
1335  * Return current adapter message level.
1336  */
1337 static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1338 {
1339 	return netdev2adap(dev)->msg_enable;
1340 }
1341 
1342 /*
1343  * Set current adapter message level.
1344  */
1345 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1346 {
1347 	netdev2adap(dev)->msg_enable = msglevel;
1348 }
1349 
1350 /*
1351  * Return the device's current Queue Set ring size parameters along with the
1352  * allowed maximum values.  Since ethtool doesn't understand the concept of
1353  * multi-queue devices, we just return the current values associated with the
1354  * first Queue Set.
1355  */
1356 static void cxgb4vf_get_ringparam(struct net_device *dev,
1357 				  struct ethtool_ringparam *rp)
1358 {
1359 	const struct port_info *pi = netdev_priv(dev);
1360 	const struct sge *s = &pi->adapter->sge;
1361 
1362 	rp->rx_max_pending = MAX_RX_BUFFERS;
1363 	rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1364 	rp->rx_jumbo_max_pending = 0;
1365 	rp->tx_max_pending = MAX_TXQ_ENTRIES;
1366 
1367 	rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1368 	rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1369 	rp->rx_jumbo_pending = 0;
1370 	rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1371 }
1372 
1373 /*
1374  * Set the Queue Set ring size parameters for the device.  Again, since
1375  * ethtool doesn't allow for the concept of multiple queues per device, we'll
1376  * apply these new values across all of the Queue Sets associated with the
1377  * device -- after vetting them of course!
1378  */
1379 static int cxgb4vf_set_ringparam(struct net_device *dev,
1380 				 struct ethtool_ringparam *rp)
1381 {
1382 	const struct port_info *pi = netdev_priv(dev);
1383 	struct adapter *adapter = pi->adapter;
1384 	struct sge *s = &adapter->sge;
1385 	int qs;
1386 
1387 	if (rp->rx_pending > MAX_RX_BUFFERS ||
1388 	    rp->rx_jumbo_pending ||
1389 	    rp->tx_pending > MAX_TXQ_ENTRIES ||
1390 	    rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1391 	    rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1392 	    rp->rx_pending < MIN_FL_ENTRIES ||
1393 	    rp->tx_pending < MIN_TXQ_ENTRIES)
1394 		return -EINVAL;
1395 
1396 	if (adapter->flags & FULL_INIT_DONE)
1397 		return -EBUSY;
1398 
1399 	for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1400 		s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1401 		s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1402 		s->ethtxq[qs].q.size = rp->tx_pending;
1403 	}
1404 	return 0;
1405 }
1406 
1407 /*
1408  * Return the interrupt holdoff timer and count for the first Queue Set on the
1409  * device.  Our extension ioctl() (the cxgbtool interface) allows the
1410  * interrupt holdoff timer to be read on all of the device's Queue Sets.
1411  */
1412 static int cxgb4vf_get_coalesce(struct net_device *dev,
1413 				struct ethtool_coalesce *coalesce)
1414 {
1415 	const struct port_info *pi = netdev_priv(dev);
1416 	const struct adapter *adapter = pi->adapter;
1417 	const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1418 
1419 	coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1420 	coalesce->rx_max_coalesced_frames =
1421 		((rspq->intr_params & QINTR_CNT_EN_F)
1422 		 ? adapter->sge.counter_val[rspq->pktcnt_idx]
1423 		 : 0);
1424 	return 0;
1425 }
1426 
1427 /*
1428  * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1429  * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1430  * the interrupt holdoff timer on any of the device's Queue Sets.
1431  */
1432 static int cxgb4vf_set_coalesce(struct net_device *dev,
1433 				struct ethtool_coalesce *coalesce)
1434 {
1435 	const struct port_info *pi = netdev_priv(dev);
1436 	struct adapter *adapter = pi->adapter;
1437 
1438 	return set_rxq_intr_params(adapter,
1439 				   &adapter->sge.ethrxq[pi->first_qset].rspq,
1440 				   coalesce->rx_coalesce_usecs,
1441 				   coalesce->rx_max_coalesced_frames);
1442 }
1443 
1444 /*
1445  * Report current port link pause parameter settings.
1446  */
1447 static void cxgb4vf_get_pauseparam(struct net_device *dev,
1448 				   struct ethtool_pauseparam *pauseparam)
1449 {
1450 	struct port_info *pi = netdev_priv(dev);
1451 
1452 	pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1453 	pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1454 	pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1455 }
1456 
1457 /*
1458  * Identify the port by blinking the port's LED.
1459  */
1460 static int cxgb4vf_phys_id(struct net_device *dev,
1461 			   enum ethtool_phys_id_state state)
1462 {
1463 	unsigned int val;
1464 	struct port_info *pi = netdev_priv(dev);
1465 
1466 	if (state == ETHTOOL_ID_ACTIVE)
1467 		val = 0xffff;
1468 	else if (state == ETHTOOL_ID_INACTIVE)
1469 		val = 0;
1470 	else
1471 		return -EINVAL;
1472 
1473 	return t4vf_identify_port(pi->adapter, pi->viid, val);
1474 }
1475 
/*
 * Port stats maintained per queue of the port.  collect_sge_port_stats()
 * fills this in by summing the per-queue counters over all of a port's
 * Queue Sets.  The field order must match the second group of names in
 * stats_strings[].
 */
struct queue_port_stats {
	u64 tso;		/* sum of the TX queues' tso counters */
	u64 tx_csum;		/* sum of the TX queues' tx_cso counters */
	u64 rx_csum;		/* sum of the RX queues' stats.rx_cso counters */
	u64 vlan_ex;		/* sum of the RX queues' stats.vlan_ex counters */
	u64 vlan_ins;		/* sum of the TX queues' vlan_ins counters */
	u64 lro_pkts;		/* sum of the RX queues' stats.lro_pkts counters */
	u64 lro_merged;		/* sum of the RX queues' stats.lro_merged counters */
};
1488 
/*
 * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
 * these need to match the order of the values written by
 * cxgb4vf_get_ethtool_stats(): first the t4vf_port_stats structure filled in
 * by t4vf_get_port_stats(), then the queue_port_stats structure filled in by
 * collect_sge_port_stats().  Each name is space-padded to a common width.
 */
static const char stats_strings[][ETH_GSTRING_LEN] = {
	/*
	 * These must match the layout of the t4vf_port_stats structure.
	 */
	"TxBroadcastBytes  ",
	"TxBroadcastFrames ",
	"TxMulticastBytes  ",
	"TxMulticastFrames ",
	"TxUnicastBytes    ",
	"TxUnicastFrames   ",
	"TxDroppedFrames   ",
	"TxOffloadBytes    ",
	"TxOffloadFrames   ",
	"RxBroadcastBytes  ",
	"RxBroadcastFrames ",
	"RxMulticastBytes  ",
	"RxMulticastFrames ",
	"RxUnicastBytes    ",
	"RxUnicastFrames   ",
	"RxErrorFrames     ",

	/*
	 * These are accumulated per-queue statistics and must match the
	 * order of the fields in the queue_port_stats structure.
	 */
	"TSO               ",
	"TxCsumOffload     ",
	"RxCsumGood        ",
	"VLANextractions   ",
	"VLANinsertions    ",
	"GROPackets        ",
	"GROMerged         ",
};
1527 
1528 /*
1529  * Return the number of statistics in the specified statistics set.
1530  */
1531 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1532 {
1533 	switch (sset) {
1534 	case ETH_SS_STATS:
1535 		return ARRAY_SIZE(stats_strings);
1536 	default:
1537 		return -EOPNOTSUPP;
1538 	}
1539 	/*NOTREACHED*/
1540 }
1541 
1542 /*
1543  * Return the strings for the specified statistics set.
1544  */
1545 static void cxgb4vf_get_strings(struct net_device *dev,
1546 				u32 sset,
1547 				u8 *data)
1548 {
1549 	switch (sset) {
1550 	case ETH_SS_STATS:
1551 		memcpy(data, stats_strings, sizeof(stats_strings));
1552 		break;
1553 	}
1554 }
1555 
1556 /*
1557  * Small utility routine to accumulate queue statistics across the queues of
1558  * a "port".
1559  */
1560 static void collect_sge_port_stats(const struct adapter *adapter,
1561 				   const struct port_info *pi,
1562 				   struct queue_port_stats *stats)
1563 {
1564 	const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1565 	const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1566 	int qs;
1567 
1568 	memset(stats, 0, sizeof(*stats));
1569 	for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1570 		stats->tso += txq->tso;
1571 		stats->tx_csum += txq->tx_cso;
1572 		stats->rx_csum += rxq->stats.rx_cso;
1573 		stats->vlan_ex += rxq->stats.vlan_ex;
1574 		stats->vlan_ins += txq->vlan_ins;
1575 		stats->lro_pkts += rxq->stats.lro_pkts;
1576 		stats->lro_merged += rxq->stats.lro_merged;
1577 	}
1578 }
1579 
1580 /*
1581  * Return the ETH_SS_STATS statistics set.
1582  */
1583 static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1584 				      struct ethtool_stats *stats,
1585 				      u64 *data)
1586 {
1587 	struct port_info *pi = netdev2pinfo(dev);
1588 	struct adapter *adapter = pi->adapter;
1589 	int err = t4vf_get_port_stats(adapter, pi->pidx,
1590 				      (struct t4vf_port_stats *)data);
1591 	if (err)
1592 		memset(data, 0, sizeof(struct t4vf_port_stats));
1593 
1594 	data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1595 	collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1596 }
1597 
1598 /*
1599  * Return the size of our register map.
1600  */
1601 static int cxgb4vf_get_regs_len(struct net_device *dev)
1602 {
1603 	return T4VF_REGMAP_SIZE;
1604 }
1605 
1606 /*
1607  * Dump a block of registers, start to end inclusive, into a buffer.
1608  */
1609 static void reg_block_dump(struct adapter *adapter, void *regbuf,
1610 			   unsigned int start, unsigned int end)
1611 {
1612 	u32 *bp = regbuf + start - T4VF_REGMAP_START;
1613 
1614 	for ( ; start <= end; start += sizeof(u32)) {
1615 		/*
1616 		 * Avoid reading the Mailbox Control register since that
1617 		 * can trigger a Mailbox Ownership Arbitration cycle and
1618 		 * interfere with communication with the firmware.
1619 		 */
1620 		if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1621 			*bp++ = 0xffff;
1622 		else
1623 			*bp++ = t4_read_reg(adapter, start);
1624 	}
1625 }
1626 
1627 /*
1628  * Copy our entire register map into the provided buffer.
1629  */
1630 static void cxgb4vf_get_regs(struct net_device *dev,
1631 			     struct ethtool_regs *regs,
1632 			     void *regbuf)
1633 {
1634 	struct adapter *adapter = netdev2adap(dev);
1635 
1636 	regs->version = mk_adap_vers(adapter);
1637 
1638 	/*
1639 	 * Fill in register buffer with our register map.
1640 	 */
1641 	memset(regbuf, 0, T4VF_REGMAP_SIZE);
1642 
1643 	reg_block_dump(adapter, regbuf,
1644 		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1645 		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1646 	reg_block_dump(adapter, regbuf,
1647 		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1648 		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1649 
1650 	/* T5 adds new registers in the PL Register map.
1651 	 */
1652 	reg_block_dump(adapter, regbuf,
1653 		       T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1654 		       T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip)
1655 		       ? PL_VF_WHOAMI_A : PL_VF_REVISION_A));
1656 	reg_block_dump(adapter, regbuf,
1657 		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1658 		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1659 
1660 	reg_block_dump(adapter, regbuf,
1661 		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1662 		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1663 }
1664 
1665 /*
1666  * Report current Wake On LAN settings.
1667  */
1668 static void cxgb4vf_get_wol(struct net_device *dev,
1669 			    struct ethtool_wolinfo *wol)
1670 {
1671 	wol->supported = 0;
1672 	wol->wolopts = 0;
1673 	memset(&wol->sopass, 0, sizeof(wol->sopass));
1674 }
1675 
/*
 * TCP Segmentation Offload flags which we support: TSO over IPv4
 * (NETIF_F_TSO), TSO over IPv6 (NETIF_F_TSO6) and TSO with ECN
 * (NETIF_F_TSO_ECN).
 */
#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1680 
/*
 * ethtool operations table.  Everything except .get_link (which uses the
 * generic ethtool_op_get_link helper) is implemented by the cxgb4vf_*()
 * routines in this file.
 */
static const struct ethtool_ops cxgb4vf_ethtool_ops = {
	.get_settings		= cxgb4vf_get_settings,
	.get_drvinfo		= cxgb4vf_get_drvinfo,
	.get_msglevel		= cxgb4vf_get_msglevel,
	.set_msglevel		= cxgb4vf_set_msglevel,
	.get_ringparam		= cxgb4vf_get_ringparam,
	.set_ringparam		= cxgb4vf_set_ringparam,
	.get_coalesce		= cxgb4vf_get_coalesce,
	.set_coalesce		= cxgb4vf_set_coalesce,
	.get_pauseparam		= cxgb4vf_get_pauseparam,
	.get_link		= ethtool_op_get_link,
	.get_strings		= cxgb4vf_get_strings,
	.set_phys_id		= cxgb4vf_phys_id,
	.get_sset_count		= cxgb4vf_get_sset_count,
	.get_ethtool_stats	= cxgb4vf_get_ethtool_stats,
	.get_regs_len		= cxgb4vf_get_regs_len,
	.get_regs		= cxgb4vf_get_regs,
	.get_wol		= cxgb4vf_get_wol,
};
1700 
1701 /*
1702  * /sys/kernel/debug/cxgb4vf support code and data.
1703  * ================================================
1704  */
1705 
1706 /*
1707  * Show Firmware Mailbox Command/Reply Log
1708  *
1709  * Note that we don't do any locking when dumping the Firmware Mailbox Log so
1710  * it's possible that we can catch things during a log update and therefore
1711  * see partially corrupted log entries.  But i9t's probably Good Enough(tm).
1712  * If we ever decide that we want to make sure that we're dumping a coherent
1713  * log, we'd need to perform locking in the mailbox logging and in
1714  * mboxlog_open() where we'd need to grab the entire mailbox log in one go
1715  * like we do for the Firmware Device Log.  But as stated above, meh ...
1716  */
1717 static int mboxlog_show(struct seq_file *seq, void *v)
1718 {
1719 	struct adapter *adapter = seq->private;
1720 	struct mbox_cmd_log *log = adapter->mbox_log;
1721 	struct mbox_cmd *entry;
1722 	int entry_idx, i;
1723 
1724 	if (v == SEQ_START_TOKEN) {
1725 		seq_printf(seq,
1726 			   "%10s  %15s  %5s  %5s  %s\n",
1727 			   "Seq#", "Tstamp", "Atime", "Etime",
1728 			   "Command/Reply");
1729 		return 0;
1730 	}
1731 
1732 	entry_idx = log->cursor + ((uintptr_t)v - 2);
1733 	if (entry_idx >= log->size)
1734 		entry_idx -= log->size;
1735 	entry = mbox_cmd_log_entry(log, entry_idx);
1736 
1737 	/* skip over unused entries */
1738 	if (entry->timestamp == 0)
1739 		return 0;
1740 
1741 	seq_printf(seq, "%10u  %15llu  %5d  %5d",
1742 		   entry->seqno, entry->timestamp,
1743 		   entry->access, entry->execute);
1744 	for (i = 0; i < MBOX_LEN / 8; i++) {
1745 		u64 flit = entry->cmd[i];
1746 		u32 hi = (u32)(flit >> 32);
1747 		u32 lo = (u32)flit;
1748 
1749 		seq_printf(seq, "  %08x %08x", hi, lo);
1750 	}
1751 	seq_puts(seq, "\n");
1752 	return 0;
1753 }
1754 
1755 static inline void *mboxlog_get_idx(struct seq_file *seq, loff_t pos)
1756 {
1757 	struct adapter *adapter = seq->private;
1758 	struct mbox_cmd_log *log = adapter->mbox_log;
1759 
1760 	return ((pos <= log->size) ? (void *)(uintptr_t)(pos + 1) : NULL);
1761 }
1762 
1763 static void *mboxlog_start(struct seq_file *seq, loff_t *pos)
1764 {
1765 	return *pos ? mboxlog_get_idx(seq, *pos) : SEQ_START_TOKEN;
1766 }
1767 
1768 static void *mboxlog_next(struct seq_file *seq, void *v, loff_t *pos)
1769 {
1770 	++*pos;
1771 	return mboxlog_get_idx(seq, *pos);
1772 }
1773 
static void mboxlog_stop(struct seq_file *seq, void *v)
{
	/* Nothing to undo: mboxlog_start()/mboxlog_next() acquire nothing. */
}
1777 
/* seq_file iterator callbacks for the mailbox log dump. */
static const struct seq_operations mboxlog_seq_ops = {
	.start = mboxlog_start,
	.next  = mboxlog_next,
	.stop  = mboxlog_stop,
	.show  = mboxlog_show
};
1784 
1785 static int mboxlog_open(struct inode *inode, struct file *file)
1786 {
1787 	int res = seq_open(file, &mboxlog_seq_ops);
1788 
1789 	if (!res) {
1790 		struct seq_file *seq = file->private_data;
1791 
1792 		seq->private = inode->i_private;
1793 	}
1794 	return res;
1795 }
1796 
/*
 * File operations for the mailbox log file: a custom open routine plus the
 * generic seq_file read/llseek/release helpers.
 */
static const struct file_operations mboxlog_fops = {
	.owner   = THIS_MODULE,
	.open    = mboxlog_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
1804 
/*
 * Show SGE Queue Set information.  We display QPL Queue Sets per line.
 */
#define QPL	4

/*
 * Emit one "entry" of the SGE queue information file.  @v encodes the entry
 * number plus one (see sge_queue_start()/sge_queue_next()).  The first
 * DIV_ROUND_UP(ethqsets, QPL) entries each describe up to QPL Ethernet Queue
 * Sets side by side; the next entry describes the firmware event queue and
 * the one after that the interrupt queue.
 */
static int sge_qinfo_show(struct seq_file *seq, void *v)
{
	struct adapter *adapter = seq->private;
	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
	int qs, r = (uintptr_t)v - 1;

	/* Separate every entry after the first with a blank line. */
	if (r)
		seq_putc(seq, '\n');

	/*
	 * Row helpers: S3 prints a label followed by one column per Queue
	 * Set, evaluating the value expression with "qs" as the column
	 * index and "n" as the number of columns.  S prints strings, T and
	 * R print unsigned fields of txq[qs] and rxq[qs] respectively.
	 */
	#define S3(fmt_spec, s, v) \
		do {\
			seq_printf(seq, "%-12s", s); \
			for (qs = 0; qs < n; ++qs) \
				seq_printf(seq, " %16" fmt_spec, v); \
			seq_putc(seq, '\n'); \
		} while (0)
	#define S(s, v)		S3("s", s, v)
	#define T(s, v)		S3("u", s, txq[qs].v)
	#define R(s, v)		S3("u", s, rxq[qs].v)

	if (r < eth_entries) {
		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
		/* The last group may hold fewer than QPL Queue Sets. */
		int n = min(QPL, adapter->sge.ethqsets - QPL * r);

		S("QType:", "Ethernet");
		S("Interface:",
		  (rxq[qs].rspq.netdev
		   ? rxq[qs].rspq.netdev->name
		   : "N/A"));
		S3("d", "Port:",
		   (rxq[qs].rspq.netdev
		    ? ((struct port_info *)
		       netdev_priv(rxq[qs].rspq.netdev))->port_id
		    : -1));
		T("TxQ ID:", q.abs_id);
		T("TxQ size:", q.size);
		T("TxQ inuse:", q.in_use);
		T("TxQ PIdx:", q.pidx);
		T("TxQ CIdx:", q.cidx);
		R("RspQ ID:", rspq.abs_id);
		R("RspQ size:", rspq.size);
		R("RspQE size:", rspq.iqe_len);
		S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
		S3("u", "Intr pktcnt:",
		   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
		R("RspQ CIdx:", rspq.cidx);
		R("RspQ Gen:", rspq.gen);
		R("FL ID:", fl.abs_id);
		R("FL size:", fl.size - MIN_FL_RESID);
		R("FL avail:", fl.avail);
		R("FL PIdx:", fl.pidx);
		R("FL CIdx:", fl.cidx);
		return 0;
	}

	/* Past the Ethernet entries: 0 is the FW event queue, 1 the intrq. */
	r -= eth_entries;
	if (r == 0) {
		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;

		seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
			   qtimer_val(adapter, evtq));
		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
			   adapter->sge.counter_val[evtq->pktcnt_idx]);
		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
	} else if (r == 1) {
		const struct sge_rspq *intrq = &adapter->sge.intrq;

		seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
			   qtimer_val(adapter, intrq));
		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
			   adapter->sge.counter_val[intrq->pktcnt_idx]);
		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
	}

	#undef R
	#undef T
	#undef S
	#undef S3

	return 0;
}
1898 
1899 /*
1900  * Return the number of "entries" in our "file".  We group the multi-Queue
1901  * sections with QPL Queue Sets per "entry".  The sections of the output are:
1902  *
1903  *     Ethernet RX/TX Queue Sets
1904  *     Firmware Event Queue
1905  *     Forwarded Interrupt Queue (if in MSI mode)
1906  */
1907 static int sge_queue_entries(const struct adapter *adapter)
1908 {
1909 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1910 		((adapter->flags & USING_MSI) != 0);
1911 }
1912 
1913 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
1914 {
1915 	int entries = sge_queue_entries(seq->private);
1916 
1917 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1918 }
1919 
static void sge_queue_stop(struct seq_file *seq, void *v)
{
	/* Nothing to undo: sge_queue_start()/sge_queue_next() acquire nothing. */
}
1923 
1924 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
1925 {
1926 	int entries = sge_queue_entries(seq->private);
1927 
1928 	++*pos;
1929 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1930 }
1931 
/* seq_file iterator callbacks for the SGE Queue Set information dump. */
static const struct seq_operations sge_qinfo_seq_ops = {
	.start = sge_queue_start,
	.next  = sge_queue_next,
	.stop  = sge_queue_stop,
	.show  = sge_qinfo_show
};
1938 
1939 static int sge_qinfo_open(struct inode *inode, struct file *file)
1940 {
1941 	int res = seq_open(file, &sge_qinfo_seq_ops);
1942 
1943 	if (!res) {
1944 		struct seq_file *seq = file->private_data;
1945 		seq->private = inode->i_private;
1946 	}
1947 	return res;
1948 }
1949 
/*
 * File operations for the SGE queue information file: a custom open routine
 * plus the generic seq_file read/llseek/release helpers.
 */
static const struct file_operations sge_qinfo_debugfs_fops = {
	.owner   = THIS_MODULE,
	.open    = sge_qinfo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
1957 
/*
 * Show SGE Queue Set statistics.  We display QPL Queue Sets per line.
 */
#define QPL	4

/*
 * Emit one "entry" of the SGE queue statistics file.  @v encodes the entry
 * number plus one (see sge_qstats_start()/sge_qstats_next()).  The first
 * DIV_ROUND_UP(ethqsets, QPL) entries each show the statistics of up to QPL
 * Ethernet Queue Sets side by side; the next entry covers the firmware event
 * queue and the one after that the interrupt queue.
 */
static int sge_qstats_show(struct seq_file *seq, void *v)
{
	struct adapter *adapter = seq->private;
	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
	int qs, r = (uintptr_t)v - 1;

	/* Separate every entry after the first with a blank line. */
	if (r)
		seq_putc(seq, '\n');

	/*
	 * Row helpers: S3 prints a label followed by one column per Queue
	 * Set, evaluating the value expression with "qs" as the column
	 * index and "n" as the number of columns.  T3/R3 select a field of
	 * txq[qs]/rxq[qs] with an explicit format; T/R default to "lu".
	 */
	#define S3(fmt, s, v) \
		do { \
			seq_printf(seq, "%-16s", s); \
			for (qs = 0; qs < n; ++qs) \
				seq_printf(seq, " %8" fmt, v); \
			seq_putc(seq, '\n'); \
		} while (0)
	#define S(s, v)		S3("s", s, v)

	#define T3(fmt, s, v)	S3(fmt, s, txq[qs].v)
	#define T(s, v)		T3("lu", s, v)

	#define R3(fmt, s, v)	S3(fmt, s, rxq[qs].v)
	#define R(s, v)		R3("lu", s, v)

	if (r < eth_entries) {
		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
		/* The last group may hold fewer than QPL Queue Sets. */
		int n = min(QPL, adapter->sge.ethqsets - QPL * r);

		S("QType:", "Ethernet");
		S("Interface:",
		  (rxq[qs].rspq.netdev
		   ? rxq[qs].rspq.netdev->name
		   : "N/A"));
		R3("u", "RspQNullInts:", rspq.unhandled_irqs);
		R("RxPackets:", stats.pkts);
		R("RxCSO:", stats.rx_cso);
		R("VLANxtract:", stats.vlan_ex);
		R("LROmerged:", stats.lro_merged);
		R("LROpackets:", stats.lro_pkts);
		R("RxDrops:", stats.rx_drops);
		T("TSO:", tso);
		T("TxCSO:", tx_cso);
		T("VLANins:", vlan_ins);
		T("TxQFull:", q.stops);
		T("TxQRestarts:", q.restarts);
		T("TxMapErr:", mapping_err);
		R("FLAllocErr:", fl.alloc_failed);
		R("FLLrgAlcErr:", fl.large_alloc_failed);
		R("FLStarving:", fl.starving);
		return 0;
	}

	/* Past the Ethernet entries: 0 is the FW event queue, 1 the intrq. */
	r -= eth_entries;
	if (r == 0) {
		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;

		seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
			   evtq->unhandled_irqs);
		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
	} else if (r == 1) {
		const struct sge_rspq *intrq = &adapter->sge.intrq;

		seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
			   intrq->unhandled_irqs);
		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
	}

	#undef R
	#undef T
	#undef S
	#undef R3
	#undef T3
	#undef S3

	return 0;
}
2044 
2045 /*
2046  * Return the number of "entries" in our "file".  We group the multi-Queue
2047  * sections with QPL Queue Sets per "entry".  The sections of the output are:
2048  *
2049  *     Ethernet RX/TX Queue Sets
2050  *     Firmware Event Queue
2051  *     Forwarded Interrupt Queue (if in MSI mode)
2052  */
2053 static int sge_qstats_entries(const struct adapter *adapter)
2054 {
2055 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2056 		((adapter->flags & USING_MSI) != 0);
2057 }
2058 
2059 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
2060 {
2061 	int entries = sge_qstats_entries(seq->private);
2062 
2063 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2064 }
2065 
/*
 * seq_file ->stop(): intentionally empty; the start/next routines in this
 * file only compute integer cookies, so there is nothing to release here.
 */
static void sge_qstats_stop(struct seq_file *seq, void *v)
{
}
2069 
2070 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
2071 {
2072 	int entries = sge_qstats_entries(seq->private);
2073 
2074 	(*pos)++;
2075 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2076 }
2077 
2078 static const struct seq_operations sge_qstats_seq_ops = {
2079 	.start = sge_qstats_start,
2080 	.next  = sge_qstats_next,
2081 	.stop  = sge_qstats_stop,
2082 	.show  = sge_qstats_show
2083 };
2084 
2085 static int sge_qstats_open(struct inode *inode, struct file *file)
2086 {
2087 	int res = seq_open(file, &sge_qstats_seq_ops);
2088 
2089 	if (res == 0) {
2090 		struct seq_file *seq = file->private_data;
2091 		seq->private = inode->i_private;
2092 	}
2093 	return res;
2094 }
2095 
2096 static const struct file_operations sge_qstats_proc_fops = {
2097 	.owner   = THIS_MODULE,
2098 	.open    = sge_qstats_open,
2099 	.read    = seq_read,
2100 	.llseek  = seq_lseek,
2101 	.release = seq_release,
2102 };
2103 
2104 /*
2105  * Show PCI-E SR-IOV Virtual Function Resource Limits.
2106  */
2107 static int resources_show(struct seq_file *seq, void *v)
2108 {
2109 	struct adapter *adapter = seq->private;
2110 	struct vf_resources *vfres = &adapter->params.vfres;
2111 
2112 	#define S(desc, fmt, var) \
2113 		seq_printf(seq, "%-60s " fmt "\n", \
2114 			   desc " (" #var "):", vfres->var)
2115 
2116 	S("Virtual Interfaces", "%d", nvi);
2117 	S("Egress Queues", "%d", neq);
2118 	S("Ethernet Control", "%d", nethctrl);
2119 	S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
2120 	S("Ingress Queues", "%d", niq);
2121 	S("Traffic Class", "%d", tc);
2122 	S("Port Access Rights Mask", "%#x", pmask);
2123 	S("MAC Address Filters", "%d", nexactf);
2124 	S("Firmware Command Read Capabilities", "%#x", r_caps);
2125 	S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
2126 
2127 	#undef S
2128 
2129 	return 0;
2130 }
2131 
2132 static int resources_open(struct inode *inode, struct file *file)
2133 {
2134 	return single_open(file, resources_show, inode->i_private);
2135 }
2136 
2137 static const struct file_operations resources_proc_fops = {
2138 	.owner   = THIS_MODULE,
2139 	.open    = resources_open,
2140 	.read    = seq_read,
2141 	.llseek  = seq_lseek,
2142 	.release = single_release,
2143 };
2144 
2145 /*
2146  * Show Virtual Interfaces.
2147  */
2148 static int interfaces_show(struct seq_file *seq, void *v)
2149 {
2150 	if (v == SEQ_START_TOKEN) {
2151 		seq_puts(seq, "Interface  Port   VIID\n");
2152 	} else {
2153 		struct adapter *adapter = seq->private;
2154 		int pidx = (uintptr_t)v - 2;
2155 		struct net_device *dev = adapter->port[pidx];
2156 		struct port_info *pi = netdev_priv(dev);
2157 
2158 		seq_printf(seq, "%9s  %4d  %#5x\n",
2159 			   dev->name, pi->port_id, pi->viid);
2160 	}
2161 	return 0;
2162 }
2163 
2164 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
2165 {
2166 	return pos <= adapter->params.nports
2167 		? (void *)(uintptr_t)(pos + 1)
2168 		: NULL;
2169 }
2170 
2171 static void *interfaces_start(struct seq_file *seq, loff_t *pos)
2172 {
2173 	return *pos
2174 		? interfaces_get_idx(seq->private, *pos)
2175 		: SEQ_START_TOKEN;
2176 }
2177 
2178 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
2179 {
2180 	(*pos)++;
2181 	return interfaces_get_idx(seq->private, *pos);
2182 }
2183 
/*
 * seq_file ->stop(): intentionally empty; the start/next routines in this
 * file only compute integer cookies, so there is nothing to release here.
 */
static void interfaces_stop(struct seq_file *seq, void *v)
{
}
2187 
2188 static const struct seq_operations interfaces_seq_ops = {
2189 	.start = interfaces_start,
2190 	.next  = interfaces_next,
2191 	.stop  = interfaces_stop,
2192 	.show  = interfaces_show
2193 };
2194 
2195 static int interfaces_open(struct inode *inode, struct file *file)
2196 {
2197 	int res = seq_open(file, &interfaces_seq_ops);
2198 
2199 	if (res == 0) {
2200 		struct seq_file *seq = file->private_data;
2201 		seq->private = inode->i_private;
2202 	}
2203 	return res;
2204 }
2205 
2206 static const struct file_operations interfaces_proc_fops = {
2207 	.owner   = THIS_MODULE,
2208 	.open    = interfaces_open,
2209 	.read    = seq_read,
2210 	.llseek  = seq_lseek,
2211 	.release = seq_release,
2212 };
2213 
/*
 * /sys/kernel/debug/cxgb4vf/ files list.
 *
 * Describes one debugfs node: the three fields are passed as the name, mode
 * and file operations arguments of debugfs_create_file() in setup_debugfs().
 */
struct cxgb4vf_debugfs_entry {
	const char *name;		/* name of debugfs node */
	umode_t mode;			/* file system mode */
	const struct file_operations *fops;	/* operations for the node */
};
2222 
2223 static struct cxgb4vf_debugfs_entry debugfs_files[] = {
2224 	{ "mboxlog",    S_IRUGO, &mboxlog_fops },
2225 	{ "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
2226 	{ "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
2227 	{ "resources",  S_IRUGO, &resources_proc_fops },
2228 	{ "interfaces", S_IRUGO, &interfaces_proc_fops },
2229 };
2230 
2231 /*
2232  * Module and device initialization and cleanup code.
2233  * ==================================================
2234  */
2235 
2236 /*
2237  * Set up out /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2238  * directory (debugfs_root) has already been set up.
2239  */
2240 static int setup_debugfs(struct adapter *adapter)
2241 {
2242 	int i;
2243 
2244 	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2245 
2246 	/*
2247 	 * Debugfs support is best effort.
2248 	 */
2249 	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2250 		(void)debugfs_create_file(debugfs_files[i].name,
2251 				  debugfs_files[i].mode,
2252 				  adapter->debugfs_root,
2253 				  (void *)adapter,
2254 				  debugfs_files[i].fops);
2255 
2256 	return 0;
2257 }
2258 
/*
 * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
 * it to our caller to tear down the directory (debugfs_root).  Apart from
 * asserting that the directory handle is valid, this currently does nothing.
 */
static void cleanup_debugfs(struct adapter *adapter)
{
	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));

	/*
	 * Unlike our sister routine cleanup_proc(), we don't need to remove
	 * individual entries because a call will be made to
	 * debugfs_remove_recursive().  We just need to clean up any ancillary
	 * persistent state.
	 */
	/* nothing to do */
}
2275 
2276 /* Figure out how many Ports and Queue Sets we can support.  This depends on
2277  * knowing our Virtual Function Resources and may be called a second time if
2278  * we fall back from MSI-X to MSI Interrupt Mode.
2279  */
2280 static void size_nports_qsets(struct adapter *adapter)
2281 {
2282 	struct vf_resources *vfres = &adapter->params.vfres;
2283 	unsigned int ethqsets, pmask_nports;
2284 
2285 	/* The number of "ports" which we support is equal to the number of
2286 	 * Virtual Interfaces with which we've been provisioned.
2287 	 */
2288 	adapter->params.nports = vfres->nvi;
2289 	if (adapter->params.nports > MAX_NPORTS) {
2290 		dev_warn(adapter->pdev_dev, "only using %d of %d maximum"
2291 			 " allowed virtual interfaces\n", MAX_NPORTS,
2292 			 adapter->params.nports);
2293 		adapter->params.nports = MAX_NPORTS;
2294 	}
2295 
2296 	/* We may have been provisioned with more VIs than the number of
2297 	 * ports we're allowed to access (our Port Access Rights Mask).
2298 	 * This is obviously a configuration conflict but we don't want to
2299 	 * crash the kernel or anything silly just because of that.
2300 	 */
2301 	pmask_nports = hweight32(adapter->params.vfres.pmask);
2302 	if (pmask_nports < adapter->params.nports) {
2303 		dev_warn(adapter->pdev_dev, "only using %d of %d provissioned"
2304 			 " virtual interfaces; limited by Port Access Rights"
2305 			 " mask %#x\n", pmask_nports, adapter->params.nports,
2306 			 adapter->params.vfres.pmask);
2307 		adapter->params.nports = pmask_nports;
2308 	}
2309 
2310 	/* We need to reserve an Ingress Queue for the Asynchronous Firmware
2311 	 * Event Queue.  And if we're using MSI Interrupts, we'll also need to
2312 	 * reserve an Ingress Queue for a Forwarded Interrupts.
2313 	 *
2314 	 * The rest of the FL/Intr-capable ingress queues will be matched up
2315 	 * one-for-one with Ethernet/Control egress queues in order to form
2316 	 * "Queue Sets" which will be aportioned between the "ports".  For
2317 	 * each Queue Set, we'll need the ability to allocate two Egress
2318 	 * Contexts -- one for the Ingress Queue Free List and one for the TX
2319 	 * Ethernet Queue.
2320 	 *
2321 	 * Note that even if we're currently configured to use MSI-X
2322 	 * Interrupts (module variable msi == MSI_MSIX) we may get downgraded
2323 	 * to MSI Interrupts if we can't get enough MSI-X Interrupts.  If that
2324 	 * happens we'll need to adjust things later.
2325 	 */
2326 	ethqsets = vfres->niqflint - 1 - (msi == MSI_MSI);
2327 	if (vfres->nethctrl != ethqsets)
2328 		ethqsets = min(vfres->nethctrl, ethqsets);
2329 	if (vfres->neq < ethqsets*2)
2330 		ethqsets = vfres->neq/2;
2331 	if (ethqsets > MAX_ETH_QSETS)
2332 		ethqsets = MAX_ETH_QSETS;
2333 	adapter->sge.max_ethqsets = ethqsets;
2334 
2335 	if (adapter->sge.max_ethqsets < adapter->params.nports) {
2336 		dev_warn(adapter->pdev_dev, "only using %d of %d available"
2337 			 " virtual interfaces (too few Queue Sets)\n",
2338 			 adapter->sge.max_ethqsets, adapter->params.nports);
2339 		adapter->params.nports = adapter->sge.max_ethqsets;
2340 	}
2341 }
2342 
2343 /*
2344  * Perform early "adapter" initialization.  This is where we discover what
2345  * adapter parameters we're going to be using and initialize basic adapter
2346  * hardware support.
2347  */
2348 static int adap_init0(struct adapter *adapter)
2349 {
2350 	struct sge_params *sge_params = &adapter->params.sge;
2351 	struct sge *s = &adapter->sge;
2352 	int err;
2353 	u32 param, val = 0;
2354 
2355 	/*
2356 	 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2357 	 * 2.6.31 and later we can't call pci_reset_function() in order to
2358 	 * issue an FLR because of a self- deadlock on the device semaphore.
2359 	 * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2360 	 * cases where they're needed -- for instance, some versions of KVM
2361 	 * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2362 	 * use the firmware based reset in order to reset any per function
2363 	 * state.
2364 	 */
2365 	err = t4vf_fw_reset(adapter);
2366 	if (err < 0) {
2367 		dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2368 		return err;
2369 	}
2370 
2371 	/*
2372 	 * Grab basic operational parameters.  These will predominantly have
2373 	 * been set up by the Physical Function Driver or will be hard coded
2374 	 * into the adapter.  We just have to live with them ...  Note that
2375 	 * we _must_ get our VPD parameters before our SGE parameters because
2376 	 * we need to know the adapter's core clock from the VPD in order to
2377 	 * properly decode the SGE Timer Values.
2378 	 */
2379 	err = t4vf_get_dev_params(adapter);
2380 	if (err) {
2381 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2382 			" device parameters: err=%d\n", err);
2383 		return err;
2384 	}
2385 	err = t4vf_get_vpd_params(adapter);
2386 	if (err) {
2387 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2388 			" VPD parameters: err=%d\n", err);
2389 		return err;
2390 	}
2391 	err = t4vf_get_sge_params(adapter);
2392 	if (err) {
2393 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2394 			" SGE parameters: err=%d\n", err);
2395 		return err;
2396 	}
2397 	err = t4vf_get_rss_glb_config(adapter);
2398 	if (err) {
2399 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2400 			" RSS parameters: err=%d\n", err);
2401 		return err;
2402 	}
2403 	if (adapter->params.rss.mode !=
2404 	    FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2405 		dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2406 			" mode %d\n", adapter->params.rss.mode);
2407 		return -EINVAL;
2408 	}
2409 	err = t4vf_sge_init(adapter);
2410 	if (err) {
2411 		dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2412 			" err=%d\n", err);
2413 		return err;
2414 	}
2415 
2416 	/* If we're running on newer firmware, let it know that we're
2417 	 * prepared to deal with encapsulated CPL messages.  Older
2418 	 * firmware won't understand this and we'll just get
2419 	 * unencapsulated messages ...
2420 	 */
2421 	param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
2422 		FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
2423 	val = 1;
2424 	(void) t4vf_set_params(adapter, 1, &param, &val);
2425 
2426 	/*
2427 	 * Retrieve our RX interrupt holdoff timer values and counter
2428 	 * threshold values from the SGE parameters.
2429 	 */
2430 	s->timer_val[0] = core_ticks_to_us(adapter,
2431 		TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1));
2432 	s->timer_val[1] = core_ticks_to_us(adapter,
2433 		TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1));
2434 	s->timer_val[2] = core_ticks_to_us(adapter,
2435 		TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3));
2436 	s->timer_val[3] = core_ticks_to_us(adapter,
2437 		TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3));
2438 	s->timer_val[4] = core_ticks_to_us(adapter,
2439 		TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5));
2440 	s->timer_val[5] = core_ticks_to_us(adapter,
2441 		TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5));
2442 
2443 	s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold);
2444 	s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold);
2445 	s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold);
2446 	s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold);
2447 
2448 	/*
2449 	 * Grab our Virtual Interface resource allocation, extract the
2450 	 * features that we're interested in and do a bit of sanity testing on
2451 	 * what we discover.
2452 	 */
2453 	err = t4vf_get_vfres(adapter);
2454 	if (err) {
2455 		dev_err(adapter->pdev_dev, "unable to get virtual interface"
2456 			" resources: err=%d\n", err);
2457 		return err;
2458 	}
2459 
2460 	/* Check for various parameter sanity issues */
2461 	if (adapter->params.vfres.pmask == 0) {
2462 		dev_err(adapter->pdev_dev, "no port access configured\n"
2463 			"usable!\n");
2464 		return -EINVAL;
2465 	}
2466 	if (adapter->params.vfres.nvi == 0) {
2467 		dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2468 			"usable!\n");
2469 		return -EINVAL;
2470 	}
2471 
2472 	/* Initialize nports and max_ethqsets now that we have our Virtual
2473 	 * Function Resources.
2474 	 */
2475 	size_nports_qsets(adapter);
2476 
2477 	return 0;
2478 }
2479 
2480 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2481 			     u8 pkt_cnt_idx, unsigned int size,
2482 			     unsigned int iqe_size)
2483 {
2484 	rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
2485 			     (pkt_cnt_idx < SGE_NCOUNTERS ?
2486 			      QINTR_CNT_EN_F : 0));
2487 	rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2488 			    ? pkt_cnt_idx
2489 			    : 0);
2490 	rspq->iqe_len = iqe_size;
2491 	rspq->size = size;
2492 }
2493 
2494 /*
2495  * Perform default configuration of DMA queues depending on the number and
2496  * type of ports we found and the number of available CPUs.  Most settings can
2497  * be modified by the admin via ethtool and cxgbtool prior to the adapter
2498  * being brought up for the first time.
2499  */
2500 static void cfg_queues(struct adapter *adapter)
2501 {
2502 	struct sge *s = &adapter->sge;
2503 	int q10g, n10g, qidx, pidx, qs;
2504 	size_t iqe_size;
2505 
2506 	/*
2507 	 * We should not be called till we know how many Queue Sets we can
2508 	 * support.  In particular, this means that we need to know what kind
2509 	 * of interrupts we'll be using ...
2510 	 */
2511 	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2512 
2513 	/*
2514 	 * Count the number of 10GbE Virtual Interfaces that we have.
2515 	 */
2516 	n10g = 0;
2517 	for_each_port(adapter, pidx)
2518 		n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2519 
2520 	/*
2521 	 * We default to 1 queue per non-10G port and up to # of cores queues
2522 	 * per 10G port.
2523 	 */
2524 	if (n10g == 0)
2525 		q10g = 0;
2526 	else {
2527 		int n1g = (adapter->params.nports - n10g);
2528 		q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2529 		if (q10g > num_online_cpus())
2530 			q10g = num_online_cpus();
2531 	}
2532 
2533 	/*
2534 	 * Allocate the "Queue Sets" to the various Virtual Interfaces.
2535 	 * The layout will be established in setup_sge_queues() when the
2536 	 * adapter is brough up for the first time.
2537 	 */
2538 	qidx = 0;
2539 	for_each_port(adapter, pidx) {
2540 		struct port_info *pi = adap2pinfo(adapter, pidx);
2541 
2542 		pi->first_qset = qidx;
2543 		pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
2544 		qidx += pi->nqsets;
2545 	}
2546 	s->ethqsets = qidx;
2547 
2548 	/*
2549 	 * The Ingress Queue Entry Size for our various Response Queues needs
2550 	 * to be big enough to accommodate the largest message we can receive
2551 	 * from the chip/firmware; which is 64 bytes ...
2552 	 */
2553 	iqe_size = 64;
2554 
2555 	/*
2556 	 * Set up default Queue Set parameters ...  Start off with the
2557 	 * shortest interrupt holdoff timer.
2558 	 */
2559 	for (qs = 0; qs < s->max_ethqsets; qs++) {
2560 		struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2561 		struct sge_eth_txq *txq = &s->ethtxq[qs];
2562 
2563 		init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2564 		rxq->fl.size = 72;
2565 		txq->q.size = 1024;
2566 	}
2567 
2568 	/*
2569 	 * The firmware event queue is used for link state changes and
2570 	 * notifications of TX DMA completions.
2571 	 */
2572 	init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2573 
2574 	/*
2575 	 * The forwarded interrupt queue is used when we're in MSI interrupt
2576 	 * mode.  In this mode all interrupts associated with RX queues will
2577 	 * be forwarded to a single queue which we'll associate with our MSI
2578 	 * interrupt vector.  The messages dropped in the forwarded interrupt
2579 	 * queue will indicate which ingress queue needs servicing ...  This
2580 	 * queue needs to be large enough to accommodate all of the ingress
2581 	 * queues which are forwarding their interrupt (+1 to prevent the PIDX
2582 	 * from equalling the CIDX if every ingress queue has an outstanding
2583 	 * interrupt).  The queue doesn't need to be any larger because no
2584 	 * ingress queue will ever have more than one outstanding interrupt at
2585 	 * any time ...
2586 	 */
2587 	init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2588 		  iqe_size);
2589 }
2590 
2591 /*
2592  * Reduce the number of Ethernet queues across all ports to at most n.
2593  * n provides at least one queue per port.
2594  */
2595 static void reduce_ethqs(struct adapter *adapter, int n)
2596 {
2597 	int i;
2598 	struct port_info *pi;
2599 
2600 	/*
2601 	 * While we have too many active Ether Queue Sets, interate across the
2602 	 * "ports" and reduce their individual Queue Set allocations.
2603 	 */
2604 	BUG_ON(n < adapter->params.nports);
2605 	while (n < adapter->sge.ethqsets)
2606 		for_each_port(adapter, i) {
2607 			pi = adap2pinfo(adapter, i);
2608 			if (pi->nqsets > 1) {
2609 				pi->nqsets--;
2610 				adapter->sge.ethqsets--;
2611 				if (adapter->sge.ethqsets <= n)
2612 					break;
2613 			}
2614 		}
2615 
2616 	/*
2617 	 * Reassign the starting Queue Sets for each of the "ports" ...
2618 	 */
2619 	n = 0;
2620 	for_each_port(adapter, i) {
2621 		pi = adap2pinfo(adapter, i);
2622 		pi->first_qset = n;
2623 		n += pi->nqsets;
2624 	}
2625 }
2626 
2627 /*
2628  * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2629  * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2630  * need.  Minimally we need one for every Virtual Interface plus those needed
2631  * for our "extras".  Note that this process may lower the maximum number of
2632  * allowed Queue Sets ...
2633  */
2634 static int enable_msix(struct adapter *adapter)
2635 {
2636 	int i, want, need, nqsets;
2637 	struct msix_entry entries[MSIX_ENTRIES];
2638 	struct sge *s = &adapter->sge;
2639 
2640 	for (i = 0; i < MSIX_ENTRIES; ++i)
2641 		entries[i].entry = i;
2642 
2643 	/*
2644 	 * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2645 	 * plus those needed for our "extras" (for example, the firmware
2646 	 * message queue).  We _need_ at least one "Queue Set" per Virtual
2647 	 * Interface plus those needed for our "extras".  So now we get to see
2648 	 * if the song is right ...
2649 	 */
2650 	want = s->max_ethqsets + MSIX_EXTRAS;
2651 	need = adapter->params.nports + MSIX_EXTRAS;
2652 
2653 	want = pci_enable_msix_range(adapter->pdev, entries, need, want);
2654 	if (want < 0)
2655 		return want;
2656 
2657 	nqsets = want - MSIX_EXTRAS;
2658 	if (nqsets < s->max_ethqsets) {
2659 		dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2660 			 " for %d Queue Sets\n", nqsets);
2661 		s->max_ethqsets = nqsets;
2662 		if (nqsets < s->ethqsets)
2663 			reduce_ethqs(adapter, nqsets);
2664 	}
2665 	for (i = 0; i < want; ++i)
2666 		adapter->msix_info[i].vec = entries[i].vector;
2667 
2668 	return 0;
2669 }
2670 
2671 static const struct net_device_ops cxgb4vf_netdev_ops	= {
2672 	.ndo_open		= cxgb4vf_open,
2673 	.ndo_stop		= cxgb4vf_stop,
2674 	.ndo_start_xmit		= t4vf_eth_xmit,
2675 	.ndo_get_stats		= cxgb4vf_get_stats,
2676 	.ndo_set_rx_mode	= cxgb4vf_set_rxmode,
2677 	.ndo_set_mac_address	= cxgb4vf_set_mac_addr,
2678 	.ndo_validate_addr	= eth_validate_addr,
2679 	.ndo_do_ioctl		= cxgb4vf_do_ioctl,
2680 	.ndo_change_mtu		= cxgb4vf_change_mtu,
2681 	.ndo_fix_features	= cxgb4vf_fix_features,
2682 	.ndo_set_features	= cxgb4vf_set_features,
2683 #ifdef CONFIG_NET_POLL_CONTROLLER
2684 	.ndo_poll_controller	= cxgb4vf_poll_controller,
2685 #endif
2686 };
2687 
2688 /*
2689  * "Probe" a device: initialize a device and construct all kernel and driver
2690  * state needed to manage the device.  This routine is called "init_one" in
2691  * the PF Driver ...
2692  */
2693 static int cxgb4vf_pci_probe(struct pci_dev *pdev,
2694 			     const struct pci_device_id *ent)
2695 {
2696 	int pci_using_dac;
2697 	int err, pidx;
2698 	unsigned int pmask;
2699 	struct adapter *adapter;
2700 	struct port_info *pi;
2701 	struct net_device *netdev;
2702 
2703 	/*
2704 	 * Print our driver banner the first time we're called to initialize a
2705 	 * device.
2706 	 */
2707 	pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION);
2708 
2709 	/*
2710 	 * Initialize generic PCI device state.
2711 	 */
2712 	err = pci_enable_device(pdev);
2713 	if (err) {
2714 		dev_err(&pdev->dev, "cannot enable PCI device\n");
2715 		return err;
2716 	}
2717 
2718 	/*
2719 	 * Reserve PCI resources for the device.  If we can't get them some
2720 	 * other driver may have already claimed the device ...
2721 	 */
2722 	err = pci_request_regions(pdev, KBUILD_MODNAME);
2723 	if (err) {
2724 		dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2725 		goto err_disable_device;
2726 	}
2727 
2728 	/*
2729 	 * Set up our DMA mask: try for 64-bit address masking first and
2730 	 * fall back to 32-bit if we can't get 64 bits ...
2731 	 */
2732 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2733 	if (err == 0) {
2734 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2735 		if (err) {
2736 			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2737 				" coherent allocations\n");
2738 			goto err_release_regions;
2739 		}
2740 		pci_using_dac = 1;
2741 	} else {
2742 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2743 		if (err != 0) {
2744 			dev_err(&pdev->dev, "no usable DMA configuration\n");
2745 			goto err_release_regions;
2746 		}
2747 		pci_using_dac = 0;
2748 	}
2749 
2750 	/*
2751 	 * Enable bus mastering for the device ...
2752 	 */
2753 	pci_set_master(pdev);
2754 
2755 	/*
2756 	 * Allocate our adapter data structure and attach it to the device.
2757 	 */
2758 	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2759 	if (!adapter) {
2760 		err = -ENOMEM;
2761 		goto err_release_regions;
2762 	}
2763 	pci_set_drvdata(pdev, adapter);
2764 	adapter->pdev = pdev;
2765 	adapter->pdev_dev = &pdev->dev;
2766 
2767 	adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
2768 				    (sizeof(struct mbox_cmd) *
2769 				     T4VF_OS_LOG_MBOX_CMDS),
2770 				    GFP_KERNEL);
2771 	if (!adapter->mbox_log) {
2772 		err = -ENOMEM;
2773 		goto err_free_adapter;
2774 	}
2775 	adapter->mbox_log->size = T4VF_OS_LOG_MBOX_CMDS;
2776 
2777 	/*
2778 	 * Initialize SMP data synchronization resources.
2779 	 */
2780 	spin_lock_init(&adapter->stats_lock);
2781 
2782 	/*
2783 	 * Map our I/O registers in BAR0.
2784 	 */
2785 	adapter->regs = pci_ioremap_bar(pdev, 0);
2786 	if (!adapter->regs) {
2787 		dev_err(&pdev->dev, "cannot map device registers\n");
2788 		err = -ENOMEM;
2789 		goto err_free_adapter;
2790 	}
2791 
2792 	/* Wait for the device to become ready before proceeding ...
2793 	 */
2794 	err = t4vf_prep_adapter(adapter);
2795 	if (err) {
2796 		dev_err(adapter->pdev_dev, "device didn't become ready:"
2797 			" err=%d\n", err);
2798 		goto err_unmap_bar0;
2799 	}
2800 
2801 	/* For T5 and later we want to use the new BAR-based User Doorbells,
2802 	 * so we need to map BAR2 here ...
2803 	 */
2804 	if (!is_t4(adapter->params.chip)) {
2805 		adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
2806 					   pci_resource_len(pdev, 2));
2807 		if (!adapter->bar2) {
2808 			dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
2809 			err = -ENOMEM;
2810 			goto err_unmap_bar0;
2811 		}
2812 	}
2813 	/*
2814 	 * Initialize adapter level features.
2815 	 */
2816 	adapter->name = pci_name(pdev);
2817 	adapter->msg_enable = dflt_msg_enable;
2818 	err = adap_init0(adapter);
2819 	if (err)
2820 		goto err_unmap_bar;
2821 
2822 	/*
2823 	 * Allocate our "adapter ports" and stitch everything together.
2824 	 */
2825 	pmask = adapter->params.vfres.pmask;
2826 	for_each_port(adapter, pidx) {
2827 		int port_id, viid;
2828 
2829 		/*
2830 		 * We simplistically allocate our virtual interfaces
2831 		 * sequentially across the port numbers to which we have
2832 		 * access rights.  This should be configurable in some manner
2833 		 * ...
2834 		 */
2835 		if (pmask == 0)
2836 			break;
2837 		port_id = ffs(pmask) - 1;
2838 		pmask &= ~(1 << port_id);
2839 		viid = t4vf_alloc_vi(adapter, port_id);
2840 		if (viid < 0) {
2841 			dev_err(&pdev->dev, "cannot allocate VI for port %d:"
2842 				" err=%d\n", port_id, viid);
2843 			err = viid;
2844 			goto err_free_dev;
2845 		}
2846 
2847 		/*
2848 		 * Allocate our network device and stitch things together.
2849 		 */
2850 		netdev = alloc_etherdev_mq(sizeof(struct port_info),
2851 					   MAX_PORT_QSETS);
2852 		if (netdev == NULL) {
2853 			t4vf_free_vi(adapter, viid);
2854 			err = -ENOMEM;
2855 			goto err_free_dev;
2856 		}
2857 		adapter->port[pidx] = netdev;
2858 		SET_NETDEV_DEV(netdev, &pdev->dev);
2859 		pi = netdev_priv(netdev);
2860 		pi->adapter = adapter;
2861 		pi->pidx = pidx;
2862 		pi->port_id = port_id;
2863 		pi->viid = viid;
2864 
2865 		/*
2866 		 * Initialize the starting state of our "port" and register
2867 		 * it.
2868 		 */
2869 		pi->xact_addr_filt = -1;
2870 		netif_carrier_off(netdev);
2871 		netdev->irq = pdev->irq;
2872 
2873 		netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
2874 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2875 			NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM;
2876 		netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
2877 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2878 			NETIF_F_HIGHDMA;
2879 		netdev->features = netdev->hw_features |
2880 				   NETIF_F_HW_VLAN_CTAG_TX;
2881 		if (pci_using_dac)
2882 			netdev->features |= NETIF_F_HIGHDMA;
2883 
2884 		netdev->priv_flags |= IFF_UNICAST_FLT;
2885 
2886 		netdev->netdev_ops = &cxgb4vf_netdev_ops;
2887 		netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
2888 
2889 		/*
2890 		 * Initialize the hardware/software state for the port.
2891 		 */
2892 		err = t4vf_port_init(adapter, pidx);
2893 		if (err) {
2894 			dev_err(&pdev->dev, "cannot initialize port %d\n",
2895 				pidx);
2896 			goto err_free_dev;
2897 		}
2898 	}
2899 
2900 	/* See what interrupts we'll be using.  If we've been configured to
2901 	 * use MSI-X interrupts, try to enable them but fall back to using
2902 	 * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
2903 	 * get MSI interrupts we bail with the error.
2904 	 */
2905 	if (msi == MSI_MSIX && enable_msix(adapter) == 0)
2906 		adapter->flags |= USING_MSIX;
2907 	else {
2908 		if (msi == MSI_MSIX) {
2909 			dev_info(adapter->pdev_dev,
2910 				 "Unable to use MSI-X Interrupts; falling "
2911 				 "back to MSI Interrupts\n");
2912 
2913 			/* We're going to need a Forwarded Interrupt Queue so
2914 			 * that may cut into how many Queue Sets we can
2915 			 * support.
2916 			 */
2917 			msi = MSI_MSI;
2918 			size_nports_qsets(adapter);
2919 		}
2920 		err = pci_enable_msi(pdev);
2921 		if (err) {
2922 			dev_err(&pdev->dev, "Unable to allocate MSI Interrupts;"
2923 				" err=%d\n", err);
2924 			goto err_free_dev;
2925 		}
2926 		adapter->flags |= USING_MSI;
2927 	}
2928 
2929 	/* Now that we know how many "ports" we have and what interrupt
2930 	 * mechanism we're going to use, we can configure our queue resources.
2931 	 */
2932 	cfg_queues(adapter);
2933 
2934 	/*
2935 	 * The "card" is now ready to go.  If any errors occur during device
2936 	 * registration we do not fail the whole "card" but rather proceed
2937 	 * only with the ports we manage to register successfully.  However we
2938 	 * must register at least one net device.
2939 	 */
2940 	for_each_port(adapter, pidx) {
2941 		struct port_info *pi = netdev_priv(adapter->port[pidx]);
2942 		netdev = adapter->port[pidx];
2943 		if (netdev == NULL)
2944 			continue;
2945 
2946 		netif_set_real_num_tx_queues(netdev, pi->nqsets);
2947 		netif_set_real_num_rx_queues(netdev, pi->nqsets);
2948 
2949 		err = register_netdev(netdev);
2950 		if (err) {
2951 			dev_warn(&pdev->dev, "cannot register net device %s,"
2952 				 " skipping\n", netdev->name);
2953 			continue;
2954 		}
2955 
2956 		set_bit(pidx, &adapter->registered_device_map);
2957 	}
2958 	if (adapter->registered_device_map == 0) {
2959 		dev_err(&pdev->dev, "could not register any net devices\n");
2960 		goto err_disable_interrupts;
2961 	}
2962 
2963 	/*
2964 	 * Set up our debugfs entries.
2965 	 */
2966 	if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
2967 		adapter->debugfs_root =
2968 			debugfs_create_dir(pci_name(pdev),
2969 					   cxgb4vf_debugfs_root);
2970 		if (IS_ERR_OR_NULL(adapter->debugfs_root))
2971 			dev_warn(&pdev->dev, "could not create debugfs"
2972 				 " directory");
2973 		else
2974 			setup_debugfs(adapter);
2975 	}
2976 
2977 	/*
2978 	 * Print a short notice on the existence and configuration of the new
2979 	 * VF network device ...
2980 	 */
2981 	for_each_port(adapter, pidx) {
2982 		dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
2983 			 adapter->port[pidx]->name,
2984 			 (adapter->flags & USING_MSIX) ? "MSI-X" :
2985 			 (adapter->flags & USING_MSI)  ? "MSI" : "");
2986 	}
2987 
2988 	/*
2989 	 * Return success!
2990 	 */
2991 	return 0;
2992 
2993 	/*
2994 	 * Error recovery and exit code.  Unwind state that's been created
2995 	 * so far and return the error.
2996 	 */
2997 err_disable_interrupts:
2998 	if (adapter->flags & USING_MSIX) {
2999 		pci_disable_msix(adapter->pdev);
3000 		adapter->flags &= ~USING_MSIX;
3001 	} else if (adapter->flags & USING_MSI) {
3002 		pci_disable_msi(adapter->pdev);
3003 		adapter->flags &= ~USING_MSI;
3004 	}
3005 
3006 err_free_dev:
3007 	for_each_port(adapter, pidx) {
3008 		netdev = adapter->port[pidx];
3009 		if (netdev == NULL)
3010 			continue;
3011 		pi = netdev_priv(netdev);
3012 		t4vf_free_vi(adapter, pi->viid);
3013 		if (test_bit(pidx, &adapter->registered_device_map))
3014 			unregister_netdev(netdev);
3015 		free_netdev(netdev);
3016 	}
3017 
3018 err_unmap_bar:
3019 	if (!is_t4(adapter->params.chip))
3020 		iounmap(adapter->bar2);
3021 
3022 err_unmap_bar0:
3023 	iounmap(adapter->regs);
3024 
3025 err_free_adapter:
3026 	kfree(adapter->mbox_log);
3027 	kfree(adapter);
3028 
3029 err_release_regions:
3030 	pci_release_regions(pdev);
3031 	pci_clear_master(pdev);
3032 
3033 err_disable_device:
3034 	pci_disable_device(pdev);
3035 
3036 	return err;
3037 }
3038 
3039 /*
3040  * "Remove" a device: tear down all kernel and driver state created in the
3041  * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
3042  * that this is called "remove_one" in the PF Driver.)
3043  */
3044 static void cxgb4vf_pci_remove(struct pci_dev *pdev)
3045 {
3046 	struct adapter *adapter = pci_get_drvdata(pdev);
3047 
3048 	/*
3049 	 * Tear down driver state associated with device.
3050 	 */
3051 	if (adapter) {
3052 		int pidx;
3053 
3054 		/*
3055 		 * Stop all of our activity.  Unregister network port,
3056 		 * disable interrupts, etc.
3057 		 */
3058 		for_each_port(adapter, pidx)
3059 			if (test_bit(pidx, &adapter->registered_device_map))
3060 				unregister_netdev(adapter->port[pidx]);
3061 		t4vf_sge_stop(adapter);
3062 		if (adapter->flags & USING_MSIX) {
3063 			pci_disable_msix(adapter->pdev);
3064 			adapter->flags &= ~USING_MSIX;
3065 		} else if (adapter->flags & USING_MSI) {
3066 			pci_disable_msi(adapter->pdev);
3067 			adapter->flags &= ~USING_MSI;
3068 		}
3069 
3070 		/*
3071 		 * Tear down our debugfs entries.
3072 		 */
3073 		if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
3074 			cleanup_debugfs(adapter);
3075 			debugfs_remove_recursive(adapter->debugfs_root);
3076 		}
3077 
3078 		/*
3079 		 * Free all of the various resources which we've acquired ...
3080 		 */
3081 		t4vf_free_sge_resources(adapter);
3082 		for_each_port(adapter, pidx) {
3083 			struct net_device *netdev = adapter->port[pidx];
3084 			struct port_info *pi;
3085 
3086 			if (netdev == NULL)
3087 				continue;
3088 
3089 			pi = netdev_priv(netdev);
3090 			t4vf_free_vi(adapter, pi->viid);
3091 			free_netdev(netdev);
3092 		}
3093 		iounmap(adapter->regs);
3094 		if (!is_t4(adapter->params.chip))
3095 			iounmap(adapter->bar2);
3096 		kfree(adapter->mbox_log);
3097 		kfree(adapter);
3098 	}
3099 
3100 	/*
3101 	 * Disable the device and release its PCI resources.
3102 	 */
3103 	pci_disable_device(pdev);
3104 	pci_clear_master(pdev);
3105 	pci_release_regions(pdev);
3106 }
3107 
3108 /*
3109  * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
3110  * delivery.
3111  */
3112 static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
3113 {
3114 	struct adapter *adapter;
3115 	int pidx;
3116 
3117 	adapter = pci_get_drvdata(pdev);
3118 	if (!adapter)
3119 		return;
3120 
3121 	/* Disable all Virtual Interfaces.  This will shut down the
3122 	 * delivery of all ingress packets into the chip for these
3123 	 * Virtual Interfaces.
3124 	 */
3125 	for_each_port(adapter, pidx)
3126 		if (test_bit(pidx, &adapter->registered_device_map))
3127 			unregister_netdev(adapter->port[pidx]);
3128 
3129 	/* Free up all Queues which will prevent further DMA and
3130 	 * Interrupts allowing various internal pathways to drain.
3131 	 */
3132 	t4vf_sge_stop(adapter);
3133 	if (adapter->flags & USING_MSIX) {
3134 		pci_disable_msix(adapter->pdev);
3135 		adapter->flags &= ~USING_MSIX;
3136 	} else if (adapter->flags & USING_MSI) {
3137 		pci_disable_msi(adapter->pdev);
3138 		adapter->flags &= ~USING_MSI;
3139 	}
3140 
3141 	/*
3142 	 * Free up all Queues which will prevent further DMA and
3143 	 * Interrupts allowing various internal pathways to drain.
3144 	 */
3145 	t4vf_free_sge_resources(adapter);
3146 	pci_set_drvdata(pdev, NULL);
3147 }
3148 
3149 /* Macros needed to support the PCI Device ID Table ...
3150  */
3151 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
3152 	static const struct pci_device_id cxgb4vf_pci_tbl[] = {
3153 #define CH_PCI_DEVICE_ID_FUNCTION	0x8
3154 
3155 #define CH_PCI_ID_TABLE_ENTRY(devid) \
3156 		{ PCI_VDEVICE(CHELSIO, (devid)), 0 }
3157 
3158 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } }
3159 
3160 #include "../cxgb4/t4_pci_id_tbl.h"
3161 
3162 MODULE_DESCRIPTION(DRV_DESC);
3163 MODULE_AUTHOR("Chelsio Communications");
3164 MODULE_LICENSE("Dual BSD/GPL");
3165 MODULE_VERSION(DRV_VERSION);
3166 MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
3167 
3168 static struct pci_driver cxgb4vf_driver = {
3169 	.name		= KBUILD_MODNAME,
3170 	.id_table	= cxgb4vf_pci_tbl,
3171 	.probe		= cxgb4vf_pci_probe,
3172 	.remove		= cxgb4vf_pci_remove,
3173 	.shutdown	= cxgb4vf_pci_shutdown,
3174 };
3175 
3176 /*
3177  * Initialize global driver state.
3178  */
3179 static int __init cxgb4vf_module_init(void)
3180 {
3181 	int ret;
3182 
3183 	/*
3184 	 * Vet our module parameters.
3185 	 */
3186 	if (msi != MSI_MSIX && msi != MSI_MSI) {
3187 		pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n",
3188 			msi, MSI_MSIX, MSI_MSI);
3189 		return -EINVAL;
3190 	}
3191 
3192 	/* Debugfs support is optional, just warn if this fails */
3193 	cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
3194 	if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3195 		pr_warn("could not create debugfs entry, continuing\n");
3196 
3197 	ret = pci_register_driver(&cxgb4vf_driver);
3198 	if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3199 		debugfs_remove(cxgb4vf_debugfs_root);
3200 	return ret;
3201 }
3202 
3203 /*
3204  * Tear down global driver state.
3205  */
3206 static void __exit cxgb4vf_module_exit(void)
3207 {
3208 	pci_unregister_driver(&cxgb4vf_driver);
3209 	debugfs_remove(cxgb4vf_debugfs_root);
3210 }
3211 
3212 module_init(cxgb4vf_module_init);
3213 module_exit(cxgb4vf_module_exit);
3214