xref: /linux/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c (revision 071bf69a0220253a44acb8b2a27f7a262b9a46bf)
1 /*
2  * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3  * driver for Linux.
4  *
5  * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35 
36 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
37 
38 #include <linux/module.h>
39 #include <linux/moduleparam.h>
40 #include <linux/init.h>
41 #include <linux/pci.h>
42 #include <linux/dma-mapping.h>
43 #include <linux/netdevice.h>
44 #include <linux/etherdevice.h>
45 #include <linux/debugfs.h>
46 #include <linux/ethtool.h>
47 #include <linux/mdio.h>
48 
49 #include "t4vf_common.h"
50 #include "t4vf_defs.h"
51 
52 #include "../cxgb4/t4_regs.h"
53 #include "../cxgb4/t4_msg.h"
54 
55 /*
56  * Generic information about the driver.
57  */
58 #define DRV_VERSION "2.0.0-ko"
59 #define DRV_DESC "Chelsio T4/T5/T6 Virtual Function (VF) Network Driver"
60 
61 /*
62  * Module Parameters.
63  * ==================
64  */
65 
66 /*
67  * Default ethtool "message level" for adapters.
68  */
69 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
70 			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
71 			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
72 
73 static int dflt_msg_enable = DFLT_MSG_ENABLE;
74 
75 module_param(dflt_msg_enable, int, 0644);
76 MODULE_PARM_DESC(dflt_msg_enable,
77 		 "default adapter ethtool message level bitmap, "
78 		 "deprecated parameter");
79 
80 /*
81  * The driver uses the best interrupt scheme available on a platform in the
82  * order MSI-X then MSI.  This parameter determines which of these schemes the
83  * driver may consider as follows:
84  *
85  *     msi = 2: choose from among MSI-X and MSI
86  *     msi = 1: only consider MSI interrupts
87  *
88  * Note that unlike the Physical Function driver, this Virtual Function driver
89  * does _not_ support legacy INTx interrupts (this limitation is mandated by
90  * the PCI-E SR-IOV standard).
91  */
92 #define MSI_MSIX	2
93 #define MSI_MSI		1
94 #define MSI_DEFAULT	MSI_MSIX
95 
96 static int msi = MSI_DEFAULT;
97 
98 module_param(msi, int, 0644);
99 MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
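
/*
 * Usage sketch (not from the source): because the parameter is declared
 * with mode 0644 it can be set at module load time, e.g.
 * "modprobe cxgb4vf msi=1", or inspected and changed later through
 * /sys/module/cxgb4vf/parameters/msi.  A runtime change only affects
 * adapters probed after the write.
 */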
100 
101 /*
102  * Fundamental constants.
103  * ======================
104  */
105 
106 enum {
107 	MAX_TXQ_ENTRIES		= 16384,
108 	MAX_RSPQ_ENTRIES	= 16384,
109 	MAX_RX_BUFFERS		= 16384,
110 
111 	MIN_TXQ_ENTRIES		= 32,
112 	MIN_RSPQ_ENTRIES	= 128,
113 	MIN_FL_ENTRIES		= 16,
114 
115 	/*
116 	 * For purposes of manipulating the Free List size we need to
117 	 * recognize that Free Lists are actually Egress Queues (the host
118 	 * produces free buffers which the hardware consumes), Egress Queues
119 	 * indices are all in units of Egress Context Units bytes, and free
120 	 * list entries are 64-bit PCI DMA addresses.  And since the state of
121 	 * the Producer Index == the Consumer Index implies an EMPTY list, we
122 	 * always have at least one Egress Unit's worth of Free List entries
123 	 * unused.  See sge.c for more details ...
124 	 */
125 	EQ_UNIT = SGE_EQ_IDXSIZE,
126 	FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
127 	MIN_FL_RESID = FL_PER_EQ_UNIT,
128 };
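
/*
 * Worked example (illustrative; assumes SGE_EQ_IDXSIZE is 64 bytes):
 * EQ_UNIT = 64, FL_PER_EQ_UNIT = 64 / sizeof(__be64) = 8, so
 * MIN_FL_RESID = 8.  A Free List meant to hold N usable buffers is thus
 * sized as N + 8 entries, keeping one Egress Unit's worth of entries
 * unused so the Producer Index never catches up with the Consumer Index
 * (which would look like an empty list).  See cxgb4vf_set_ringparam()
 * below for where this residue is applied.
 */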
129 
130 /*
131  * Global driver state.
132  * ====================
133  */
134 
135 static struct dentry *cxgb4vf_debugfs_root;
136 
137 /*
138  * OS "Callback" functions.
139  * ========================
140  */
141 
142 /*
143  * The link status has changed on the indicated "port" (Virtual Interface).
144  */
145 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
146 {
147 	struct net_device *dev = adapter->port[pidx];
148 
149 	/*
150 	 * If the port is disabled or the current recorded "link up"
151 	 * status matches the new status, just return.
152 	 */
153 	if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
154 		return;
155 
156 	/*
157 	 * Tell the OS that the link status has changed and print a short
158 	 * informative message on the console about the event.
159 	 */
160 	if (link_ok) {
161 		const char *s;
162 		const char *fc;
163 		const struct port_info *pi = netdev_priv(dev);
164 
165 		netif_carrier_on(dev);
166 
167 		switch (pi->link_cfg.speed) {
168 		case 40000:
169 			s = "40Gbps";
170 			break;
171 
172 		case 10000:
173 			s = "10Gbps";
174 			break;
175 
176 		case 1000:
177 			s = "1000Mbps";
178 			break;
179 
180 		case 100:
181 			s = "100Mbps";
182 			break;
183 
184 		default:
185 			s = "unknown";
186 			break;
187 		}
188 
189 		switch (pi->link_cfg.fc) {
190 		case PAUSE_RX:
191 			fc = "RX";
192 			break;
193 
194 		case PAUSE_TX:
195 			fc = "TX";
196 			break;
197 
198 		case PAUSE_RX|PAUSE_TX:
199 			fc = "RX/TX";
200 			break;
201 
202 		default:
203 			fc = "no";
204 			break;
205 		}
206 
207 		netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
208 	} else {
209 		netif_carrier_off(dev);
210 		netdev_info(dev, "link down\n");
211 	}
212 }
213 
214 /*
215  * The port module type has changed on the indicated "port" (Virtual
216  * Interface).
217  */
218 void t4vf_os_portmod_changed(struct adapter *adapter, int pidx)
219 {
220 	static const char * const mod_str[] = {
221 		NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
222 	};
223 	const struct net_device *dev = adapter->port[pidx];
224 	const struct port_info *pi = netdev_priv(dev);
225 
226 	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
227 		dev_info(adapter->pdev_dev, "%s: port module unplugged\n",
228 			 dev->name);
229 	else if (pi->mod_type < ARRAY_SIZE(mod_str))
230 		dev_info(adapter->pdev_dev, "%s: %s port module inserted\n",
231 			 dev->name, mod_str[pi->mod_type]);
232 	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
233 		dev_info(adapter->pdev_dev, "%s: unsupported optical port "
234 			 "module inserted\n", dev->name);
235 	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
236 		dev_info(adapter->pdev_dev, "%s: unknown port module inserted, "
237 			 "forcing TWINAX\n", dev->name);
238 	else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
239 		dev_info(adapter->pdev_dev, "%s: transceiver module error\n",
240 			 dev->name);
241 	else
242 		dev_info(adapter->pdev_dev, "%s: unknown module type %d "
243 			 "inserted\n", dev->name, pi->mod_type);
244 }
245 
246 /*
247  * Net device operations.
248  * ======================
249  */
250 
251 
252 
253 
254 /*
255  * Perform the MAC and PHY actions needed to enable a "port" (Virtual
256  * Interface).
257  */
258 static int link_start(struct net_device *dev)
259 {
260 	int ret;
261 	struct port_info *pi = netdev_priv(dev);
262 
263 	/*
264 	 * We do not set address filters and promiscuity here, the stack does
265 	 * that step explicitly. Enable vlan accel.
266 	 */
267 	ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
268 			      true);
269 	if (ret == 0) {
270 		ret = t4vf_change_mac(pi->adapter, pi->viid,
271 				      pi->xact_addr_filt, dev->dev_addr, true);
272 		if (ret >= 0) {
273 			pi->xact_addr_filt = ret;
274 			ret = 0;
275 		}
276 	}
277 
278 	/*
279 	 * We don't need to actually "start the link" itself since the
280 	 * firmware will do that for us when the first Virtual Interface
281 	 * is enabled on a port.
282 	 */
283 	if (ret == 0)
284 		ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
285 	return ret;
286 }
287 
288 /*
289  * Name the MSI-X interrupts.
290  */
291 static void name_msix_vecs(struct adapter *adapter)
292 {
293 	int namelen = sizeof(adapter->msix_info[0].desc) - 1;
294 	int pidx;
295 
296 	/*
297 	 * Firmware events.
298 	 */
299 	snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
300 		 "%s-FWeventq", adapter->name);
301 	adapter->msix_info[MSIX_FW].desc[namelen] = 0;
302 
303 	/*
304 	 * Ethernet queues.
305 	 */
306 	for_each_port(adapter, pidx) {
307 		struct net_device *dev = adapter->port[pidx];
308 		const struct port_info *pi = netdev_priv(dev);
309 		int qs, msi;
310 
311 		for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
312 			snprintf(adapter->msix_info[msi].desc, namelen,
313 				 "%s-%d", dev->name, qs);
314 			adapter->msix_info[msi].desc[namelen] = 0;
315 		}
316 	}
317 }
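
/*
 * Illustration (hypothetical names): if adapter->name were
 * "0000:02:00.1" and the port device "eth0" owned two queue sets, the
 * loops above would produce the IRQ descriptions
 * "0000:02:00.1-FWeventq", "eth0-0" and "eth0-1", which is how the
 * vectors then appear in /proc/interrupts.
 */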
318 
319 /*
320  * Request all of our MSI-X resources.
321  */
322 static int request_msix_queue_irqs(struct adapter *adapter)
323 {
324 	struct sge *s = &adapter->sge;
325 	int rxq, msi, err;
326 
327 	/*
328 	 * Firmware events.
329 	 */
330 	err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
331 			  0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
332 	if (err)
333 		return err;
334 
335 	/*
336 	 * Ethernet queues.
337 	 */
338 	msi = MSIX_IQFLINT;
339 	for_each_ethrxq(s, rxq) {
340 		err = request_irq(adapter->msix_info[msi].vec,
341 				  t4vf_sge_intr_msix, 0,
342 				  adapter->msix_info[msi].desc,
343 				  &s->ethrxq[rxq].rspq);
344 		if (err)
345 			goto err_free_irqs;
346 		msi++;
347 	}
348 	return 0;
349 
350 err_free_irqs:
351 	while (--rxq >= 0)
352 		free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
353 	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
354 	return err;
355 }
356 
357 /*
358  * Free our MSI-X resources.
359  */
360 static void free_msix_queue_irqs(struct adapter *adapter)
361 {
362 	struct sge *s = &adapter->sge;
363 	int rxq, msi;
364 
365 	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
366 	msi = MSIX_IQFLINT;
367 	for_each_ethrxq(s, rxq)
368 		free_irq(adapter->msix_info[msi++].vec,
369 			 &s->ethrxq[rxq].rspq);
370 }
371 
372 /*
373  * Turn on NAPI and start up interrupts on a response queue.
374  */
375 static void qenable(struct sge_rspq *rspq)
376 {
377 	napi_enable(&rspq->napi);
378 
379 	/*
380 	 * 0-increment the Going To Sleep register to start the timer and
381 	 * enable interrupts.
382 	 */
383 	t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
384 		     CIDXINC_V(0) |
385 		     SEINTARM_V(rspq->intr_params) |
386 		     INGRESSQID_V(rspq->cntxt_id));
387 }
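
/*
 * Sketch of the matching re-arm (see napi_rx_handler() in sge.c for the
 * authoritative version): when a NAPI poll pass completes, the same
 * register is written with a non-zero CIDX increment to acknowledge the
 * processed entries and re-arm the holdoff timer/counter, roughly:
 *
 *	t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
 *		     CIDXINC_V(work_done) |
 *		     SEINTARM_V(rspq->intr_params) |
 *		     INGRESSQID_V(rspq->cntxt_id));
 *
 * The CIDXINC_V(0) form used above is therefore a pure "arm" with no
 * consumer index update.
 */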
388 
389 /*
390  * Enable NAPI scheduling and interrupt generation for all Receive Queues.
391  */
392 static void enable_rx(struct adapter *adapter)
393 {
394 	int rxq;
395 	struct sge *s = &adapter->sge;
396 
397 	for_each_ethrxq(s, rxq)
398 		qenable(&s->ethrxq[rxq].rspq);
399 	qenable(&s->fw_evtq);
400 
401 	/*
402 	 * The interrupt queue doesn't use NAPI so we do the 0-increment of
403 	 * its Going To Sleep register here to get it started.
404 	 */
405 	if (adapter->flags & USING_MSI)
406 		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
407 			     CIDXINC_V(0) |
408 			     SEINTARM_V(s->intrq.intr_params) |
409 			     INGRESSQID_V(s->intrq.cntxt_id));
410 
411 }
412 
413 /*
414  * Wait until all NAPI handlers are descheduled.
415  */
416 static void quiesce_rx(struct adapter *adapter)
417 {
418 	struct sge *s = &adapter->sge;
419 	int rxq;
420 
421 	for_each_ethrxq(s, rxq)
422 		napi_disable(&s->ethrxq[rxq].rspq.napi);
423 	napi_disable(&s->fw_evtq.napi);
424 }
425 
426 /*
427  * Response queue handler for the firmware event queue.
428  */
429 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
430 			  const struct pkt_gl *gl)
431 {
432 	/*
433 	 * Extract response opcode and get pointer to CPL message body.
434 	 */
435 	struct adapter *adapter = rspq->adapter;
436 	u8 opcode = ((const struct rss_header *)rsp)->opcode;
437 	void *cpl = (void *)(rsp + 1);
438 
439 	switch (opcode) {
440 	case CPL_FW6_MSG: {
441 		/*
442 		 * We've received an asynchronous message from the firmware.
443 		 */
444 		const struct cpl_fw6_msg *fw_msg = cpl;
445 		if (fw_msg->type == FW6_TYPE_CMD_RPL)
446 			t4vf_handle_fw_rpl(adapter, fw_msg->data);
447 		break;
448 	}
449 
450 	case CPL_FW4_MSG: {
451 		/* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
452 		 */
453 		const struct cpl_sge_egr_update *p = (void *)(rsp + 3);
454 		opcode = CPL_OPCODE_G(ntohl(p->opcode_qid));
455 		if (opcode != CPL_SGE_EGR_UPDATE) {
456 			dev_err(adapter->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n",
457 				opcode);
458 			break;
459 		}
460 		cpl = (void *)p;
461 		/*FALLTHROUGH*/
462 	}
463 
464 	case CPL_SGE_EGR_UPDATE: {
465 		/*
466 		 * We've received an Egress Queue Status Update message.  We
467 		 * get these, if the SGE is configured to send these when the
468 		 * firmware passes certain points in processing our TX
469 		 * Ethernet Queue or if we make an explicit request for one.
470 		 * We use these updates to determine when we may need to
471 		 * restart a TX Ethernet Queue which was stopped for lack of
472 		 * free TX Queue Descriptors ...
473 		 */
474 		const struct cpl_sge_egr_update *p = cpl;
475 		unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid));
476 		struct sge *s = &adapter->sge;
477 		struct sge_txq *tq;
478 		struct sge_eth_txq *txq;
479 		unsigned int eq_idx;
480 
481 		/*
482 		 * Perform sanity checking on the Queue ID to make sure it
483 		 * really refers to one of our TX Ethernet Egress Queues which
484 		 * is active and matches the queue's ID.  None of these error
485 		 * conditions should ever happen so we may want to either make
486 		 * them fatal and/or conditionalized under DEBUG.
487 		 */
488 		eq_idx = EQ_IDX(s, qid);
489 		if (unlikely(eq_idx >= MAX_EGRQ)) {
490 			dev_err(adapter->pdev_dev,
491 				"Egress Update QID %d out of range\n", qid);
492 			break;
493 		}
494 		tq = s->egr_map[eq_idx];
495 		if (unlikely(tq == NULL)) {
496 			dev_err(adapter->pdev_dev,
497 				"Egress Update QID %d TXQ=NULL\n", qid);
498 			break;
499 		}
500 		txq = container_of(tq, struct sge_eth_txq, q);
501 		if (unlikely(tq->abs_id != qid)) {
502 			dev_err(adapter->pdev_dev,
503 				"Egress Update QID %d refers to TXQ %d\n",
504 				qid, tq->abs_id);
505 			break;
506 		}
507 
508 		/*
509 		 * Restart a stopped TX Queue which has less than half of its
510 		 * TX ring in use ...
511 		 */
512 		txq->q.restarts++;
513 		netif_tx_wake_queue(txq->txq);
514 		break;
515 	}
516 
517 	default:
518 		dev_err(adapter->pdev_dev,
519 			"unexpected CPL %#x on FW event queue\n", opcode);
520 	}
521 
522 	return 0;
523 }
524 
525 /*
526  * Allocates SGE TX/RX response queues, determines how many sets of SGE
527  * queues to use, and initializes them.  We support multiple "Queue Sets"
528  * per port if we have MSI-X, otherwise just one queue set per port.
529  */
530 static int setup_sge_queues(struct adapter *adapter)
531 {
532 	struct sge *s = &adapter->sge;
533 	int err, pidx, msix;
534 
535 	/*
536 	 * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
537 	 * state.
538 	 */
539 	bitmap_zero(s->starving_fl, MAX_EGRQ);
540 
541 	/*
542 	 * If we're using MSI interrupt mode we need to set up a "forwarded
543 	 * interrupt" queue which we'll set up with our MSI vector.  The rest
544 	 * of the ingress queues will be set up to forward their interrupts to
545 	 * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
546 	 * the intrq's queue ID as the interrupt forwarding queue for the
547 	 * subsequent calls ...
548 	 */
549 	if (adapter->flags & USING_MSI) {
550 		err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
551 					 adapter->port[0], 0, NULL, NULL);
552 		if (err)
553 			goto err_free_queues;
554 	}
555 
556 	/*
557 	 * Allocate our ingress queue for asynchronous firmware messages.
558 	 */
559 	err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
560 				 MSIX_FW, NULL, fwevtq_handler);
561 	if (err)
562 		goto err_free_queues;
563 
564 	/*
565 	 * Allocate each "port"'s initial Queue Sets.  These can be changed
566 	 * later on ... up to the point where any interface on the adapter is
567 	 * brought up at which point lots of things get nailed down
568 	 * permanently ...
569 	 */
570 	msix = MSIX_IQFLINT;
571 	for_each_port(adapter, pidx) {
572 		struct net_device *dev = adapter->port[pidx];
573 		struct port_info *pi = netdev_priv(dev);
574 		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
575 		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
576 		int qs;
577 
578 		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
579 			err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
580 						 dev, msix++,
581 						 &rxq->fl, t4vf_ethrx_handler);
582 			if (err)
583 				goto err_free_queues;
584 
585 			err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
586 					     netdev_get_tx_queue(dev, qs),
587 					     s->fw_evtq.cntxt_id);
588 			if (err)
589 				goto err_free_queues;
590 
591 			rxq->rspq.idx = qs;
592 			memset(&rxq->stats, 0, sizeof(rxq->stats));
593 		}
594 	}
595 
596 	/*
597 	 * Create the reverse mappings for the queues.
598 	 */
599 	s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
600 	s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
601 	IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
602 	for_each_port(adapter, pidx) {
603 		struct net_device *dev = adapter->port[pidx];
604 		struct port_info *pi = netdev_priv(dev);
605 		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
606 		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
607 		int qs;
608 
609 		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
610 			IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
611 			EQ_MAP(s, txq->q.abs_id) = &txq->q;
612 
613 			/*
614 			 * The FW_IQ_CMD doesn't return the Absolute Queue IDs
615 			 * for Free Lists but since all of the Egress Queues
616 			 * (including Free Lists) have Relative Queue IDs
617 			 * which are computed as Absolute - Base Queue ID, we
618 			 * can synthesize the Absolute Queue IDs for the Free
619 			 * Lists.  This is useful for debugging purposes when
620 			 * we want to dump Queue Contexts via the PF Driver.
621 			 */
622 			rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
623 			EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
624 		}
625 	}
626 	return 0;
627 
628 err_free_queues:
629 	t4vf_free_sge_resources(adapter);
630 	return err;
631 }
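
/*
 * Worked example with made-up IDs: if the first TX queue's Absolute
 * Queue ID is 72 and its Context ID is 8, then s->egr_base = 72 - 8 =
 * 64, and a Free List with Context ID 10 is given the synthesized
 * Absolute Queue ID 10 + 64 = 74.  The EQ_MAP()/IQ_MAP() tables built
 * here are what let fwevtq_handler() translate the Absolute ID carried
 * in an Egress Update back into the right queue structure.
 */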
632 
633 /*
634  * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
635  * queues.  We configure the RSS CPU lookup table to distribute to the number
636  * of HW receive queues, and the response queue lookup table to narrow that
637  * down to the response queues actually configured for each "port" (Virtual
638  * Interface).  We always configure the RSS mapping for all ports since the
639  * mapping table has plenty of entries.
640  */
641 static int setup_rss(struct adapter *adapter)
642 {
643 	int pidx;
644 
645 	for_each_port(adapter, pidx) {
646 		struct port_info *pi = adap2pinfo(adapter, pidx);
647 		struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
648 		u16 rss[MAX_PORT_QSETS];
649 		int qs, err;
650 
651 		for (qs = 0; qs < pi->nqsets; qs++)
652 			rss[qs] = rxq[qs].rspq.abs_id;
653 
654 		err = t4vf_config_rss_range(adapter, pi->viid,
655 					    0, pi->rss_size, rss, pi->nqsets);
656 		if (err)
657 			return err;
658 
659 		/*
660 		 * Perform Global RSS Mode-specific initialization.
661 		 */
662 		switch (adapter->params.rss.mode) {
663 		case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
664 			/*
665 			 * If Tunnel All Lookup isn't specified in the global
666 			 * RSS Configuration, then we need to specify a
667 			 * default Ingress Queue for any ingress packets which
668 			 * aren't hashed.  We'll use our first ingress queue
669 			 * ...
670 			 */
671 			if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
672 				union rss_vi_config config;
673 				err = t4vf_read_rss_vi_config(adapter,
674 							      pi->viid,
675 							      &config);
676 				if (err)
677 					return err;
678 				config.basicvirtual.defaultq =
679 					rxq[0].rspq.abs_id;
680 				err = t4vf_write_rss_vi_config(adapter,
681 							       pi->viid,
682 							       &config);
683 				if (err)
684 					return err;
685 			}
686 			break;
687 		}
688 	}
689 
690 	return 0;
691 }
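
/*
 * Illustration (hypothetical sizes): with pi->rss_size = 64 and
 * pi->nqsets = 4, the 64-slot indirection table slice for this Virtual
 * Interface is filled by cycling through the 4-entry rss[] array of
 * Absolute Queue IDs.  The RSS hash of an incoming flow selects a slot
 * and the slot selects the ingress queue, so flows spread across the
 * port's queue sets while the table stays fully initialized.
 */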
692 
693 /*
694  * Bring the adapter up.  Called whenever we go from no "ports" open to having
695  * one open.  This function performs the actions necessary to make an adapter
696  * operational, such as completing the initialization of HW modules, and
697  * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
698  * this is called "cxgb_up" in the PF Driver.)
699  */
700 static int adapter_up(struct adapter *adapter)
701 {
702 	int err;
703 
704 	/*
705 	 * If this is the first time we've been called, perform basic
706 	 * adapter setup.  Once we've done this, many of our adapter
707 	 * parameters can no longer be changed ...
708 	 */
709 	if ((adapter->flags & FULL_INIT_DONE) == 0) {
710 		err = setup_sge_queues(adapter);
711 		if (err)
712 			return err;
713 		err = setup_rss(adapter);
714 		if (err) {
715 			t4vf_free_sge_resources(adapter);
716 			return err;
717 		}
718 
719 		if (adapter->flags & USING_MSIX)
720 			name_msix_vecs(adapter);
721 		adapter->flags |= FULL_INIT_DONE;
722 	}
723 
724 	/*
725 	 * Acquire our interrupt resources.  We only support MSI-X and MSI.
726 	 */
727 	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
728 	if (adapter->flags & USING_MSIX)
729 		err = request_msix_queue_irqs(adapter);
730 	else
731 		err = request_irq(adapter->pdev->irq,
732 				  t4vf_intr_handler(adapter), 0,
733 				  adapter->name, adapter);
734 	if (err) {
735 		dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
736 			err);
737 		return err;
738 	}
739 
740 	/*
741 	 * Enable NAPI ingress processing and return success.
742 	 */
743 	enable_rx(adapter);
744 	t4vf_sge_start(adapter);
745 
746 	/* Initialize hash mac addr list */
747 	INIT_LIST_HEAD(&adapter->mac_hlist);
748 	return 0;
749 }
750 
751 /*
752  * Bring the adapter down.  Called whenever the last "port" (Virtual
753  * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
754  * Driver.)
755  */
756 static void adapter_down(struct adapter *adapter)
757 {
758 	/*
759 	 * Free interrupt resources.
760 	 */
761 	if (adapter->flags & USING_MSIX)
762 		free_msix_queue_irqs(adapter);
763 	else
764 		free_irq(adapter->pdev->irq, adapter);
765 
766 	/*
767 	 * Wait for NAPI handlers to finish.
768 	 */
769 	quiesce_rx(adapter);
770 }
771 
772 /*
773  * Start up a net device.
774  */
775 static int cxgb4vf_open(struct net_device *dev)
776 {
777 	int err;
778 	struct port_info *pi = netdev_priv(dev);
779 	struct adapter *adapter = pi->adapter;
780 
781 	/*
782 	 * If this is the first interface that we're opening on the "adapter",
783 	 * bring the "adapter" up now.
784 	 */
785 	if (adapter->open_device_map == 0) {
786 		err = adapter_up(adapter);
787 		if (err)
788 			return err;
789 	}
790 
791 	/*
792 	 * Note that this interface is up and start everything up ...
793 	 */
794 	err = link_start(dev);
795 	if (err)
796 		goto err_unwind;
797 
798 	netif_tx_start_all_queues(dev);
799 	set_bit(pi->port_id, &adapter->open_device_map);
800 	return 0;
801 
802 err_unwind:
803 	if (adapter->open_device_map == 0)
804 		adapter_down(adapter);
805 	return err;
806 }
807 
808 /*
809  * Shut down a net device.  This routine is called "cxgb_close" in the PF
810  * Driver ...
811  */
812 static int cxgb4vf_stop(struct net_device *dev)
813 {
814 	struct port_info *pi = netdev_priv(dev);
815 	struct adapter *adapter = pi->adapter;
816 
817 	netif_tx_stop_all_queues(dev);
818 	netif_carrier_off(dev);
819 	t4vf_enable_vi(adapter, pi->viid, false, false);
820 	pi->link_cfg.link_ok = 0;
821 
822 	clear_bit(pi->port_id, &adapter->open_device_map);
823 	if (adapter->open_device_map == 0)
824 		adapter_down(adapter);
825 	return 0;
826 }
827 
828 /*
829  * Translate our basic statistics into the standard "ifconfig" statistics.
830  */
831 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
832 {
833 	struct t4vf_port_stats stats;
834 	struct port_info *pi = netdev2pinfo(dev);
835 	struct adapter *adapter = pi->adapter;
836 	struct net_device_stats *ns = &dev->stats;
837 	int err;
838 
839 	spin_lock(&adapter->stats_lock);
840 	err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
841 	spin_unlock(&adapter->stats_lock);
842 
843 	memset(ns, 0, sizeof(*ns));
844 	if (err)
845 		return ns;
846 
847 	ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
848 			stats.tx_ucast_bytes + stats.tx_offload_bytes);
849 	ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
850 			  stats.tx_ucast_frames + stats.tx_offload_frames);
851 	ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
852 			stats.rx_ucast_bytes);
853 	ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
854 			  stats.rx_ucast_frames);
855 	ns->multicast = stats.rx_mcast_frames;
856 	ns->tx_errors = stats.tx_drop_frames;
857 	ns->rx_errors = stats.rx_err_frames;
858 
859 	return ns;
860 }
861 
862 static inline int cxgb4vf_set_addr_hash(struct port_info *pi)
863 {
864 	struct adapter *adapter = pi->adapter;
865 	u64 vec = 0;
866 	bool ucast = false;
867 	struct hash_mac_addr *entry;
868 
869 	/* Calculate the hash vector for the updated list and program it */
870 	list_for_each_entry(entry, &adapter->mac_hlist, list) {
871 		ucast |= is_unicast_ether_addr(entry->addr);
872 		vec |= (1ULL << hash_mac_addr(entry->addr));
873 	}
874 	return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
875 }
876 
877 static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr)
878 {
879 	struct port_info *pi = netdev_priv(netdev);
880 	struct adapter *adapter = pi->adapter;
881 	int ret;
882 	u64 mhash = 0;
883 	u64 uhash = 0;
884 	bool free = false;
885 	bool ucast = is_unicast_ether_addr(mac_addr);
886 	const u8 *maclist[1] = {mac_addr};
887 	struct hash_mac_addr *new_entry;
888 
889 	ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist,
890 				  NULL, ucast ? &uhash : &mhash, false);
891 	if (ret < 0)
892 		goto out;
893 	/* if hash != 0, then add the addr to the hash addr list
894 	 * so that at the end we can calculate the hash for the
895 	 * whole list and program it
896 	 */
897 	if (uhash || mhash) {
898 		new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
899 		if (!new_entry)
900 			return -ENOMEM;
901 		ether_addr_copy(new_entry->addr, mac_addr);
902 		list_add_tail(&new_entry->list, &adapter->mac_hlist);
903 		ret = cxgb4vf_set_addr_hash(pi);
904 	}
905 out:
906 	return ret < 0 ? ret : 0;
907 }
908 
909 static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
910 {
911 	struct port_info *pi = netdev_priv(netdev);
912 	struct adapter *adapter = pi->adapter;
913 	int ret;
914 	const u8 *maclist[1] = {mac_addr};
915 	struct hash_mac_addr *entry, *tmp;
916 
917 	/* If the MAC address to be removed is in the hash addr
918 	 * list, delete it from the list and update hash vector
919 	 */
920 	list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) {
921 		if (ether_addr_equal(entry->addr, mac_addr)) {
922 			list_del(&entry->list);
923 			kfree(entry);
924 			return cxgb4vf_set_addr_hash(pi);
925 		}
926 	}
927 
928 	ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false);
929 	return ret < 0 ? -EINVAL : 0;
930 }
931 
932 /*
933  * Set RX properties of a port, such as promiscuity, address filters, and MTU.
934  * If @mtu is -1 it is left unchanged.
935  */
936 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
937 {
938 	struct port_info *pi = netdev_priv(dev);
939 
940 	__dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
941 	__dev_mc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
942 	return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
943 			       (dev->flags & IFF_PROMISC) != 0,
944 			       (dev->flags & IFF_ALLMULTI) != 0,
945 			       1, -1, sleep_ok);
946 }
947 
948 /*
949  * Set the current receive modes on the device.
950  */
951 static void cxgb4vf_set_rxmode(struct net_device *dev)
952 {
953 	/* unfortunately we can't return errors to the stack */
954 	set_rxmode(dev, -1, false);
955 }
956 
957 /*
958  * Find the entry in the interrupt holdoff timer value array which comes
959  * closest to the specified interrupt holdoff value.
960  */
961 static int closest_timer(const struct sge *s, int us)
962 {
963 	int i, timer_idx = 0, min_delta = INT_MAX;
964 
965 	for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
966 		int delta = us - s->timer_val[i];
967 		if (delta < 0)
968 			delta = -delta;
969 		if (delta < min_delta) {
970 			min_delta = delta;
971 			timer_idx = i;
972 		}
973 	}
974 	return timer_idx;
975 }
976 
977 static int closest_thres(const struct sge *s, int thres)
978 {
979 	int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
980 
981 	for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
982 		delta = thres - s->counter_val[i];
983 		if (delta < 0)
984 			delta = -delta;
985 		if (delta < min_delta) {
986 			min_delta = delta;
987 			pktcnt_idx = i;
988 		}
989 	}
990 	return pktcnt_idx;
991 }
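
/*
 * Worked example (hypothetical timer table): with
 * s->timer_val[] = { 1, 5, 10, 50, 100, 200 } microseconds, a request
 * for an 8 us holdoff yields deltas { 7, 3, 2, 42, 92, 192 }, so
 * closest_timer() returns index 2 (the 10 us timer).  closest_thres()
 * performs the same nearest-match search over s->counter_val[].
 */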
992 
993 /*
994  * Return a queue's interrupt hold-off time in us.  0 means no timer.
995  */
996 static unsigned int qtimer_val(const struct adapter *adapter,
997 			       const struct sge_rspq *rspq)
998 {
999 	unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params);
1000 
1001 	return timer_idx < SGE_NTIMERS
1002 		? adapter->sge.timer_val[timer_idx]
1003 		: 0;
1004 }
1005 
1006 /**
1007  *	set_rxq_intr_params - set a queue's interrupt holdoff parameters
1008  *	@adapter: the adapter
1009  *	@rspq: the RX response queue
1010  *	@us: the hold-off time in us, or 0 to disable timer
1011  *	@cnt: the hold-off packet count, or 0 to disable counter
1012  *
1013  *	Sets an RX response queue's interrupt hold-off time and packet count.
1014  *	At least one of the two needs to be enabled for the queue to generate
1015  *	interrupts.
1016  */
1017 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
1018 			       unsigned int us, unsigned int cnt)
1019 {
1020 	unsigned int timer_idx;
1021 
1022 	/*
1023 	 * If both the interrupt holdoff timer and count are specified as
1024 	 * zero, default to a holdoff count of 1 ...
1025 	 */
1026 	if ((us | cnt) == 0)
1027 		cnt = 1;
1028 
1029 	/*
1030 	 * If an interrupt holdoff count has been specified, then find the
1031 	 * closest configured holdoff count and use that.  If the response
1032 	 * queue has already been created, then update its queue context
1033 	 * parameters ...
1034 	 */
1035 	if (cnt) {
1036 		int err;
1037 		u32 v, pktcnt_idx;
1038 
1039 		pktcnt_idx = closest_thres(&adapter->sge, cnt);
1040 		if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1041 			v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
1042 			    FW_PARAMS_PARAM_X_V(
1043 					FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1044 			    FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id);
1045 			err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1046 			if (err)
1047 				return err;
1048 		}
1049 		rspq->pktcnt_idx = pktcnt_idx;
1050 	}
1051 
1052 	/*
1053 	 * Compute the closest holdoff timer index from the supplied holdoff
1054 	 * timer value.
1055 	 */
1056 	timer_idx = (us == 0
1057 		     ? SGE_TIMER_RSTRT_CNTR
1058 		     : closest_timer(&adapter->sge, us));
1059 
1060 	/*
1061 	 * Update the response queue's interrupt coalescing parameters and
1062 	 * return success.
1063 	 */
1064 	rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
1065 			     QINTR_CNT_EN_V(cnt > 0));
1066 	return 0;
1067 }
1068 
1069 /*
1070  * Return a version number to identify the type of adapter.  The scheme is:
1071  * - bits 0..9: chip version
1072  * - bits 10..15: chip revision
1073  */
1074 static inline unsigned int mk_adap_vers(const struct adapter *adapter)
1075 {
1076 	/*
1077 	 * Chip version 4, revision 0x3f (cxgb4vf).
1078 	 */
1079 	return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
1080 }
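
/*
 * Example: CHELSIO_CHIP_VERSION() yields 4 for a T4 part, so this
 * returns 4 | (0x3f << 10) = 0xfc04 (a T5 part would give 0xfc05).
 * This value is reported as ethtool_regs.version by cxgb4vf_get_regs()
 * below.
 */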
1081 
1082 /*
1083  * Execute the specified ioctl command.
1084  */
1085 static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1086 {
1087 	int ret = 0;
1088 
1089 	switch (cmd) {
1090 	    /*
1091 	     * The VF Driver doesn't have access to any of the other
1092 	     * common Ethernet device ioctl()'s (like reading/writing
1093 	     * PHY registers, etc.).
1094 	     */
1095 
1096 	default:
1097 		ret = -EOPNOTSUPP;
1098 		break;
1099 	}
1100 	return ret;
1101 }
1102 
1103 /*
1104  * Change the device's MTU.
1105  */
1106 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1107 {
1108 	int ret;
1109 	struct port_info *pi = netdev_priv(dev);
1110 
1111 	/* accommodate SACK */
1112 	if (new_mtu < 81)
1113 		return -EINVAL;
1114 
1115 	ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1116 			      -1, -1, -1, -1, true);
1117 	if (!ret)
1118 		dev->mtu = new_mtu;
1119 	return ret;
1120 }
1121 
1122 static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
1123 	netdev_features_t features)
1124 {
1125 	/*
1126 	 * Since there is no support for separate rx/tx vlan accel
1127 	 * enable/disable make sure tx flag is always in same state as rx.
1128 	 */
1129 	if (features & NETIF_F_HW_VLAN_CTAG_RX)
1130 		features |= NETIF_F_HW_VLAN_CTAG_TX;
1131 	else
1132 		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
1133 
1134 	return features;
1135 }
1136 
1137 static int cxgb4vf_set_features(struct net_device *dev,
1138 	netdev_features_t features)
1139 {
1140 	struct port_info *pi = netdev_priv(dev);
1141 	netdev_features_t changed = dev->features ^ features;
1142 
1143 	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
1144 		t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1145 				features & NETIF_F_HW_VLAN_CTAG_TX, 0);
1146 
1147 	return 0;
1148 }
1149 
1150 /*
1151  * Change the device's MAC address.
1152  */
1153 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1154 {
1155 	int ret;
1156 	struct sockaddr *addr = _addr;
1157 	struct port_info *pi = netdev_priv(dev);
1158 
1159 	if (!is_valid_ether_addr(addr->sa_data))
1160 		return -EADDRNOTAVAIL;
1161 
1162 	ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
1163 			      addr->sa_data, true);
1164 	if (ret < 0)
1165 		return ret;
1166 
1167 	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1168 	pi->xact_addr_filt = ret;
1169 	return 0;
1170 }
1171 
1172 #ifdef CONFIG_NET_POLL_CONTROLLER
1173 /*
1174  * Poll all of our receive queues.  This is called outside of normal interrupt
1175  * context.
1176  */
1177 static void cxgb4vf_poll_controller(struct net_device *dev)
1178 {
1179 	struct port_info *pi = netdev_priv(dev);
1180 	struct adapter *adapter = pi->adapter;
1181 
1182 	if (adapter->flags & USING_MSIX) {
1183 		struct sge_eth_rxq *rxq;
1184 		int nqsets;
1185 
1186 		rxq = &adapter->sge.ethrxq[pi->first_qset];
1187 		for (nqsets = pi->nqsets; nqsets; nqsets--) {
1188 			t4vf_sge_intr_msix(0, &rxq->rspq);
1189 			rxq++;
1190 		}
1191 	} else
1192 		t4vf_intr_handler(adapter)(0, adapter);
1193 }
1194 #endif
1195 
1196 /*
1197  * Ethtool operations.
1198  * ===================
1199  *
1200  * Note that we don't support any ethtool operations which change the physical
1201  * state of the port to which we're linked.
1202  */
1203 
1204 /**
1205  *	from_fw_port_mod_type - translate Firmware Port/Module type to Ethtool
1206  *	@port_type: Firmware Port Type
1207  *	@mod_type: Firmware Module Type
1208  *
1209  *	Translate Firmware Port/Module type to Ethtool Port Type.
1210  */
1211 static int from_fw_port_mod_type(enum fw_port_type port_type,
1212 				 enum fw_port_module_type mod_type)
1213 {
1214 	if (port_type == FW_PORT_TYPE_BT_SGMII ||
1215 	    port_type == FW_PORT_TYPE_BT_XFI ||
1216 	    port_type == FW_PORT_TYPE_BT_XAUI) {
1217 		return PORT_TP;
1218 	} else if (port_type == FW_PORT_TYPE_FIBER_XFI ||
1219 		   port_type == FW_PORT_TYPE_FIBER_XAUI) {
1220 		return PORT_FIBRE;
1221 	} else if (port_type == FW_PORT_TYPE_SFP ||
1222 		   port_type == FW_PORT_TYPE_QSFP_10G ||
1223 		   port_type == FW_PORT_TYPE_QSA ||
1224 		   port_type == FW_PORT_TYPE_QSFP) {
1225 		if (mod_type == FW_PORT_MOD_TYPE_LR ||
1226 		    mod_type == FW_PORT_MOD_TYPE_SR ||
1227 		    mod_type == FW_PORT_MOD_TYPE_ER ||
1228 		    mod_type == FW_PORT_MOD_TYPE_LRM)
1229 			return PORT_FIBRE;
1230 		else if (mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
1231 			 mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
1232 			return PORT_DA;
1233 		else
1234 			return PORT_OTHER;
1235 	}
1236 
1237 	return PORT_OTHER;
1238 }
1239 
1240 /**
1241  *	fw_caps_to_lmm - translate Firmware to ethtool Link Mode Mask
1242  *	@port_type: Firmware Port Type
1243  *	@fw_caps: Firmware Port Capabilities
1244  *	@link_mode_mask: ethtool Link Mode Mask
1245  *
1246  *	Translate a Firmware Port Capabilities specification to an ethtool
1247  *	Link Mode Mask.
1248  */
1249 static void fw_caps_to_lmm(enum fw_port_type port_type,
1250 			   unsigned int fw_caps,
1251 			   unsigned long *link_mode_mask)
1252 {
1253 	#define SET_LMM(__lmm_name) __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name\
1254 			 ## _BIT, link_mode_mask)
1255 
1256 	#define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \
1257 		do { \
1258 			if (fw_caps & FW_PORT_CAP_ ## __fw_name) \
1259 				SET_LMM(__lmm_name); \
1260 		} while (0)
1261 
1262 	switch (port_type) {
1263 	case FW_PORT_TYPE_BT_SGMII:
1264 	case FW_PORT_TYPE_BT_XFI:
1265 	case FW_PORT_TYPE_BT_XAUI:
1266 		SET_LMM(TP);
1267 		FW_CAPS_TO_LMM(SPEED_100M, 100baseT_Full);
1268 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1269 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1270 		break;
1271 
1272 	case FW_PORT_TYPE_KX4:
1273 	case FW_PORT_TYPE_KX:
1274 		SET_LMM(Backplane);
1275 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1276 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
1277 		break;
1278 
1279 	case FW_PORT_TYPE_KR:
1280 		SET_LMM(Backplane);
1281 		SET_LMM(10000baseKR_Full);
1282 		break;
1283 
1284 	case FW_PORT_TYPE_BP_AP:
1285 		SET_LMM(Backplane);
1286 		SET_LMM(10000baseR_FEC);
1287 		SET_LMM(10000baseKR_Full);
1288 		SET_LMM(1000baseKX_Full);
1289 		break;
1290 
1291 	case FW_PORT_TYPE_BP4_AP:
1292 		SET_LMM(Backplane);
1293 		SET_LMM(10000baseR_FEC);
1294 		SET_LMM(10000baseKR_Full);
1295 		SET_LMM(1000baseKX_Full);
1296 		SET_LMM(10000baseKX4_Full);
1297 		break;
1298 
1299 	case FW_PORT_TYPE_FIBER_XFI:
1300 	case FW_PORT_TYPE_FIBER_XAUI:
1301 	case FW_PORT_TYPE_SFP:
1302 	case FW_PORT_TYPE_QSFP_10G:
1303 	case FW_PORT_TYPE_QSA:
1304 		SET_LMM(FIBRE);
1305 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1306 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1307 		break;
1308 
1309 	case FW_PORT_TYPE_BP40_BA:
1310 	case FW_PORT_TYPE_QSFP:
1311 		SET_LMM(FIBRE);
1312 		SET_LMM(40000baseSR4_Full);
1313 		break;
1314 
1315 	case FW_PORT_TYPE_CR_QSFP:
1316 	case FW_PORT_TYPE_SFP28:
1317 		SET_LMM(FIBRE);
1318 		SET_LMM(25000baseCR_Full);
1319 		break;
1320 
1321 	case FW_PORT_TYPE_KR4_100G:
1322 	case FW_PORT_TYPE_CR4_QSFP:
1323 		SET_LMM(FIBRE);
1324 		SET_LMM(100000baseCR4_Full);
1325 		break;
1326 
1327 	default:
1328 		break;
1329 	}
1330 
1331 	FW_CAPS_TO_LMM(ANEG, Autoneg);
1332 	FW_CAPS_TO_LMM(802_3_PAUSE, Pause);
1333 	FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause);
1334 
1335 	#undef FW_CAPS_TO_LMM
1336 	#undef SET_LMM
1337 }
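
/*
 * Expansion example: FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full) above
 * expands (via SET_LMM) to
 *
 *	if (fw_caps & FW_PORT_CAP_SPEED_10G)
 *		__set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
 *			  link_mode_mask);
 *
 * i.e. each firmware capability bit is mapped onto the corresponding
 * ethtool link-mode bit.
 */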
1338 
1339 static int cxgb4vf_get_link_ksettings(struct net_device *dev,
1340 				      struct ethtool_link_ksettings
1341 							*link_ksettings)
1342 {
1343 	const struct port_info *pi = netdev_priv(dev);
1344 	struct ethtool_link_settings *base = &link_ksettings->base;
1345 
1346 	ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
1347 	ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
1348 	ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising);
1349 
1350 	base->port = from_fw_port_mod_type(pi->port_type, pi->mod_type);
1351 
1352 	if (pi->mdio_addr >= 0) {
1353 		base->phy_address = pi->mdio_addr;
1354 		base->mdio_support = (pi->port_type == FW_PORT_TYPE_BT_SGMII
1355 				      ? ETH_MDIO_SUPPORTS_C22
1356 				      : ETH_MDIO_SUPPORTS_C45);
1357 	} else {
1358 		base->phy_address = 255;
1359 		base->mdio_support = 0;
1360 	}
1361 
1362 	fw_caps_to_lmm(pi->port_type, pi->link_cfg.supported,
1363 		       link_ksettings->link_modes.supported);
1364 	fw_caps_to_lmm(pi->port_type, pi->link_cfg.advertising,
1365 		       link_ksettings->link_modes.advertising);
1366 	fw_caps_to_lmm(pi->port_type, pi->link_cfg.lp_advertising,
1367 		       link_ksettings->link_modes.lp_advertising);
1368 
1369 	if (netif_carrier_ok(dev)) {
1370 		base->speed = pi->link_cfg.speed;
1371 		base->duplex = DUPLEX_FULL;
1372 	} else {
1373 		base->speed = SPEED_UNKNOWN;
1374 		base->duplex = DUPLEX_UNKNOWN;
1375 	}
1376 
1377 	base->autoneg = pi->link_cfg.autoneg;
1378 	if (pi->link_cfg.supported & FW_PORT_CAP_ANEG)
1379 		ethtool_link_ksettings_add_link_mode(link_ksettings,
1380 						     supported, Autoneg);
1381 	if (pi->link_cfg.autoneg)
1382 		ethtool_link_ksettings_add_link_mode(link_ksettings,
1383 						     advertising, Autoneg);
1384 
1385 	return 0;
1386 }
1387 
1388 /*
1389  * Return our driver information.
1390  */
1391 static void cxgb4vf_get_drvinfo(struct net_device *dev,
1392 				struct ethtool_drvinfo *drvinfo)
1393 {
1394 	struct adapter *adapter = netdev2adap(dev);
1395 
1396 	strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1397 	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
1398 	strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1399 		sizeof(drvinfo->bus_info));
1400 	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1401 		 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1402 		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev),
1403 		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev),
1404 		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev),
1405 		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev),
1406 		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev),
1407 		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev),
1408 		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev),
1409 		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev));
1410 }
1411 
1412 /*
1413  * Return current adapter message level.
1414  */
1415 static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1416 {
1417 	return netdev2adap(dev)->msg_enable;
1418 }
1419 
1420 /*
1421  * Set current adapter message level.
1422  */
1423 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1424 {
1425 	netdev2adap(dev)->msg_enable = msglevel;
1426 }
1427 
1428 /*
1429  * Return the device's current Queue Set ring size parameters along with the
1430  * allowed maximum values.  Since ethtool doesn't understand the concept of
1431  * multi-queue devices, we just return the current values associated with the
1432  * first Queue Set.
1433  */
1434 static void cxgb4vf_get_ringparam(struct net_device *dev,
1435 				  struct ethtool_ringparam *rp)
1436 {
1437 	const struct port_info *pi = netdev_priv(dev);
1438 	const struct sge *s = &pi->adapter->sge;
1439 
1440 	rp->rx_max_pending = MAX_RX_BUFFERS;
1441 	rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1442 	rp->rx_jumbo_max_pending = 0;
1443 	rp->tx_max_pending = MAX_TXQ_ENTRIES;
1444 
1445 	rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1446 	rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1447 	rp->rx_jumbo_pending = 0;
1448 	rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1449 }
1450 
1451 /*
1452  * Set the Queue Set ring size parameters for the device.  Again, since
1453  * ethtool doesn't allow for the concept of multiple queues per device, we'll
1454  * apply these new values across all of the Queue Sets associated with the
1455  * device -- after vetting them of course!
1456  */
1457 static int cxgb4vf_set_ringparam(struct net_device *dev,
1458 				 struct ethtool_ringparam *rp)
1459 {
1460 	const struct port_info *pi = netdev_priv(dev);
1461 	struct adapter *adapter = pi->adapter;
1462 	struct sge *s = &adapter->sge;
1463 	int qs;
1464 
1465 	if (rp->rx_pending > MAX_RX_BUFFERS ||
1466 	    rp->rx_jumbo_pending ||
1467 	    rp->tx_pending > MAX_TXQ_ENTRIES ||
1468 	    rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1469 	    rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1470 	    rp->rx_pending < MIN_FL_ENTRIES ||
1471 	    rp->tx_pending < MIN_TXQ_ENTRIES)
1472 		return -EINVAL;
1473 
1474 	if (adapter->flags & FULL_INIT_DONE)
1475 		return -EBUSY;
1476 
1477 	for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1478 		s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1479 		s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1480 		s->ethtxq[qs].q.size = rp->tx_pending;
1481 	}
1482 	return 0;
1483 }
1484 
1485 /*
1486  * Return the interrupt holdoff timer and count for the first Queue Set on the
1487  * device.  Our extension ioctl() (the cxgbtool interface) allows the
1488  * interrupt holdoff timer to be read on all of the device's Queue Sets.
1489  */
1490 static int cxgb4vf_get_coalesce(struct net_device *dev,
1491 				struct ethtool_coalesce *coalesce)
1492 {
1493 	const struct port_info *pi = netdev_priv(dev);
1494 	const struct adapter *adapter = pi->adapter;
1495 	const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1496 
1497 	coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1498 	coalesce->rx_max_coalesced_frames =
1499 		((rspq->intr_params & QINTR_CNT_EN_F)
1500 		 ? adapter->sge.counter_val[rspq->pktcnt_idx]
1501 		 : 0);
1502 	return 0;
1503 }
1504 
1505 /*
1506  * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1507  * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1508  * the interrupt holdoff timer on any of the device's Queue Sets.
1509  */
1510 static int cxgb4vf_set_coalesce(struct net_device *dev,
1511 				struct ethtool_coalesce *coalesce)
1512 {
1513 	const struct port_info *pi = netdev_priv(dev);
1514 	struct adapter *adapter = pi->adapter;
1515 
1516 	return set_rxq_intr_params(adapter,
1517 				   &adapter->sge.ethrxq[pi->first_qset].rspq,
1518 				   coalesce->rx_coalesce_usecs,
1519 				   coalesce->rx_max_coalesced_frames);
1520 }
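
/*
 * Usage sketch (hypothetical interface name): from userspace this is
 * reached through the standard ethtool coalescing interface, e.g.
 *
 *	ethtool -C eth0 rx-usecs 50 rx-frames 32
 *
 * which arrives here as us = 50, cnt = 32 for the port's first response
 * queue.
 */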
1521 
1522 /*
1523  * Report current port link pause parameter settings.
1524  */
1525 static void cxgb4vf_get_pauseparam(struct net_device *dev,
1526 				   struct ethtool_pauseparam *pauseparam)
1527 {
1528 	struct port_info *pi = netdev_priv(dev);
1529 
1530 	pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1531 	pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1532 	pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1533 }
1534 
1535 /*
1536  * Identify the port by blinking the port's LED.
1537  */
1538 static int cxgb4vf_phys_id(struct net_device *dev,
1539 			   enum ethtool_phys_id_state state)
1540 {
1541 	unsigned int val;
1542 	struct port_info *pi = netdev_priv(dev);
1543 
1544 	if (state == ETHTOOL_ID_ACTIVE)
1545 		val = 0xffff;
1546 	else if (state == ETHTOOL_ID_INACTIVE)
1547 		val = 0;
1548 	else
1549 		return -EINVAL;
1550 
1551 	return t4vf_identify_port(pi->adapter, pi->viid, val);
1552 }
1553 
1554 /*
1555  * Port stats maintained per queue of the port.
1556  */
1557 struct queue_port_stats {
1558 	u64 tso;
1559 	u64 tx_csum;
1560 	u64 rx_csum;
1561 	u64 vlan_ex;
1562 	u64 vlan_ins;
1563 	u64 lro_pkts;
1564 	u64 lro_merged;
1565 };
1566 
1567 /*
1568  * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
1569  * these need to match the order of statistics returned by
1570  * t4vf_get_port_stats().
1571  */
1572 static const char stats_strings[][ETH_GSTRING_LEN] = {
1573 	/*
1574 	 * These must match the layout of the t4vf_port_stats structure.
1575 	 */
1576 	"TxBroadcastBytes  ",
1577 	"TxBroadcastFrames ",
1578 	"TxMulticastBytes  ",
1579 	"TxMulticastFrames ",
1580 	"TxUnicastBytes    ",
1581 	"TxUnicastFrames   ",
1582 	"TxDroppedFrames   ",
1583 	"TxOffloadBytes    ",
1584 	"TxOffloadFrames   ",
1585 	"RxBroadcastBytes  ",
1586 	"RxBroadcastFrames ",
1587 	"RxMulticastBytes  ",
1588 	"RxMulticastFrames ",
1589 	"RxUnicastBytes    ",
1590 	"RxUnicastFrames   ",
1591 	"RxErrorFrames     ",
1592 
1593 	/*
1594 	 * These are accumulated per-queue statistics and must match the
1595 	 * order of the fields in the queue_port_stats structure.
1596 	 */
1597 	"TSO               ",
1598 	"TxCsumOffload     ",
1599 	"RxCsumGood        ",
1600 	"VLANextractions   ",
1601 	"VLANinsertions    ",
1602 	"GROPackets        ",
1603 	"GROMerged         ",
1604 };
1605 
1606 /*
1607  * Return the number of statistics in the specified statistics set.
1608  */
1609 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1610 {
1611 	switch (sset) {
1612 	case ETH_SS_STATS:
1613 		return ARRAY_SIZE(stats_strings);
1614 	default:
1615 		return -EOPNOTSUPP;
1616 	}
1617 	/*NOTREACHED*/
1618 }
1619 
1620 /*
1621  * Return the strings for the specified statistics set.
1622  */
1623 static void cxgb4vf_get_strings(struct net_device *dev,
1624 				u32 sset,
1625 				u8 *data)
1626 {
1627 	switch (sset) {
1628 	case ETH_SS_STATS:
1629 		memcpy(data, stats_strings, sizeof(stats_strings));
1630 		break;
1631 	}
1632 }
1633 
1634 /*
1635  * Small utility routine to accumulate queue statistics across the queues of
1636  * a "port".
1637  */
1638 static void collect_sge_port_stats(const struct adapter *adapter,
1639 				   const struct port_info *pi,
1640 				   struct queue_port_stats *stats)
1641 {
1642 	const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1643 	const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1644 	int qs;
1645 
1646 	memset(stats, 0, sizeof(*stats));
1647 	for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1648 		stats->tso += txq->tso;
1649 		stats->tx_csum += txq->tx_cso;
1650 		stats->rx_csum += rxq->stats.rx_cso;
1651 		stats->vlan_ex += rxq->stats.vlan_ex;
1652 		stats->vlan_ins += txq->vlan_ins;
1653 		stats->lro_pkts += rxq->stats.lro_pkts;
1654 		stats->lro_merged += rxq->stats.lro_merged;
1655 	}
1656 }
1657 
1658 /*
1659  * Return the ETH_SS_STATS statistics set.
1660  */
1661 static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1662 				      struct ethtool_stats *stats,
1663 				      u64 *data)
1664 {
1665 	struct port_info *pi = netdev2pinfo(dev);
1666 	struct adapter *adapter = pi->adapter;
1667 	int err = t4vf_get_port_stats(adapter, pi->pidx,
1668 				      (struct t4vf_port_stats *)data);
1669 	if (err)
1670 		memset(data, 0, sizeof(struct t4vf_port_stats));
1671 
1672 	data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1673 	collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1674 }
1675 
1676 /*
1677  * Return the size of our register map.
1678  */
1679 static int cxgb4vf_get_regs_len(struct net_device *dev)
1680 {
1681 	return T4VF_REGMAP_SIZE;
1682 }
1683 
1684 /*
1685  * Dump a block of registers, start to end inclusive, into a buffer.
1686  */
1687 static void reg_block_dump(struct adapter *adapter, void *regbuf,
1688 			   unsigned int start, unsigned int end)
1689 {
1690 	u32 *bp = regbuf + start - T4VF_REGMAP_START;
1691 
1692 	for ( ; start <= end; start += sizeof(u32)) {
1693 		/*
1694 		 * Avoid reading the Mailbox Control register since that
1695 		 * can trigger a Mailbox Ownership Arbitration cycle and
1696 		 * interfere with communication with the firmware.
1697 		 */
1698 		if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1699 			*bp++ = 0xffff;
1700 		else
1701 			*bp++ = t4_read_reg(adapter, start);
1702 	}
1703 }
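
/*
 * Note on the pointer arithmetic above: regbuf is a void *, so (with
 * the byte-sized void * arithmetic the kernel relies on) bp starts at
 * byte offset (start - T4VF_REGMAP_START) into the buffer and advances
 * by one u32 per register read.
 */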
1704 
1705 /*
1706  * Copy our entire register map into the provided buffer.
1707  */
1708 static void cxgb4vf_get_regs(struct net_device *dev,
1709 			     struct ethtool_regs *regs,
1710 			     void *regbuf)
1711 {
1712 	struct adapter *adapter = netdev2adap(dev);
1713 
1714 	regs->version = mk_adap_vers(adapter);
1715 
1716 	/*
1717 	 * Fill in register buffer with our register map.
1718 	 */
1719 	memset(regbuf, 0, T4VF_REGMAP_SIZE);
1720 
1721 	reg_block_dump(adapter, regbuf,
1722 		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1723 		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1724 	reg_block_dump(adapter, regbuf,
1725 		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1726 		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1727 
1728 	/* T5 adds new registers in the PL Register map.
1729 	 */
1730 	reg_block_dump(adapter, regbuf,
1731 		       T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1732 		       T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip)
1733 		       ? PL_VF_WHOAMI_A : PL_VF_REVISION_A));
1734 	reg_block_dump(adapter, regbuf,
1735 		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1736 		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1737 
1738 	reg_block_dump(adapter, regbuf,
1739 		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1740 		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1741 }
1742 
1743 /*
1744  * Report current Wake On LAN settings.
1745  */
1746 static void cxgb4vf_get_wol(struct net_device *dev,
1747 			    struct ethtool_wolinfo *wol)
1748 {
1749 	wol->supported = 0;
1750 	wol->wolopts = 0;
1751 	memset(&wol->sopass, 0, sizeof(wol->sopass));
1752 }
1753 
1754 /*
1755  * TCP Segmentation Offload flags which we support.
1756  */
1757 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1758 
1759 static const struct ethtool_ops cxgb4vf_ethtool_ops = {
1760 	.get_link_ksettings	= cxgb4vf_get_link_ksettings,
1761 	.get_drvinfo		= cxgb4vf_get_drvinfo,
1762 	.get_msglevel		= cxgb4vf_get_msglevel,
1763 	.set_msglevel		= cxgb4vf_set_msglevel,
1764 	.get_ringparam		= cxgb4vf_get_ringparam,
1765 	.set_ringparam		= cxgb4vf_set_ringparam,
1766 	.get_coalesce		= cxgb4vf_get_coalesce,
1767 	.set_coalesce		= cxgb4vf_set_coalesce,
1768 	.get_pauseparam		= cxgb4vf_get_pauseparam,
1769 	.get_link		= ethtool_op_get_link,
1770 	.get_strings		= cxgb4vf_get_strings,
1771 	.set_phys_id		= cxgb4vf_phys_id,
1772 	.get_sset_count		= cxgb4vf_get_sset_count,
1773 	.get_ethtool_stats	= cxgb4vf_get_ethtool_stats,
1774 	.get_regs_len		= cxgb4vf_get_regs_len,
1775 	.get_regs		= cxgb4vf_get_regs,
1776 	.get_wol		= cxgb4vf_get_wol,
1777 };
1778 
1779 /*
1780  * /sys/kernel/debug/cxgb4vf support code and data.
1781  * ================================================
1782  */
1783 
1784 /*
1785  * Show Firmware Mailbox Command/Reply Log
1786  *
1787  * Note that we don't do any locking when dumping the Firmware Mailbox Log so
1788  * it's possible that we can catch things during a log update and therefore
1789  * see partially corrupted log entries.  But it's probably Good Enough(tm).
1790  * If we ever decide that we want to make sure that we're dumping a coherent
1791  * log, we'd need to perform locking in the mailbox logging and in
1792  * mboxlog_open() where we'd need to grab the entire mailbox log in one go
1793  * like we do for the Firmware Device Log.  But as stated above, meh ...
1794  */
1795 static int mboxlog_show(struct seq_file *seq, void *v)
1796 {
1797 	struct adapter *adapter = seq->private;
1798 	struct mbox_cmd_log *log = adapter->mbox_log;
1799 	struct mbox_cmd *entry;
1800 	int entry_idx, i;
1801 
1802 	if (v == SEQ_START_TOKEN) {
1803 		seq_printf(seq,
1804 			   "%10s  %15s  %5s  %5s  %s\n",
1805 			   "Seq#", "Tstamp", "Atime", "Etime",
1806 			   "Command/Reply");
1807 		return 0;
1808 	}
1809 
1810 	entry_idx = log->cursor + ((uintptr_t)v - 2);
1811 	if (entry_idx >= log->size)
1812 		entry_idx -= log->size;
1813 	entry = mbox_cmd_log_entry(log, entry_idx);
1814 
1815 	/* skip over unused entries */
1816 	if (entry->timestamp == 0)
1817 		return 0;
1818 
1819 	seq_printf(seq, "%10u  %15llu  %5d  %5d",
1820 		   entry->seqno, entry->timestamp,
1821 		   entry->access, entry->execute);
1822 	for (i = 0; i < MBOX_LEN / 8; i++) {
1823 		u64 flit = entry->cmd[i];
1824 		u32 hi = (u32)(flit >> 32);
1825 		u32 lo = (u32)flit;
1826 
1827 		seq_printf(seq, "  %08x %08x", hi, lo);
1828 	}
1829 	seq_puts(seq, "\n");
1830 	return 0;
1831 }
1832 
1833 static inline void *mboxlog_get_idx(struct seq_file *seq, loff_t pos)
1834 {
1835 	struct adapter *adapter = seq->private;
1836 	struct mbox_cmd_log *log = adapter->mbox_log;
1837 
1838 	return ((pos <= log->size) ? (void *)(uintptr_t)(pos + 1) : NULL);
1839 }
1840 
1841 static void *mboxlog_start(struct seq_file *seq, loff_t *pos)
1842 {
1843 	return *pos ? mboxlog_get_idx(seq, *pos) : SEQ_START_TOKEN;
1844 }
1845 
1846 static void *mboxlog_next(struct seq_file *seq, void *v, loff_t *pos)
1847 {
1848 	++*pos;
1849 	return mboxlog_get_idx(seq, *pos);
1850 }
1851 
1852 static void mboxlog_stop(struct seq_file *seq, void *v)
1853 {
1854 }
1855 
1856 static const struct seq_operations mboxlog_seq_ops = {
1857 	.start = mboxlog_start,
1858 	.next  = mboxlog_next,
1859 	.stop  = mboxlog_stop,
1860 	.show  = mboxlog_show
1861 };
1862 
1863 static int mboxlog_open(struct inode *inode, struct file *file)
1864 {
1865 	int res = seq_open(file, &mboxlog_seq_ops);
1866 
1867 	if (!res) {
1868 		struct seq_file *seq = file->private_data;
1869 
1870 		seq->private = inode->i_private;
1871 	}
1872 	return res;
1873 }
1874 
1875 static const struct file_operations mboxlog_fops = {
1876 	.owner   = THIS_MODULE,
1877 	.open    = mboxlog_open,
1878 	.read    = seq_read,
1879 	.llseek  = seq_lseek,
1880 	.release = seq_release,
1881 };
1882 
1883 /*
1884  * Show SGE Queue Set information.  We display QPL Queue Sets per line.
1885  */
1886 #define QPL	4
1887 
1888 static int sge_qinfo_show(struct seq_file *seq, void *v)
1889 {
1890 	struct adapter *adapter = seq->private;
1891 	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1892 	int qs, r = (uintptr_t)v - 1;
1893 
1894 	if (r)
1895 		seq_putc(seq, '\n');
1896 
1897 	#define S3(fmt_spec, s, v) \
1898 		do {\
1899 			seq_printf(seq, "%-12s", s); \
1900 			for (qs = 0; qs < n; ++qs) \
1901 				seq_printf(seq, " %16" fmt_spec, v); \
1902 			seq_putc(seq, '\n'); \
1903 		} while (0)
1904 	#define S(s, v)		S3("s", s, v)
1905 	#define T(s, v)		S3("u", s, txq[qs].v)
1906 	#define R(s, v)		S3("u", s, rxq[qs].v)
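	/* For example (purely illustrative), R("RspQ ID:", rspq.abs_id)
	 * emits one row labelled "RspQ ID:" containing rxq[qs].rspq.abs_id
	 * for each of the n Queue Sets in the current group.
	 */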
1907 
1908 	if (r < eth_entries) {
1909 		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1910 		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1911 		int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1912 
1913 		S("QType:", "Ethernet");
1914 		S("Interface:",
1915 		  (rxq[qs].rspq.netdev
1916 		   ? rxq[qs].rspq.netdev->name
1917 		   : "N/A"));
1918 		S3("d", "Port:",
1919 		   (rxq[qs].rspq.netdev
1920 		    ? ((struct port_info *)
1921 		       netdev_priv(rxq[qs].rspq.netdev))->port_id
1922 		    : -1));
1923 		T("TxQ ID:", q.abs_id);
1924 		T("TxQ size:", q.size);
1925 		T("TxQ inuse:", q.in_use);
1926 		T("TxQ PIdx:", q.pidx);
1927 		T("TxQ CIdx:", q.cidx);
1928 		R("RspQ ID:", rspq.abs_id);
1929 		R("RspQ size:", rspq.size);
1930 		R("RspQE size:", rspq.iqe_len);
1931 		S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
1932 		S3("u", "Intr pktcnt:",
1933 		   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
1934 		R("RspQ CIdx:", rspq.cidx);
1935 		R("RspQ Gen:", rspq.gen);
1936 		R("FL ID:", fl.abs_id);
1937 		R("FL size:", fl.size - MIN_FL_RESID);
1938 		R("FL avail:", fl.avail);
1939 		R("FL PIdx:", fl.pidx);
1940 		R("FL CIdx:", fl.cidx);
1941 		return 0;
1942 	}
1943 
1944 	r -= eth_entries;
1945 	if (r == 0) {
1946 		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1947 
1948 		seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
1949 		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
1950 		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1951 			   qtimer_val(adapter, evtq));
1952 		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1953 			   adapter->sge.counter_val[evtq->pktcnt_idx]);
1954 		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
1955 		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
1956 	} else if (r == 1) {
1957 		const struct sge_rspq *intrq = &adapter->sge.intrq;
1958 
1959 		seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
1960 		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
1961 		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1962 			   qtimer_val(adapter, intrq));
1963 		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1964 			   adapter->sge.counter_val[intrq->pktcnt_idx]);
1965 		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
1966 		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
1967 	}
1968 
1969 	#undef R
1970 	#undef T
1971 	#undef S
1972 	#undef S3
1973 
1974 	return 0;
1975 }
1976 
1977 /*
1978  * Return the number of "entries" in our "file".  We group the multi-Queue
1979  * sections with QPL Queue Sets per "entry".  The sections of the output are:
1980  *
1981  *     Ethernet RX/TX Queue Sets
1982  *     Firmware Event Queue
1983  *     Forwarded Interrupt Queue (if in MSI mode)
1984  */
1985 static int sge_queue_entries(const struct adapter *adapter)
1986 {
1987 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1988 		((adapter->flags & USING_MSI) != 0);
1989 }
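/* Worked example (illustrative numbers): with 10 Ethernet Queue Sets and
 * QPL = 4, we get DIV_ROUND_UP(10, 4) = 3 Ethernet entries, plus one for
 * the Firmware Event Queue, plus one more in MSI mode for the Forwarded
 * Interrupt Queue, i.e. 5 entries in all.
 */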
1990 
1991 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
1992 {
1993 	int entries = sge_queue_entries(seq->private);
1994 
1995 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1996 }
1997 
1998 static void sge_queue_stop(struct seq_file *seq, void *v)
1999 {
2000 }
2001 
2002 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
2003 {
2004 	int entries = sge_queue_entries(seq->private);
2005 
2006 	++*pos;
2007 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2008 }
2009 
2010 static const struct seq_operations sge_qinfo_seq_ops = {
2011 	.start = sge_queue_start,
2012 	.next  = sge_queue_next,
2013 	.stop  = sge_queue_stop,
2014 	.show  = sge_qinfo_show
2015 };
2016 
2017 static int sge_qinfo_open(struct inode *inode, struct file *file)
2018 {
2019 	int res = seq_open(file, &sge_qinfo_seq_ops);
2020 
2021 	if (!res) {
2022 		struct seq_file *seq = file->private_data;
2023 		seq->private = inode->i_private;
2024 	}
2025 	return res;
2026 }
2027 
2028 static const struct file_operations sge_qinfo_debugfs_fops = {
2029 	.owner   = THIS_MODULE,
2030 	.open    = sge_qinfo_open,
2031 	.read    = seq_read,
2032 	.llseek  = seq_lseek,
2033 	.release = seq_release,
2034 };
2035 
2036 /*
2037  * Show SGE Queue Set statistics.  We display QPL Queue Sets per line.
2038  */
2039 #define QPL	4
2040 
2041 static int sge_qstats_show(struct seq_file *seq, void *v)
2042 {
2043 	struct adapter *adapter = seq->private;
2044 	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
2045 	int qs, r = (uintptr_t)v - 1;
2046 
2047 	if (r)
2048 		seq_putc(seq, '\n');
2049 
2050 	#define S3(fmt, s, v) \
2051 		do { \
2052 			seq_printf(seq, "%-16s", s); \
2053 			for (qs = 0; qs < n; ++qs) \
2054 				seq_printf(seq, " %8" fmt, v); \
2055 			seq_putc(seq, '\n'); \
2056 		} while (0)
2057 	#define S(s, v)		S3("s", s, v)
2058 
2059 	#define T3(fmt, s, v)	S3(fmt, s, txq[qs].v)
2060 	#define T(s, v)		T3("lu", s, v)
2061 
2062 	#define R3(fmt, s, v)	S3(fmt, s, rxq[qs].v)
2063 	#define R(s, v)		R3("lu", s, v)
2064 
2065 	if (r < eth_entries) {
2066 		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
2067 		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
2068 		int n = min(QPL, adapter->sge.ethqsets - QPL * r);
2069 
2070 		S("QType:", "Ethernet");
2071 		S("Interface:",
2072 		  (rxq[qs].rspq.netdev
2073 		   ? rxq[qs].rspq.netdev->name
2074 		   : "N/A"));
2075 		R3("u", "RspQNullInts:", rspq.unhandled_irqs);
2076 		R("RxPackets:", stats.pkts);
2077 		R("RxCSO:", stats.rx_cso);
2078 		R("VLANxtract:", stats.vlan_ex);
2079 		R("LROmerged:", stats.lro_merged);
2080 		R("LROpackets:", stats.lro_pkts);
2081 		R("RxDrops:", stats.rx_drops);
2082 		T("TSO:", tso);
2083 		T("TxCSO:", tx_cso);
2084 		T("VLANins:", vlan_ins);
2085 		T("TxQFull:", q.stops);
2086 		T("TxQRestarts:", q.restarts);
2087 		T("TxMapErr:", mapping_err);
2088 		R("FLAllocErr:", fl.alloc_failed);
2089 		R("FLLrgAlcErr:", fl.large_alloc_failed);
2090 		R("FLStarving:", fl.starving);
2091 		return 0;
2092 	}
2093 
2094 	r -= eth_entries;
2095 	if (r == 0) {
2096 		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
2097 
2098 		seq_printf(seq, "%-16s %8s\n", "QType:", "FW event queue");
2099 		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2100 			   evtq->unhandled_irqs);
2101 		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
2102 		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
2103 	} else if (r == 1) {
2104 		const struct sge_rspq *intrq = &adapter->sge.intrq;
2105 
2106 		seq_printf(seq, "%-16s %8s\n", "QType:", "Interrupt Queue");
2107 		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2108 			   intrq->unhandled_irqs);
2109 		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
2110 		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
2111 	}
2112 
2113 	#undef R
2114 	#undef T
2115 	#undef S
2116 	#undef R3
2117 	#undef T3
2118 	#undef S3
2119 
2120 	return 0;
2121 }
2122 
2123 /*
2124  * Return the number of "entries" in our "file".  We group the multi-Queue
2125  * sections with QPL Queue Sets per "entry".  The sections of the output are:
2126  *
2127  *     Ethernet RX/TX Queue Sets
2128  *     Firmware Event Queue
2129  *     Forwarded Interrupt Queue (if in MSI mode)
2130  */
2131 static int sge_qstats_entries(const struct adapter *adapter)
2132 {
2133 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2134 		((adapter->flags & USING_MSI) != 0);
2135 }
2136 
2137 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
2138 {
2139 	int entries = sge_qstats_entries(seq->private);
2140 
2141 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2142 }
2143 
2144 static void sge_qstats_stop(struct seq_file *seq, void *v)
2145 {
2146 }
2147 
2148 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
2149 {
2150 	int entries = sge_qstats_entries(seq->private);
2151 
2152 	(*pos)++;
2153 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2154 }
2155 
2156 static const struct seq_operations sge_qstats_seq_ops = {
2157 	.start = sge_qstats_start,
2158 	.next  = sge_qstats_next,
2159 	.stop  = sge_qstats_stop,
2160 	.show  = sge_qstats_show
2161 };
2162 
2163 static int sge_qstats_open(struct inode *inode, struct file *file)
2164 {
2165 	int res = seq_open(file, &sge_qstats_seq_ops);
2166 
2167 	if (res == 0) {
2168 		struct seq_file *seq = file->private_data;
2169 		seq->private = inode->i_private;
2170 	}
2171 	return res;
2172 }
2173 
2174 static const struct file_operations sge_qstats_proc_fops = {
2175 	.owner   = THIS_MODULE,
2176 	.open    = sge_qstats_open,
2177 	.read    = seq_read,
2178 	.llseek  = seq_lseek,
2179 	.release = seq_release,
2180 };
2181 
2182 /*
2183  * Show PCI-E SR-IOV Virtual Function Resource Limits.
2184  */
2185 static int resources_show(struct seq_file *seq, void *v)
2186 {
2187 	struct adapter *adapter = seq->private;
2188 	struct vf_resources *vfres = &adapter->params.vfres;
2189 
2190 	#define S(desc, fmt, var) \
2191 		seq_printf(seq, "%-60s " fmt "\n", \
2192 			   desc " (" #var "):", vfres->var)
2193 
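	/* For instance (purely illustrative), S("Virtual Interfaces", "%d",
	 * nvi) prints a line of the form "Virtual Interfaces (nvi): ... 2",
	 * with the description padded to 60 characters.
	 */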
2194 	S("Virtual Interfaces", "%d", nvi);
2195 	S("Egress Queues", "%d", neq);
2196 	S("Ethernet Control", "%d", nethctrl);
2197 	S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
2198 	S("Ingress Queues", "%d", niq);
2199 	S("Traffic Class", "%d", tc);
2200 	S("Port Access Rights Mask", "%#x", pmask);
2201 	S("MAC Address Filters", "%d", nexactf);
2202 	S("Firmware Command Read Capabilities", "%#x", r_caps);
2203 	S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
2204 
2205 	#undef S
2206 
2207 	return 0;
2208 }
2209 
2210 static int resources_open(struct inode *inode, struct file *file)
2211 {
2212 	return single_open(file, resources_show, inode->i_private);
2213 }
2214 
2215 static const struct file_operations resources_proc_fops = {
2216 	.owner   = THIS_MODULE,
2217 	.open    = resources_open,
2218 	.read    = seq_read,
2219 	.llseek  = seq_lseek,
2220 	.release = single_release,
2221 };
2222 
2223 /*
2224  * Show Virtual Interfaces.
2225  */
2226 static int interfaces_show(struct seq_file *seq, void *v)
2227 {
2228 	if (v == SEQ_START_TOKEN) {
2229 		seq_puts(seq, "Interface  Port   VIID\n");
2230 	} else {
2231 		struct adapter *adapter = seq->private;
2232 		int pidx = (uintptr_t)v - 2;
2233 		struct net_device *dev = adapter->port[pidx];
2234 		struct port_info *pi = netdev_priv(dev);
2235 
2236 		seq_printf(seq, "%9s  %4d  %#5x\n",
2237 			   dev->name, pi->port_id, pi->viid);
2238 	}
2239 	return 0;
2240 }
2241 
2242 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
2243 {
2244 	return pos <= adapter->params.nports
2245 		? (void *)(uintptr_t)(pos + 1)
2246 		: NULL;
2247 }
2248 
2249 static void *interfaces_start(struct seq_file *seq, loff_t *pos)
2250 {
2251 	return *pos
2252 		? interfaces_get_idx(seq->private, *pos)
2253 		: SEQ_START_TOKEN;
2254 }
2255 
2256 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
2257 {
2258 	(*pos)++;
2259 	return interfaces_get_idx(seq->private, *pos);
2260 }
2261 
2262 static void interfaces_stop(struct seq_file *seq, void *v)
2263 {
2264 }
2265 
2266 static const struct seq_operations interfaces_seq_ops = {
2267 	.start = interfaces_start,
2268 	.next  = interfaces_next,
2269 	.stop  = interfaces_stop,
2270 	.show  = interfaces_show
2271 };
2272 
2273 static int interfaces_open(struct inode *inode, struct file *file)
2274 {
2275 	int res = seq_open(file, &interfaces_seq_ops);
2276 
2277 	if (res == 0) {
2278 		struct seq_file *seq = file->private_data;
2279 		seq->private = inode->i_private;
2280 	}
2281 	return res;
2282 }
2283 
2284 static const struct file_operations interfaces_proc_fops = {
2285 	.owner   = THIS_MODULE,
2286 	.open    = interfaces_open,
2287 	.read    = seq_read,
2288 	.llseek  = seq_lseek,
2289 	.release = seq_release,
2290 };
2291 
2292 /*
2293  * /sys/kernel/debug/cxgb4vf/ files list.
2294  */
2295 struct cxgb4vf_debugfs_entry {
2296 	const char *name;		/* name of debugfs node */
2297 	umode_t mode;			/* file system mode */
2298 	const struct file_operations *fops;
2299 };
2300 
2301 static struct cxgb4vf_debugfs_entry debugfs_files[] = {
2302 	{ "mboxlog",    S_IRUGO, &mboxlog_fops },
2303 	{ "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
2304 	{ "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
2305 	{ "resources",  S_IRUGO, &resources_proc_fops },
2306 	{ "interfaces", S_IRUGO, &interfaces_proc_fops },
2307 };
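/* Once setup_debugfs() below has run, these appear as, for example,
 * /sys/kernel/debug/cxgb4vf/<pci-name>/mboxlog (path shown for
 * illustration; <pci-name> comes from pci_name() of the device).
 */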
2308 
2309 /*
2310  * Module and device initialization and cleanup code.
2311  * ==================================================
2312  */
2313 
2314 /*
2315  * Set up our /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2316  * directory (debugfs_root) has already been set up.
2317  */
2318 static int setup_debugfs(struct adapter *adapter)
2319 {
2320 	int i;
2321 
2322 	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2323 
2324 	/*
2325 	 * Debugfs support is best effort.
2326 	 */
2327 	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2328 		(void)debugfs_create_file(debugfs_files[i].name,
2329 				  debugfs_files[i].mode,
2330 				  adapter->debugfs_root,
2331 				  (void *)adapter,
2332 				  debugfs_files[i].fops);
2333 
2334 	return 0;
2335 }
2336 
2337 /*
2338  * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
2339  * it to our caller to tear down the directory (debugfs_root).
2340  */
2341 static void cleanup_debugfs(struct adapter *adapter)
2342 {
2343 	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2344 
2345 	/*
2346 	 * Unlike our sister routine cleanup_proc(), we don't need to remove
2347 	 * individual entries because a call will be made to
2348 	 * debugfs_remove_recursive().  We just need to clean up any ancillary
2349 	 * persistent state.
2350 	 */
2351 	/* nothing to do */
2352 }
2353 
2354 /* Figure out how many Ports and Queue Sets we can support.  This depends on
2355  * knowing our Virtual Function Resources and may be called a second time if
2356  * we fall back from MSI-X to MSI Interrupt Mode.
2357  */
2358 static void size_nports_qsets(struct adapter *adapter)
2359 {
2360 	struct vf_resources *vfres = &adapter->params.vfres;
2361 	unsigned int ethqsets, pmask_nports;
2362 
2363 	/* The number of "ports" which we support is equal to the number of
2364 	 * Virtual Interfaces with which we've been provisioned.
2365 	 */
2366 	adapter->params.nports = vfres->nvi;
2367 	if (adapter->params.nports > MAX_NPORTS) {
2368 		dev_warn(adapter->pdev_dev, "only using %d of %d provisioned"
2369 			 " virtual interfaces (MAX_NPORTS limit)\n", MAX_NPORTS,
2370 			 adapter->params.nports);
2371 		adapter->params.nports = MAX_NPORTS;
2372 	}
2373 
2374 	/* We may have been provisioned with more VIs than the number of
2375 	 * ports we're allowed to access (our Port Access Rights Mask).
2376 	 * This is obviously a configuration conflict but we don't want to
2377 	 * crash the kernel or anything silly just because of that.
2378 	 */
2379 	pmask_nports = hweight32(adapter->params.vfres.pmask);
2380 	if (pmask_nports < adapter->params.nports) {
2381 		dev_warn(adapter->pdev_dev, "only using %d of %d provisioned"
2382 			 " virtual interfaces; limited by Port Access Rights"
2383 			 " mask %#x\n", pmask_nports, adapter->params.nports,
2384 			 adapter->params.vfres.pmask);
2385 		adapter->params.nports = pmask_nports;
2386 	}
2387 
2388 	/* We need to reserve an Ingress Queue for the Asynchronous Firmware
2389 	 * Event Queue.  And if we're using MSI Interrupts, we'll also need to
2390 	 * reserve an Ingress Queue for Forwarded Interrupts.
2391 	 *
2392 	 * The rest of the FL/Intr-capable ingress queues will be matched up
2393 	 * one-for-one with Ethernet/Control egress queues in order to form
2394 	 * "Queue Sets" which will be aportioned between the "ports".  For
2395 	 * each Queue Set, we'll need the ability to allocate two Egress
2396 	 * Contexts -- one for the Ingress Queue Free List and one for the TX
2397 	 * Ethernet Queue.
2398 	 *
2399 	 * Note that even if we're currently configured to use MSI-X
2400 	 * Interrupts (module variable msi == MSI_MSIX) we may get downgraded
2401 	 * to MSI Interrupts if we can't get enough MSI-X Interrupts.  If that
2402 	 * happens we'll need to adjust things later.
2403 	 */
2404 	ethqsets = vfres->niqflint - 1 - (msi == MSI_MSI);
2405 	if (vfres->nethctrl != ethqsets)
2406 		ethqsets = min(vfres->nethctrl, ethqsets);
2407 	if (vfres->neq < ethqsets * 2)
2408 		ethqsets = vfres->neq / 2;
2409 	if (ethqsets > MAX_ETH_QSETS)
2410 		ethqsets = MAX_ETH_QSETS;
2411 	adapter->sge.max_ethqsets = ethqsets;
2412 
2413 	if (adapter->sge.max_ethqsets < adapter->params.nports) {
2414 		dev_warn(adapter->pdev_dev, "only using %d of %d available"
2415 			 " virtual interfaces (too few Queue Sets)\n",
2416 			 adapter->sge.max_ethqsets, adapter->params.nports);
2417 		adapter->params.nports = adapter->sge.max_ethqsets;
2418 	}
2419 }
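/* Worked example (illustrative numbers): with MSI-X, niqflint = 32,
 * nethctrl = 28 and neq = 48, we start from 32 - 1 = 31 potential Queue
 * Sets, drop to min(28, 31) = 28 for lack of Ethernet Control capacity,
 * then to 48 / 2 = 24 because each Queue Set needs two Egress Contexts,
 * and would finally be capped at MAX_ETH_QSETS if still larger.
 */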
2420 
2421 /*
2422  * Perform early "adapter" initialization.  This is where we discover what
2423  * adapter parameters we're going to be using and initialize basic adapter
2424  * hardware support.
2425  */
2426 static int adap_init0(struct adapter *adapter)
2427 {
2428 	struct sge_params *sge_params = &adapter->params.sge;
2429 	struct sge *s = &adapter->sge;
2430 	int err;
2431 	u32 param, val = 0;
2432 
2433 	/*
2434 	 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2435 	 * 2.6.31 and later we can't call pci_reset_function() in order to
2436 	 * issue an FLR because of a self-deadlock on the device semaphore.
2437 	 * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2438 	 * cases where they're needed -- for instance, some versions of KVM
2439 	 * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2440 	 * use the firmware based reset in order to reset any per function
2441 	 * state.
2442 	 */
2443 	err = t4vf_fw_reset(adapter);
2444 	if (err < 0) {
2445 		dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2446 		return err;
2447 	}
2448 
2449 	/*
2450 	 * Grab basic operational parameters.  These will predominantly have
2451 	 * been set up by the Physical Function Driver or will be hard coded
2452 	 * into the adapter.  We just have to live with them ...  Note that
2453 	 * we _must_ get our VPD parameters before our SGE parameters because
2454 	 * we need to know the adapter's core clock from the VPD in order to
2455 	 * properly decode the SGE Timer Values.
2456 	 */
2457 	err = t4vf_get_dev_params(adapter);
2458 	if (err) {
2459 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2460 			" device parameters: err=%d\n", err);
2461 		return err;
2462 	}
2463 	err = t4vf_get_vpd_params(adapter);
2464 	if (err) {
2465 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2466 			" VPD parameters: err=%d\n", err);
2467 		return err;
2468 	}
2469 	err = t4vf_get_sge_params(adapter);
2470 	if (err) {
2471 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2472 			" SGE parameters: err=%d\n", err);
2473 		return err;
2474 	}
2475 	err = t4vf_get_rss_glb_config(adapter);
2476 	if (err) {
2477 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2478 			" RSS parameters: err=%d\n", err);
2479 		return err;
2480 	}
2481 	if (adapter->params.rss.mode !=
2482 	    FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2483 		dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2484 			" mode %d\n", adapter->params.rss.mode);
2485 		return -EINVAL;
2486 	}
2487 	err = t4vf_sge_init(adapter);
2488 	if (err) {
2489 		dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2490 			" err=%d\n", err);
2491 		return err;
2492 	}
2493 
2494 	/* If we're running on newer firmware, let it know that we're
2495 	 * prepared to deal with encapsulated CPL messages.  Older
2496 	 * firmware won't understand this and we'll just get
2497 	 * unencapsulated messages ...
2498 	 */
2499 	param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
2500 		FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
2501 	val = 1;
2502 	(void) t4vf_set_params(adapter, 1, &param, &val);
2503 
2504 	/*
2505 	 * Retrieve our RX interrupt holdoff timer values and counter
2506 	 * threshold values from the SGE parameters.
2507 	 */
2508 	s->timer_val[0] = core_ticks_to_us(adapter,
2509 		TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1));
2510 	s->timer_val[1] = core_ticks_to_us(adapter,
2511 		TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1));
2512 	s->timer_val[2] = core_ticks_to_us(adapter,
2513 		TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3));
2514 	s->timer_val[3] = core_ticks_to_us(adapter,
2515 		TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3));
2516 	s->timer_val[4] = core_ticks_to_us(adapter,
2517 		TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5));
2518 	s->timer_val[5] = core_ticks_to_us(adapter,
2519 		TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5));
2520 
2521 	s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold);
2522 	s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold);
2523 	s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold);
2524 	s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold);
2525 
2526 	/*
2527 	 * Grab our Virtual Interface resource allocation, extract the
2528 	 * features that we're interested in and do a bit of sanity testing on
2529 	 * what we discover.
2530 	 */
2531 	err = t4vf_get_vfres(adapter);
2532 	if (err) {
2533 		dev_err(adapter->pdev_dev, "unable to get virtual interface"
2534 			" resources: err=%d\n", err);
2535 		return err;
2536 	}
2537 
2538 	/* Check for various parameter sanity issues */
2539 	if (adapter->params.vfres.pmask == 0) {
2540 		dev_err(adapter->pdev_dev, "no port access configured/"
2541 			"usable!\n");
2542 		return -EINVAL;
2543 	}
2544 	if (adapter->params.vfres.nvi == 0) {
2545 		dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2546 			"usable!\n");
2547 		return -EINVAL;
2548 	}
2549 
2550 	/* Initialize nports and max_ethqsets now that we have our Virtual
2551 	 * Function Resources.
2552 	 */
2553 	size_nports_qsets(adapter);
2554 
2555 	return 0;
2556 }
2557 
2558 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2559 			     u8 pkt_cnt_idx, unsigned int size,
2560 			     unsigned int iqe_size)
2561 {
2562 	rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
2563 			     (pkt_cnt_idx < SGE_NCOUNTERS ?
2564 			      QINTR_CNT_EN_F : 0));
2565 	rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2566 			    ? pkt_cnt_idx
2567 			    : 0);
2568 	rspq->iqe_len = iqe_size;
2569 	rspq->size = size;
2570 }
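/* For example (mirroring the call in cfg_queues() below),
 * init_rspq(&rxq->rspq, 0, 0, 1024, 64) selects holdoff timer index 0,
 * enables the packet-count threshold with counter index 0 (since
 * 0 < SGE_NCOUNTERS), and sizes the queue for 1024 entries of 64 bytes
 * each.
 */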
2571 
2572 /*
2573  * Perform default configuration of DMA queues depending on the number and
2574  * type of ports we found and the number of available CPUs.  Most settings can
2575  * be modified by the admin via ethtool and cxgbtool prior to the adapter
2576  * being brought up for the first time.
2577  */
2578 static void cfg_queues(struct adapter *adapter)
2579 {
2580 	struct sge *s = &adapter->sge;
2581 	int q10g, n10g, qidx, pidx, qs;
2582 	size_t iqe_size;
2583 
2584 	/*
2585 	 * We should not be called till we know how many Queue Sets we can
2586 	 * support.  In particular, this means that we need to know what kind
2587 	 * of interrupts we'll be using ...
2588 	 */
2589 	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2590 
2591 	/*
2592 	 * Count the number of 10GbE Virtual Interfaces that we have.
2593 	 */
2594 	n10g = 0;
2595 	for_each_port(adapter, pidx)
2596 		n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2597 
2598 	/*
2599 	 * We default to 1 queue per non-10G port and up to # of cores queues
2600 	 * per 10G port.
2601 	 */
2602 	if (n10g == 0) {
2603 		q10g = 0;
2604 	} else {
2605 		int n1g = (adapter->params.nports - n10g);
2606 		q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2607 		if (q10g > num_online_cpus())
2608 			q10g = num_online_cpus();
2609 	}
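	/* Illustrative example: with two ports, one of them 10G, and
	 * max_ethqsets = 16, q10g = (16 - 1) / 1 = 15, further capped by
	 * the number of online CPUs.
	 */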
2610 
2611 	/*
2612 	 * Allocate the "Queue Sets" to the various Virtual Interfaces.
2613 	 * The layout will be established in setup_sge_queues() when the
2614 	 * adapter is brought up for the first time.
2615 	 */
2616 	qidx = 0;
2617 	for_each_port(adapter, pidx) {
2618 		struct port_info *pi = adap2pinfo(adapter, pidx);
2619 
2620 		pi->first_qset = qidx;
2621 		pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
2622 		qidx += pi->nqsets;
2623 	}
2624 	s->ethqsets = qidx;
2625 
2626 	/*
2627 	 * The Ingress Queue Entry Size for our various Response Queues needs
2628 	 * to be big enough to accommodate the largest message we can receive
2629 	 * from the chip/firmware; which is 64 bytes ...
2630 	 */
2631 	iqe_size = 64;
2632 
2633 	/*
2634 	 * Set up default Queue Set parameters ...  Start off with the
2635 	 * shortest interrupt holdoff timer.
2636 	 */
2637 	for (qs = 0; qs < s->max_ethqsets; qs++) {
2638 		struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2639 		struct sge_eth_txq *txq = &s->ethtxq[qs];
2640 
2641 		init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2642 		rxq->fl.size = 72;
2643 		txq->q.size = 1024;
2644 	}
2645 
2646 	/*
2647 	 * The firmware event queue is used for link state changes and
2648 	 * notifications of TX DMA completions.
2649 	 */
2650 	init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2651 
2652 	/*
2653 	 * The forwarded interrupt queue is used when we're in MSI interrupt
2654 	 * mode.  In this mode all interrupts associated with RX queues will
2655 	 * be forwarded to a single queue which we'll associate with our MSI
2656 	 * interrupt vector.  The messages dropped in the forwarded interrupt
2657 	 * queue will indicate which ingress queue needs servicing ...  This
2658 	 * queue needs to be large enough to accommodate all of the ingress
2659 	 * queues which are forwarding their interrupt (+1 to prevent the PIDX
2660 	 * from equalling the CIDX if every ingress queue has an outstanding
2661 	 * interrupt).  The queue doesn't need to be any larger because no
2662 	 * ingress queue will ever have more than one outstanding interrupt at
2663 	 * any time ...
2664 	 */
2665 	init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2666 		  iqe_size);
2667 }
2668 
2669 /*
2670  * Reduce the number of Ethernet queues across all ports to at most n.
2671  * n provides at least one queue per port.
2672  */
2673 static void reduce_ethqs(struct adapter *adapter, int n)
2674 {
2675 	int i;
2676 	struct port_info *pi;
2677 
2678 	/*
2679 	 * While we have too many active Ethernet Queue Sets, iterate across the
2680 	 * "ports" and reduce their individual Queue Set allocations.
2681 	 */
2682 	BUG_ON(n < adapter->params.nports);
2683 	while (n < adapter->sge.ethqsets)
2684 		for_each_port(adapter, i) {
2685 			pi = adap2pinfo(adapter, i);
2686 			if (pi->nqsets > 1) {
2687 				pi->nqsets--;
2688 				adapter->sge.ethqsets--;
2689 				if (adapter->sge.ethqsets <= n)
2690 					break;
2691 			}
2692 		}
2693 
2694 	/*
2695 	 * Reassign the starting Queue Sets for each of the "ports" ...
2696 	 */
2697 	n = 0;
2698 	for_each_port(adapter, i) {
2699 		pi = adap2pinfo(adapter, i);
2700 		pi->first_qset = n;
2701 		n += pi->nqsets;
2702 	}
2703 }
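/* Illustrative example: two ports with nqsets = {4, 4} reduced to n = 6
 * end up as {3, 3}, with first_qset reassigned to {0, 3}.
 */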
2704 
2705 /*
2706  * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2707  * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2708  * need.  Minimally we need one for every Virtual Interface plus those needed
2709  * for our "extras".  Note that this process may lower the maximum number of
2710  * allowed Queue Sets ...
2711  */
2712 static int enable_msix(struct adapter *adapter)
2713 {
2714 	int i, want, need, nqsets;
2715 	struct msix_entry entries[MSIX_ENTRIES];
2716 	struct sge *s = &adapter->sge;
2717 
2718 	for (i = 0; i < MSIX_ENTRIES; ++i)
2719 		entries[i].entry = i;
2720 
2721 	/*
2722 	 * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2723 	 * plus those needed for our "extras" (for example, the firmware
2724 	 * message queue).  We _need_ at least one "Queue Set" per Virtual
2725 	 * Interface plus those needed for our "extras".  So now we get to see
2726 	 * if the song is right ...
2727 	 */
2728 	want = s->max_ethqsets + MSIX_EXTRAS;
2729 	need = adapter->params.nports + MSIX_EXTRAS;
2730 
2731 	want = pci_enable_msix_range(adapter->pdev, entries, need, want);
2732 	if (want < 0)
2733 		return want;
2734 
2735 	nqsets = want - MSIX_EXTRAS;
2736 	if (nqsets < s->max_ethqsets) {
2737 		dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2738 			 " for %d Queue Sets\n", nqsets);
2739 		s->max_ethqsets = nqsets;
2740 		if (nqsets < s->ethqsets)
2741 			reduce_ethqs(adapter, nqsets);
2742 	}
2743 	for (i = 0; i < want; ++i)
2744 		adapter->msix_info[i].vec = entries[i].vector;
2745 
2746 	return 0;
2747 }
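/* Illustrative example: with two ports, max_ethqsets = 8 and (say)
 * MSIX_EXTRAS = 1, we ask for 9 vectors but can live with 3.  If the
 * kernel grants only 7, max_ethqsets drops to 7 - 1 = 6 and
 * reduce_ethqs() rebalances the ports' Queue Sets accordingly.
 */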
2748 
2749 static const struct net_device_ops cxgb4vf_netdev_ops	= {
2750 	.ndo_open		= cxgb4vf_open,
2751 	.ndo_stop		= cxgb4vf_stop,
2752 	.ndo_start_xmit		= t4vf_eth_xmit,
2753 	.ndo_get_stats		= cxgb4vf_get_stats,
2754 	.ndo_set_rx_mode	= cxgb4vf_set_rxmode,
2755 	.ndo_set_mac_address	= cxgb4vf_set_mac_addr,
2756 	.ndo_validate_addr	= eth_validate_addr,
2757 	.ndo_do_ioctl		= cxgb4vf_do_ioctl,
2758 	.ndo_change_mtu		= cxgb4vf_change_mtu,
2759 	.ndo_fix_features	= cxgb4vf_fix_features,
2760 	.ndo_set_features	= cxgb4vf_set_features,
2761 #ifdef CONFIG_NET_POLL_CONTROLLER
2762 	.ndo_poll_controller	= cxgb4vf_poll_controller,
2763 #endif
2764 };
2765 
2766 /*
2767  * "Probe" a device: initialize a device and construct all kernel and driver
2768  * state needed to manage the device.  This routine is called "init_one" in
2769  * the PF Driver ...
2770  */
2771 static int cxgb4vf_pci_probe(struct pci_dev *pdev,
2772 			     const struct pci_device_id *ent)
2773 {
2774 	int pci_using_dac;
2775 	int err, pidx;
2776 	unsigned int pmask;
2777 	struct adapter *adapter;
2778 	struct port_info *pi;
2779 	struct net_device *netdev;
2780 
2781 	/*
2782 	 * Print our driver banner the first time we're called to initialize a
2783 	 * device.
2784 	 */
2785 	pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION);
2786 
2787 	/*
2788 	 * Initialize generic PCI device state.
2789 	 */
2790 	err = pci_enable_device(pdev);
2791 	if (err) {
2792 		dev_err(&pdev->dev, "cannot enable PCI device\n");
2793 		return err;
2794 	}
2795 
2796 	/*
2797 	 * Reserve PCI resources for the device.  If we can't get them some
2798 	 * other driver may have already claimed the device ...
2799 	 */
2800 	err = pci_request_regions(pdev, KBUILD_MODNAME);
2801 	if (err) {
2802 		dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2803 		goto err_disable_device;
2804 	}
2805 
2806 	/*
2807 	 * Set up our DMA mask: try for 64-bit address masking first and
2808 	 * fall back to 32-bit if we can't get 64 bits ...
2809 	 */
2810 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2811 	if (err == 0) {
2812 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2813 		if (err) {
2814 			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2815 				" coherent allocations\n");
2816 			goto err_release_regions;
2817 		}
2818 		pci_using_dac = 1;
2819 	} else {
2820 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2821 		if (err != 0) {
2822 			dev_err(&pdev->dev, "no usable DMA configuration\n");
2823 			goto err_release_regions;
2824 		}
2825 		pci_using_dac = 0;
2826 	}
2827 
2828 	/*
2829 	 * Enable bus mastering for the device ...
2830 	 */
2831 	pci_set_master(pdev);
2832 
2833 	/*
2834 	 * Allocate our adapter data structure and attach it to the device.
2835 	 */
2836 	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2837 	if (!adapter) {
2838 		err = -ENOMEM;
2839 		goto err_release_regions;
2840 	}
2841 	pci_set_drvdata(pdev, adapter);
2842 	adapter->pdev = pdev;
2843 	adapter->pdev_dev = &pdev->dev;
2844 
2845 	adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
2846 				    (sizeof(struct mbox_cmd) *
2847 				     T4VF_OS_LOG_MBOX_CMDS),
2848 				    GFP_KERNEL);
2849 	if (!adapter->mbox_log) {
2850 		err = -ENOMEM;
2851 		goto err_free_adapter;
2852 	}
2853 	adapter->mbox_log->size = T4VF_OS_LOG_MBOX_CMDS;
2854 
2855 	/*
2856 	 * Initialize SMP data synchronization resources.
2857 	 */
2858 	spin_lock_init(&adapter->stats_lock);
2859 	spin_lock_init(&adapter->mbox_lock);
2860 	INIT_LIST_HEAD(&adapter->mlist.list);
2861 
2862 	/*
2863 	 * Map our I/O registers in BAR0.
2864 	 */
2865 	adapter->regs = pci_ioremap_bar(pdev, 0);
2866 	if (!adapter->regs) {
2867 		dev_err(&pdev->dev, "cannot map device registers\n");
2868 		err = -ENOMEM;
2869 		goto err_free_adapter;
2870 	}
2871 
2872 	/* Wait for the device to become ready before proceeding ...
2873 	 */
2874 	err = t4vf_prep_adapter(adapter);
2875 	if (err) {
2876 		dev_err(adapter->pdev_dev, "device didn't become ready:"
2877 			" err=%d\n", err);
2878 		goto err_unmap_bar0;
2879 	}
2880 
2881 	/* For T5 and later we want to use the new BAR-based User Doorbells,
2882 	 * so we need to map BAR2 here ...
2883 	 */
2884 	if (!is_t4(adapter->params.chip)) {
2885 		adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
2886 					   pci_resource_len(pdev, 2));
2887 		if (!adapter->bar2) {
2888 			dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
2889 			err = -ENOMEM;
2890 			goto err_unmap_bar0;
2891 		}
2892 	}
2893 	/*
2894 	 * Initialize adapter level features.
2895 	 */
2896 	adapter->name = pci_name(pdev);
2897 	adapter->msg_enable = dflt_msg_enable;
2898 	err = adap_init0(adapter);
2899 	if (err)
2900 		goto err_unmap_bar;
2901 
2902 	/*
2903 	 * Allocate our "adapter ports" and stitch everything together.
2904 	 */
2905 	pmask = adapter->params.vfres.pmask;
2906 	for_each_port(adapter, pidx) {
2907 		int port_id, viid;
2908 
2909 		/*
2910 		 * We simplistically allocate our virtual interfaces
2911 		 * sequentially across the port numbers to which we have
2912 		 * access rights.  This should be configurable in some manner
2913 		 * ...
2914 		 */
2915 		if (pmask == 0)
2916 			break;
2917 		port_id = ffs(pmask) - 1;
2918 		pmask &= ~(1 << port_id);
2919 		viid = t4vf_alloc_vi(adapter, port_id);
2920 		if (viid < 0) {
2921 			dev_err(&pdev->dev, "cannot allocate VI for port %d:"
2922 				" err=%d\n", port_id, viid);
2923 			err = viid;
2924 			goto err_free_dev;
2925 		}
2926 
2927 		/*
2928 		 * Allocate our network device and stitch things together.
2929 		 */
2930 		netdev = alloc_etherdev_mq(sizeof(struct port_info),
2931 					   MAX_PORT_QSETS);
2932 		if (netdev == NULL) {
2933 			t4vf_free_vi(adapter, viid);
2934 			err = -ENOMEM;
2935 			goto err_free_dev;
2936 		}
2937 		adapter->port[pidx] = netdev;
2938 		SET_NETDEV_DEV(netdev, &pdev->dev);
2939 		pi = netdev_priv(netdev);
2940 		pi->adapter = adapter;
2941 		pi->pidx = pidx;
2942 		pi->port_id = port_id;
2943 		pi->viid = viid;
2944 
2945 		/*
2946 		 * Initialize the starting state of our "port" and register
2947 		 * it.
2948 		 */
2949 		pi->xact_addr_filt = -1;
2950 		netif_carrier_off(netdev);
2951 		netdev->irq = pdev->irq;
2952 
2953 		netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
2954 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2955 			NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM;
2956 		netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
2957 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2958 			NETIF_F_HIGHDMA;
2959 		netdev->features = netdev->hw_features |
2960 				   NETIF_F_HW_VLAN_CTAG_TX;
2961 		if (pci_using_dac)
2962 			netdev->features |= NETIF_F_HIGHDMA;
2963 
2964 		netdev->priv_flags |= IFF_UNICAST_FLT;
2965 
2966 		netdev->netdev_ops = &cxgb4vf_netdev_ops;
2967 		netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
2968 
2969 		/*
2970 		 * Initialize the hardware/software state for the port.
2971 		 */
2972 		err = t4vf_port_init(adapter, pidx);
2973 		if (err) {
2974 			dev_err(&pdev->dev, "cannot initialize port %d\n",
2975 				pidx);
2976 			goto err_free_dev;
2977 		}
2978 	}
2979 
2980 	/* See what interrupts we'll be using.  If we've been configured to
2981 	 * use MSI-X interrupts, try to enable them but fall back to using
2982 	 * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
2983 	 * get MSI interrupts we bail with the error.
2984 	 */
2985 	if (msi == MSI_MSIX && enable_msix(adapter) == 0)
2986 		adapter->flags |= USING_MSIX;
2987 	else {
2988 		if (msi == MSI_MSIX) {
2989 			dev_info(adapter->pdev_dev,
2990 				 "Unable to use MSI-X Interrupts; falling "
2991 				 "back to MSI Interrupts\n");
2992 
2993 			/* We're going to need a Forwarded Interrupt Queue so
2994 			 * that may cut into how many Queue Sets we can
2995 			 * support.
2996 			 */
2997 			msi = MSI_MSI;
2998 			size_nports_qsets(adapter);
2999 		}
3000 		err = pci_enable_msi(pdev);
3001 		if (err) {
3002 			dev_err(&pdev->dev, "Unable to allocate MSI Interrupts;"
3003 				" err=%d\n", err);
3004 			goto err_free_dev;
3005 		}
3006 		adapter->flags |= USING_MSI;
3007 	}
3008 
3009 	/* Now that we know how many "ports" we have and what interrupt
3010 	 * mechanism we're going to use, we can configure our queue resources.
3011 	 */
3012 	cfg_queues(adapter);
3013 
3014 	/*
3015 	 * The "card" is now ready to go.  If any errors occur during device
3016 	 * registration we do not fail the whole "card" but rather proceed
3017 	 * only with the ports we manage to register successfully.  However we
3018 	 * must register at least one net device.
3019 	 */
3020 	for_each_port(adapter, pidx) {
3021 		struct port_info *pi = netdev_priv(adapter->port[pidx]);
3022 		netdev = adapter->port[pidx];
3023 		if (netdev == NULL)
3024 			continue;
3025 
3026 		netif_set_real_num_tx_queues(netdev, pi->nqsets);
3027 		netif_set_real_num_rx_queues(netdev, pi->nqsets);
3028 
3029 		err = register_netdev(netdev);
3030 		if (err) {
3031 			dev_warn(&pdev->dev, "cannot register net device %s,"
3032 				 " skipping\n", netdev->name);
3033 			continue;
3034 		}
3035 
3036 		set_bit(pidx, &adapter->registered_device_map);
3037 	}
3038 	if (adapter->registered_device_map == 0) {
3039 		dev_err(&pdev->dev, "could not register any net devices\n");
3040 		goto err_disable_interrupts;
3041 	}
3042 
3043 	/*
3044 	 * Set up our debugfs entries.
3045 	 */
3046 	if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
3047 		adapter->debugfs_root =
3048 			debugfs_create_dir(pci_name(pdev),
3049 					   cxgb4vf_debugfs_root);
3050 		if (IS_ERR_OR_NULL(adapter->debugfs_root))
3051 			dev_warn(&pdev->dev, "could not create debugfs"
3052 				 " directory");
3053 		else
3054 			setup_debugfs(adapter);
3055 	}
3056 
3057 	/*
3058 	 * Print a short notice on the existence and configuration of the new
3059 	 * VF network device ...
3060 	 */
3061 	for_each_port(adapter, pidx) {
3062 		dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
3063 			 adapter->port[pidx]->name,
3064 			 (adapter->flags & USING_MSIX) ? "MSI-X" :
3065 			 (adapter->flags & USING_MSI)  ? "MSI" : "");
3066 	}
3067 
3068 	/*
3069 	 * Return success!
3070 	 */
3071 	return 0;
3072 
3073 	/*
3074 	 * Error recovery and exit code.  Unwind state that's been created
3075 	 * so far and return the error.
3076 	 */
3077 err_disable_interrupts:
3078 	if (adapter->flags & USING_MSIX) {
3079 		pci_disable_msix(adapter->pdev);
3080 		adapter->flags &= ~USING_MSIX;
3081 	} else if (adapter->flags & USING_MSI) {
3082 		pci_disable_msi(adapter->pdev);
3083 		adapter->flags &= ~USING_MSI;
3084 	}
3085 
3086 err_free_dev:
3087 	for_each_port(adapter, pidx) {
3088 		netdev = adapter->port[pidx];
3089 		if (netdev == NULL)
3090 			continue;
3091 		pi = netdev_priv(netdev);
3092 		t4vf_free_vi(adapter, pi->viid);
3093 		if (test_bit(pidx, &adapter->registered_device_map))
3094 			unregister_netdev(netdev);
3095 		free_netdev(netdev);
3096 	}
3097 
3098 err_unmap_bar:
3099 	if (!is_t4(adapter->params.chip))
3100 		iounmap(adapter->bar2);
3101 
3102 err_unmap_bar0:
3103 	iounmap(adapter->regs);
3104 
3105 err_free_adapter:
3106 	kfree(adapter->mbox_log);
3107 	kfree(adapter);
3108 
3109 err_release_regions:
3110 	pci_release_regions(pdev);
3111 	pci_clear_master(pdev);
3112 
3113 err_disable_device:
3114 	pci_disable_device(pdev);
3115 
3116 	return err;
3117 }
3118 
3119 /*
3120  * "Remove" a device: tear down all kernel and driver state created in the
3121  * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
3122  * that this is called "remove_one" in the PF Driver.)
3123  */
3124 static void cxgb4vf_pci_remove(struct pci_dev *pdev)
3125 {
3126 	struct adapter *adapter = pci_get_drvdata(pdev);
3127 
3128 	/*
3129 	 * Tear down driver state associated with device.
3130 	 */
3131 	if (adapter) {
3132 		int pidx;
3133 
3134 		/*
3135 		 * Stop all of our activity.  Unregister network port,
3136 		 * disable interrupts, etc.
3137 		 */
3138 		for_each_port(adapter, pidx)
3139 			if (test_bit(pidx, &adapter->registered_device_map))
3140 				unregister_netdev(adapter->port[pidx]);
3141 		t4vf_sge_stop(adapter);
3142 		if (adapter->flags & USING_MSIX) {
3143 			pci_disable_msix(adapter->pdev);
3144 			adapter->flags &= ~USING_MSIX;
3145 		} else if (adapter->flags & USING_MSI) {
3146 			pci_disable_msi(adapter->pdev);
3147 			adapter->flags &= ~USING_MSI;
3148 		}
3149 
3150 		/*
3151 		 * Tear down our debugfs entries.
3152 		 */
3153 		if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
3154 			cleanup_debugfs(adapter);
3155 			debugfs_remove_recursive(adapter->debugfs_root);
3156 		}
3157 
3158 		/*
3159 		 * Free all of the various resources which we've acquired ...
3160 		 */
3161 		t4vf_free_sge_resources(adapter);
3162 		for_each_port(adapter, pidx) {
3163 			struct net_device *netdev = adapter->port[pidx];
3164 			struct port_info *pi;
3165 
3166 			if (netdev == NULL)
3167 				continue;
3168 
3169 			pi = netdev_priv(netdev);
3170 			t4vf_free_vi(adapter, pi->viid);
3171 			free_netdev(netdev);
3172 		}
3173 		iounmap(adapter->regs);
3174 		if (!is_t4(adapter->params.chip))
3175 			iounmap(adapter->bar2);
3176 		kfree(adapter->mbox_log);
3177 		kfree(adapter);
3178 	}
3179 
3180 	/*
3181 	 * Disable the device and release its PCI resources.
3182 	 */
3183 	pci_disable_device(pdev);
3184 	pci_clear_master(pdev);
3185 	pci_release_regions(pdev);
3186 }
3187 
3188 /*
3189  * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
3190  * delivery.
3191  */
3192 static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
3193 {
3194 	struct adapter *adapter;
3195 	int pidx;
3196 
3197 	adapter = pci_get_drvdata(pdev);
3198 	if (!adapter)
3199 		return;
3200 
3201 	/* Disable all Virtual Interfaces.  This will shut down the
3202 	 * delivery of all ingress packets into the chip for these
3203 	 * Virtual Interfaces.
3204 	 */
3205 	for_each_port(adapter, pidx)
3206 		if (test_bit(pidx, &adapter->registered_device_map))
3207 			unregister_netdev(adapter->port[pidx]);
3208 
3209 	/* Stop all SGE activity and disable our interrupts so that no
3210 	 * further DMA or Interrupt delivery can occur.
3211 	 */
3212 	t4vf_sge_stop(adapter);
3213 	if (adapter->flags & USING_MSIX) {
3214 		pci_disable_msix(adapter->pdev);
3215 		adapter->flags &= ~USING_MSIX;
3216 	} else if (adapter->flags & USING_MSI) {
3217 		pci_disable_msi(adapter->pdev);
3218 		adapter->flags &= ~USING_MSI;
3219 	}
3220 
3221 	/*
3222 	 * Free up all Queues which will prevent further DMA and
3223 	 * Interrupts, allowing various internal pathways to drain.
3224 	 */
3225 	t4vf_free_sge_resources(adapter);
3226 	pci_set_drvdata(pdev, NULL);
3227 }
3228 
3229 /* Macros needed to support the PCI Device ID Table ...
3230  */
3231 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
3232 	static const struct pci_device_id cxgb4vf_pci_tbl[] = {
3233 #define CH_PCI_DEVICE_ID_FUNCTION	0x8
3234 
3235 #define CH_PCI_ID_TABLE_ENTRY(devid) \
3236 		{ PCI_VDEVICE(CHELSIO, (devid)), 0 }
3237 
3238 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } }
3239 
3240 #include "../cxgb4/t4_pci_id_tbl.h"
3241 
3242 MODULE_DESCRIPTION(DRV_DESC);
3243 MODULE_AUTHOR("Chelsio Communications");
3244 MODULE_LICENSE("Dual BSD/GPL");
3245 MODULE_VERSION(DRV_VERSION);
3246 MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
3247 
3248 static struct pci_driver cxgb4vf_driver = {
3249 	.name		= KBUILD_MODNAME,
3250 	.id_table	= cxgb4vf_pci_tbl,
3251 	.probe		= cxgb4vf_pci_probe,
3252 	.remove		= cxgb4vf_pci_remove,
3253 	.shutdown	= cxgb4vf_pci_shutdown,
3254 };
3255 
3256 /*
3257  * Initialize global driver state.
3258  */
3259 static int __init cxgb4vf_module_init(void)
3260 {
3261 	int ret;
3262 
3263 	/*
3264 	 * Vet our module parameters.
3265 	 */
3266 	if (msi != MSI_MSIX && msi != MSI_MSI) {
3267 		pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n",
3268 			msi, MSI_MSIX, MSI_MSI);
3269 		return -EINVAL;
3270 	}
3271 
3272 	/* Debugfs support is optional; just warn if this fails */
3273 	cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
3274 	if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3275 		pr_warn("could not create debugfs entry, continuing\n");
3276 
3277 	ret = pci_register_driver(&cxgb4vf_driver);
3278 	if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3279 		debugfs_remove(cxgb4vf_debugfs_root);
3280 	return ret;
3281 }
3282 
3283 /*
3284  * Tear down global driver state.
3285  */
3286 static void __exit cxgb4vf_module_exit(void)
3287 {
3288 	pci_unregister_driver(&cxgb4vf_driver);
3289 	debugfs_remove(cxgb4vf_debugfs_root);
3290 }
3291 
3292 module_init(cxgb4vf_module_init);
3293 module_exit(cxgb4vf_module_exit);
3294