/*
 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
 * driver for Linux.
 *
 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/debugfs.h>
#include <linux/ethtool.h>
#include <linux/mdio.h>

#include "t4vf_common.h"
#include "t4vf_defs.h"

#include "../cxgb4/t4_regs.h"
#include "../cxgb4/t4_msg.h"

/*
 * Generic information about the driver.
 */
#define DRV_VERSION "2.0.0-ko"
#define DRV_DESC "Chelsio T4/T5 Virtual Function (VF) Network Driver"

/*
 * Module Parameters.
 * ==================
 */

/*
 * Default ethtool "message level" for adapters.
 */
#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)

static int dflt_msg_enable = DFLT_MSG_ENABLE;

module_param(dflt_msg_enable, int, 0644);
MODULE_PARM_DESC(dflt_msg_enable,
		 "default adapter ethtool message level bitmap");

/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X then MSI.  This parameter determines which of these schemes the
 * driver may consider as follows:
 *
 *     msi = 2: choose from among MSI-X and MSI
 *     msi = 1: only consider MSI interrupts
 *
 * Note that unlike the Physical Function driver, this Virtual Function driver
 * does _not_ support legacy INTx interrupts (this limitation is mandated by
 * the PCI-E SR-IOV standard).
 */
#define MSI_MSIX	2
#define MSI_MSI		1
#define MSI_DEFAULT	MSI_MSIX

static int msi = MSI_DEFAULT;

module_param(msi, int, 0644);
MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
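
/*
 * For example, loading the module with "modprobe cxgb4vf msi=1" restricts
 * the driver to plain MSI interrupts, while the default of msi=2 lets it
 * try MSI-X first, per the scheme ordering described above.
 */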

/*
 * Fundamental constants.
 * ======================
 */

enum {
	MAX_TXQ_ENTRIES		= 16384,
	MAX_RSPQ_ENTRIES	= 16384,
	MAX_RX_BUFFERS		= 16384,

	MIN_TXQ_ENTRIES		= 32,
	MIN_RSPQ_ENTRIES	= 128,
	MIN_FL_ENTRIES		= 16,

	/*
	 * For purposes of manipulating the Free List size we need to
	 * recognize that Free Lists are actually Egress Queues (the host
	 * produces free buffers which the hardware consumes), Egress Queue
	 * indices are all in units of Egress Context Units (bytes), and free
	 * list entries are 64-bit PCI DMA addresses.  And since the state of
	 * the Producer Index == the Consumer Index implies an EMPTY list, we
	 * always have at least one Egress Unit's worth of Free List entries
	 * unused.  See sge.c for more details ...
	 */
	EQ_UNIT = SGE_EQ_IDXSIZE,
	FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
	MIN_FL_RESID = FL_PER_EQ_UNIT,
};

/*
 * Global driver state.
 * ====================
 */

static struct dentry *cxgb4vf_debugfs_root;

/*
 * OS "Callback" functions.
 * ========================
 */

/*
 * The link status has changed on the indicated "port" (Virtual Interface).
 */
void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
{
	struct net_device *dev = adapter->port[pidx];

	/*
	 * If the port is disabled or the current recorded "link up"
	 * status matches the new status, just return.
	 */
	if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
		return;

	/*
	 * Tell the OS that the link status has changed and print a short
	 * informative message on the console about the event.
	 */
	if (link_ok) {
		const char *s;
		const char *fc;
		const struct port_info *pi = netdev_priv(dev);

		netif_carrier_on(dev);

		switch (pi->link_cfg.speed) {
		case 40000:
			s = "40Gbps";
			break;

		case 10000:
			s = "10Gbps";
			break;

		case 1000:
			s = "1000Mbps";
			break;

		case 100:
			s = "100Mbps";
			break;

		default:
			s = "unknown";
			break;
		}

		switch (pi->link_cfg.fc) {
		case PAUSE_RX:
			fc = "RX";
			break;

		case PAUSE_TX:
			fc = "TX";
			break;

		case PAUSE_RX|PAUSE_TX:
			fc = "RX/TX";
			break;

		default:
			fc = "no";
			break;
		}

		netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
	} else {
		netif_carrier_off(dev);
		netdev_info(dev, "link down\n");
	}
}

/*
 * The port module type has changed on the indicated "port" (Virtual
 * Interface).
 */
void t4vf_os_portmod_changed(struct adapter *adapter, int pidx)
{
	static const char * const mod_str[] = {
		NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
	};
	const struct net_device *dev = adapter->port[pidx];
	const struct port_info *pi = netdev_priv(dev);

	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
		dev_info(adapter->pdev_dev, "%s: port module unplugged\n",
			 dev->name);
	else if (pi->mod_type < ARRAY_SIZE(mod_str))
		dev_info(adapter->pdev_dev, "%s: %s port module inserted\n",
			 dev->name, mod_str[pi->mod_type]);
	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
		dev_info(adapter->pdev_dev, "%s: unsupported optical port "
			 "module inserted\n", dev->name);
	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
		dev_info(adapter->pdev_dev, "%s: unknown port module inserted, "
			 "forcing TWINAX\n", dev->name);
	else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
		dev_info(adapter->pdev_dev, "%s: transceiver module error\n",
			 dev->name);
	else
		dev_info(adapter->pdev_dev, "%s: unknown module type %d "
			 "inserted\n", dev->name, pi->mod_type);
}

/*
 * Net device operations.
 * ======================
 */

/*
 * Perform the MAC and PHY actions needed to enable a "port" (Virtual
 * Interface).
 */
static int link_start(struct net_device *dev)
{
	int ret;
	struct port_info *pi = netdev_priv(dev);

	/*
	 * We do not set address filters and promiscuity here, the stack does
	 * that step explicitly. Enable vlan accel.
	 */
	ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
			      true);
	if (ret == 0) {
		ret = t4vf_change_mac(pi->adapter, pi->viid,
				      pi->xact_addr_filt, dev->dev_addr, true);
		if (ret >= 0) {
			pi->xact_addr_filt = ret;
			ret = 0;
		}
	}

	/*
	 * We don't need to actually "start the link" itself since the
	 * firmware will do that for us when the first Virtual Interface
	 * is enabled on a port.
	 */
	if (ret == 0)
		ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
	return ret;
}

/*
 * Name the MSI-X interrupts.
 */
static void name_msix_vecs(struct adapter *adapter)
{
	int namelen = sizeof(adapter->msix_info[0].desc) - 1;
	int pidx;

	/*
	 * Firmware events.
	 */
	snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
		 "%s-FWeventq", adapter->name);
	adapter->msix_info[MSIX_FW].desc[namelen] = 0;

	/*
	 * Ethernet queues.
	 */
	for_each_port(adapter, pidx) {
		struct net_device *dev = adapter->port[pidx];
		const struct port_info *pi = netdev_priv(dev);
		int qs, msi;

		for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
			snprintf(adapter->msix_info[msi].desc, namelen,
				 "%s-%d", dev->name, qs);
			adapter->msix_info[msi].desc[namelen] = 0;
		}
	}
}

/*
 * Request all of our MSI-X resources.
 */
static int request_msix_queue_irqs(struct adapter *adapter)
{
	struct sge *s = &adapter->sge;
	int rxq, msi, err;

	/*
	 * Firmware events.
	 */
	err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
			  0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
	if (err)
		return err;

	/*
	 * Ethernet queues.
	 */
	msi = MSIX_IQFLINT;
	for_each_ethrxq(s, rxq) {
		err = request_irq(adapter->msix_info[msi].vec,
				  t4vf_sge_intr_msix, 0,
				  adapter->msix_info[msi].desc,
				  &s->ethrxq[rxq].rspq);
		if (err)
			goto err_free_irqs;
		msi++;
	}
	return 0;

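	/*
	 * Unwind on error: release the Ethernet queue IRQs acquired so far
	 * in reverse order, then the firmware event queue IRQ.
	 */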
err_free_irqs:
	while (--rxq >= 0)
		free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
	return err;
}

/*
 * Free our MSI-X resources.
 */
static void free_msix_queue_irqs(struct adapter *adapter)
{
	struct sge *s = &adapter->sge;
	int rxq, msi;

	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
	msi = MSIX_IQFLINT;
	for_each_ethrxq(s, rxq)
		free_irq(adapter->msix_info[msi++].vec,
			 &s->ethrxq[rxq].rspq);
}

/*
 * Turn on NAPI and start up interrupts on a response queue.
 */
static void qenable(struct sge_rspq *rspq)
{
	napi_enable(&rspq->napi);

	/*
	 * 0-increment the Going To Sleep register to start the timer and
	 * enable interrupts.
	 */
	t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
		     CIDXINC_V(0) |
		     SEINTARM_V(rspq->intr_params) |
		     INGRESSQID_V(rspq->cntxt_id));
}

/*
 * Enable NAPI scheduling and interrupt generation for all Receive Queues.
 */
static void enable_rx(struct adapter *adapter)
{
	int rxq;
	struct sge *s = &adapter->sge;

	for_each_ethrxq(s, rxq)
		qenable(&s->ethrxq[rxq].rspq);
	qenable(&s->fw_evtq);

	/*
	 * The interrupt queue doesn't use NAPI so we do the 0-increment of
	 * its Going To Sleep register here to get it started.
	 */
	if (adapter->flags & USING_MSI)
		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
			     CIDXINC_V(0) |
			     SEINTARM_V(s->intrq.intr_params) |
			     INGRESSQID_V(s->intrq.cntxt_id));
}

/*
 * Wait until all NAPI handlers are descheduled.
 */
static void quiesce_rx(struct adapter *adapter)
{
	struct sge *s = &adapter->sge;
	int rxq;

	for_each_ethrxq(s, rxq)
		napi_disable(&s->ethrxq[rxq].rspq.napi);
	napi_disable(&s->fw_evtq.napi);
}

/*
 * Response queue handler for the firmware event queue.
 */
static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
			  const struct pkt_gl *gl)
{
	/*
	 * Extract response opcode and get pointer to CPL message body.
	 */
	struct adapter *adapter = rspq->adapter;
	u8 opcode = ((const struct rss_header *)rsp)->opcode;
	void *cpl = (void *)(rsp + 1);

	switch (opcode) {
	case CPL_FW6_MSG: {
		/*
		 * We've received an asynchronous message from the firmware.
		 */
		const struct cpl_fw6_msg *fw_msg = cpl;

		if (fw_msg->type == FW6_TYPE_CMD_RPL)
			t4vf_handle_fw_rpl(adapter, fw_msg->data);
		break;
	}

	case CPL_FW4_MSG: {
		/*
		 * FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
		 */
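		/*
		 * "rsp + 3" below skips past the message headers (three
		 * 64-bit words) so that @p lines up with the encapsulated
		 * cpl_sge_egr_update.
		 */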
		const struct cpl_sge_egr_update *p = (void *)(rsp + 3);
		opcode = CPL_OPCODE_G(ntohl(p->opcode_qid));
		if (opcode != CPL_SGE_EGR_UPDATE) {
			dev_err(adapter->pdev_dev,
				"unexpected FW4/CPL %#x on FW event queue\n",
				opcode);
			break;
		}
		cpl = (void *)p;
		/*FALLTHROUGH*/
	}

	case CPL_SGE_EGR_UPDATE: {
		/*
		 * We've received an Egress Queue Status Update message.  We
		 * get these if the SGE is configured to send one when the
		 * firmware passes certain points in processing our TX
		 * Ethernet Queue or if we make an explicit request for one.
		 * We use these updates to determine when we may need to
		 * restart a TX Ethernet Queue which was stopped for lack of
		 * free TX Queue Descriptors ...
		 */
		const struct cpl_sge_egr_update *p = cpl;
		unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid));
		struct sge *s = &adapter->sge;
		struct sge_txq *tq;
		struct sge_eth_txq *txq;
		unsigned int eq_idx;

		/*
		 * Perform sanity checking on the Queue ID to make sure it
		 * really refers to one of our TX Ethernet Egress Queues which
		 * is active and matches the queue's ID.  None of these error
		 * conditions should ever happen so we may want to either make
		 * them fatal and/or conditionalize them under DEBUG.
		 */
		eq_idx = EQ_IDX(s, qid);
		if (unlikely(eq_idx >= MAX_EGRQ)) {
			dev_err(adapter->pdev_dev,
				"Egress Update QID %d out of range\n", qid);
			break;
		}
		tq = s->egr_map[eq_idx];
		if (unlikely(tq == NULL)) {
			dev_err(adapter->pdev_dev,
				"Egress Update QID %d TXQ=NULL\n", qid);
			break;
		}
		txq = container_of(tq, struct sge_eth_txq, q);
		if (unlikely(tq->abs_id != qid)) {
			dev_err(adapter->pdev_dev,
				"Egress Update QID %d refers to TXQ %d\n",
				qid, tq->abs_id);
			break;
		}

		/*
		 * Restart a stopped TX Queue which has less than half of its
		 * TX ring in use ...
		 */
		txq->q.restarts++;
		netif_tx_wake_queue(txq->txq);
		break;
	}

	default:
		dev_err(adapter->pdev_dev,
			"unexpected CPL %#x on FW event queue\n", opcode);
	}

	return 0;
}

/*
 * Allocate SGE TX/RX response queues.  Determines how many sets of SGE queues
 * to use and initializes them.  We support multiple "Queue Sets" per port if
 * we have MSI-X, otherwise just one queue set per port.
 */
static int setup_sge_queues(struct adapter *adapter)
{
	struct sge *s = &adapter->sge;
	int err, pidx, msix;

	/*
	 * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
	 * state.
	 */
	bitmap_zero(s->starving_fl, MAX_EGRQ);

	/*
	 * If we're using MSI interrupt mode we need to set up a "forwarded
	 * interrupt" queue which we'll set up with our MSI vector.  The rest
	 * of the ingress queues will be set up to forward their interrupts to
	 * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
	 * the intrq's queue ID as the interrupt forwarding queue for the
	 * subsequent calls ...
	 */
	if (adapter->flags & USING_MSI) {
		err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
					 adapter->port[0], 0, NULL, NULL);
		if (err)
			goto err_free_queues;
	}

	/*
	 * Allocate our ingress queue for asynchronous firmware messages.
	 */
	err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
				 MSIX_FW, NULL, fwevtq_handler);
	if (err)
		goto err_free_queues;

	/*
	 * Allocate each "port"'s initial Queue Sets.  These can be changed
	 * later on ... up to the point where any interface on the adapter is
	 * brought up at which point lots of things get nailed down
	 * permanently ...
	 */
	msix = MSIX_IQFLINT;
	for_each_port(adapter, pidx) {
		struct net_device *dev = adapter->port[pidx];
		struct port_info *pi = netdev_priv(dev);
		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
		int qs;

		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
			err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
						 dev, msix++,
						 &rxq->fl, t4vf_ethrx_handler);
			if (err)
				goto err_free_queues;

			err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
					     netdev_get_tx_queue(dev, qs),
					     s->fw_evtq.cntxt_id);
			if (err)
				goto err_free_queues;

			rxq->rspq.idx = qs;
			memset(&rxq->stats, 0, sizeof(rxq->stats));
		}
	}

	/*
	 * Create the reverse mappings for the queues.
	 */
	s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
	s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
	IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
	for_each_port(adapter, pidx) {
		struct net_device *dev = adapter->port[pidx];
		struct port_info *pi = netdev_priv(dev);
		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
		int qs;

		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
			IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
			EQ_MAP(s, txq->q.abs_id) = &txq->q;

			/*
			 * The FW_IQ_CMD doesn't return the Absolute Queue IDs
			 * for Free Lists but since all of the Egress Queues
			 * (including Free Lists) have Relative Queue IDs
			 * which are computed as Absolute - Base Queue ID, we
			 * can synthesize the Absolute Queue IDs for the Free
			 * Lists.  This is useful for debugging purposes when
			 * we want to dump Queue Contexts via the PF Driver.
			 */
			rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
			EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
		}
	}
	return 0;

err_free_queues:
	t4vf_free_sge_resources(adapter);
	return err;
}

/*
 * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
 * queues.  We configure the RSS CPU lookup table to distribute to the number
 * of HW receive queues, and the response queue lookup table to narrow that
 * down to the response queues actually configured for each "port" (Virtual
 * Interface).  We always configure the RSS mapping for all ports since the
 * mapping table has plenty of entries.
 */
static int setup_rss(struct adapter *adapter)
{
	int pidx;

	for_each_port(adapter, pidx) {
		struct port_info *pi = adap2pinfo(adapter, pidx);
		struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
		u16 rss[MAX_PORT_QSETS];
		int qs, err;

		for (qs = 0; qs < pi->nqsets; qs++)
			rss[qs] = rxq[qs].rspq.abs_id;

		err = t4vf_config_rss_range(adapter, pi->viid,
					    0, pi->rss_size, rss, pi->nqsets);
		if (err)
			return err;

		/*
		 * Perform Global RSS Mode-specific initialization.
		 */
		switch (adapter->params.rss.mode) {
		case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
			/*
			 * If Tunnel All Lookup isn't specified in the global
			 * RSS Configuration, then we need to specify a
			 * default Ingress Queue for any ingress packets which
			 * aren't hashed.  We'll use our first ingress queue
			 * ...
			 */
			if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
				union rss_vi_config config;

				err = t4vf_read_rss_vi_config(adapter,
							      pi->viid,
							      &config);
				if (err)
					return err;
				config.basicvirtual.defaultq =
					rxq[0].rspq.abs_id;
				err = t4vf_write_rss_vi_config(adapter,
							       pi->viid,
							       &config);
				if (err)
					return err;
			}
			break;
		}
	}

	return 0;
}

/*
 * Bring the adapter up.  Called whenever we go from no "ports" open to having
 * one open.  This function performs the actions necessary to make an adapter
 * operational, such as completing the initialization of HW modules, and
 * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
 * this is called "cxgb_up" in the PF Driver.)
 */
static int adapter_up(struct adapter *adapter)
{
	int err;

	/*
	 * If this is the first time we've been called, perform basic
	 * adapter setup.  Once we've done this, many of our adapter
	 * parameters can no longer be changed ...
	 */
	if ((adapter->flags & FULL_INIT_DONE) == 0) {
		err = setup_sge_queues(adapter);
		if (err)
			return err;
		err = setup_rss(adapter);
		if (err) {
			t4vf_free_sge_resources(adapter);
			return err;
		}

		if (adapter->flags & USING_MSIX)
			name_msix_vecs(adapter);
		adapter->flags |= FULL_INIT_DONE;
	}

	/*
	 * Acquire our interrupt resources.  We only support MSI-X and MSI.
	 */
	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
	if (adapter->flags & USING_MSIX)
		err = request_msix_queue_irqs(adapter);
	else
		err = request_irq(adapter->pdev->irq,
				  t4vf_intr_handler(adapter), 0,
				  adapter->name, adapter);
	if (err) {
		dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
			err);
		return err;
	}

	/*
	 * Enable NAPI ingress processing and return success.
	 */
	enable_rx(adapter);
	t4vf_sge_start(adapter);
	return 0;
}

/*
 * Bring the adapter down.  Called whenever the last "port" (Virtual
 * Interface) is closed.  (Note that this routine is called "cxgb_down" in
 * the PF Driver.)
 */
static void adapter_down(struct adapter *adapter)
{
	/*
	 * Free interrupt resources.
	 */
	if (adapter->flags & USING_MSIX)
		free_msix_queue_irqs(adapter);
	else
		free_irq(adapter->pdev->irq, adapter);

	/*
	 * Wait for NAPI handlers to finish.
	 */
	quiesce_rx(adapter);
}

/*
 * Start up a net device.
 */
static int cxgb4vf_open(struct net_device *dev)
{
	int err;
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;

	/*
	 * If this is the first interface that we're opening on the "adapter",
	 * bring the "adapter" up now.
	 */
	if (adapter->open_device_map == 0) {
		err = adapter_up(adapter);
		if (err)
			return err;
	}

	/*
	 * Note that this interface is up and start everything up ...
	 */
	netif_set_real_num_tx_queues(dev, pi->nqsets);
	err = netif_set_real_num_rx_queues(dev, pi->nqsets);
	if (err)
		goto err_unwind;
	err = link_start(dev);
	if (err)
		goto err_unwind;

	netif_tx_start_all_queues(dev);
	set_bit(pi->port_id, &adapter->open_device_map);
	return 0;

err_unwind:
	if (adapter->open_device_map == 0)
		adapter_down(adapter);
	return err;
}

/*
 * Shut down a net device.  This routine is called "cxgb_close" in the PF
 * Driver ...
 */
static int cxgb4vf_stop(struct net_device *dev)
{
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;

	netif_tx_stop_all_queues(dev);
	netif_carrier_off(dev);
	t4vf_enable_vi(adapter, pi->viid, false, false);
	pi->link_cfg.link_ok = 0;

	clear_bit(pi->port_id, &adapter->open_device_map);
	if (adapter->open_device_map == 0)
		adapter_down(adapter);
	return 0;
}

/*
 * Translate our basic statistics into the standard "ifconfig" statistics.
 */
static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
{
	struct t4vf_port_stats stats;
	struct port_info *pi = netdev2pinfo(dev);
	struct adapter *adapter = pi->adapter;
	struct net_device_stats *ns = &dev->stats;
	int err;

	spin_lock(&adapter->stats_lock);
	err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
	spin_unlock(&adapter->stats_lock);

	memset(ns, 0, sizeof(*ns));
	if (err)
		return ns;

	ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
			stats.tx_ucast_bytes + stats.tx_offload_bytes);
	ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
			  stats.tx_ucast_frames + stats.tx_offload_frames);
	ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
			stats.rx_ucast_bytes);
	ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
			  stats.rx_ucast_frames);
	ns->multicast = stats.rx_mcast_frames;
	ns->tx_errors = stats.tx_drop_frames;
	ns->rx_errors = stats.rx_err_frames;

	return ns;
}

/*
 * Collect up to maxaddrs worth of a netdevice's unicast addresses, starting
 * at a specified offset within the list, into an array of address pointers
 * and return the number collected.
 */
static inline unsigned int collect_netdev_uc_list_addrs(const struct net_device *dev,
							const u8 **addr,
							unsigned int offset,
							unsigned int maxaddrs)
{
	unsigned int index = 0;
	unsigned int naddr = 0;
	const struct netdev_hw_addr *ha;

	for_each_dev_addr(dev, ha)
		if (index++ >= offset) {
			addr[naddr++] = ha->addr;
			if (naddr >= maxaddrs)
				break;
		}
	return naddr;
}

/*
 * Collect up to maxaddrs worth of a netdevice's multicast addresses, starting
 * at a specified offset within the list, into an array of address pointers
 * and return the number collected.
 */
static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device *dev,
							const u8 **addr,
							unsigned int offset,
							unsigned int maxaddrs)
{
	unsigned int index = 0;
	unsigned int naddr = 0;
	const struct netdev_hw_addr *ha;

	netdev_for_each_mc_addr(ha, dev)
		if (index++ >= offset) {
			addr[naddr++] = ha->addr;
			if (naddr >= maxaddrs)
				break;
		}
	return naddr;
}

/*
 * Configure the exact and hash address filters to handle a port's multicast
 * and secondary unicast MAC addresses.
 */
static int set_addr_filters(const struct net_device *dev, bool sleep)
{
	u64 mhash = 0;
	u64 uhash = 0;
	bool free = true;
	unsigned int offset, naddr;
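	/*
	 * Scratch array of address pointers; its size bounds how many
	 * addresses we hand to the firmware per t4vf_alloc_mac_filt()
	 * call in the loops below.
	 */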
	const u8 *addr[7];
	int ret;
	const struct port_info *pi = netdev_priv(dev);

	/* first do the secondary unicast addresses */
	for (offset = 0; ; offset += naddr) {
		naddr = collect_netdev_uc_list_addrs(dev, addr, offset,
						     ARRAY_SIZE(addr));
		if (naddr == 0)
			break;

		ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
					  naddr, addr, NULL, &uhash, sleep);
		if (ret < 0)
			return ret;

		free = false;
	}

	/* next set up the multicast addresses */
	for (offset = 0; ; offset += naddr) {
		naddr = collect_netdev_mc_list_addrs(dev, addr, offset,
						     ARRAY_SIZE(addr));
		if (naddr == 0)
			break;

		ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
					  naddr, addr, NULL, &mhash, sleep);
		if (ret < 0)
			return ret;
		free = false;
	}

	return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0,
				  uhash | mhash, sleep);
}

/*
 * Set RX properties of a port, such as promiscuity, address filters, and MTU.
 * If @mtu is -1 it is left unchanged.
 */
static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
{
	int ret;
	struct port_info *pi = netdev_priv(dev);

	ret = set_addr_filters(dev, sleep_ok);
	if (ret == 0)
		ret = t4vf_set_rxmode(pi->adapter, pi->viid, -1,
				      (dev->flags & IFF_PROMISC) != 0,
				      (dev->flags & IFF_ALLMULTI) != 0,
				      1, -1, sleep_ok);
	return ret;
}

/*
 * Set the current receive modes on the device.
 */
static void cxgb4vf_set_rxmode(struct net_device *dev)
{
	/* unfortunately we can't return errors to the stack */
	set_rxmode(dev, -1, false);
}

/*
 * Find the entry in the interrupt holdoff timer value array which comes
 * closest to the specified interrupt holdoff value.
 */
static int closest_timer(const struct sge *s, int us)
{
	int i, timer_idx = 0, min_delta = INT_MAX;

	for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
		int delta = us - s->timer_val[i];

		if (delta < 0)
			delta = -delta;
		if (delta < min_delta) {
			min_delta = delta;
			timer_idx = i;
		}
	}
	return timer_idx;
}

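/*
 * Find the entry in the interrupt holdoff packet count array which comes
 * closest to the specified interrupt holdoff count.
 */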
static int closest_thres(const struct sge *s, int thres)
{
	int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;

	for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
		delta = thres - s->counter_val[i];
		if (delta < 0)
			delta = -delta;
		if (delta < min_delta) {
			min_delta = delta;
			pktcnt_idx = i;
		}
	}
	return pktcnt_idx;
}

/*
 * Return a queue's interrupt hold-off time in us.  0 means no timer.
 */
static unsigned int qtimer_val(const struct adapter *adapter,
			       const struct sge_rspq *rspq)
{
	unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params);

	return timer_idx < SGE_NTIMERS
		? adapter->sge.timer_val[timer_idx]
		: 0;
}

/**
 *	set_rxq_intr_params - set a queue's interrupt holdoff parameters
 *	@adapter: the adapter
 *	@rspq: the RX response queue
 *	@us: the hold-off time in us, or 0 to disable timer
 *	@cnt: the hold-off packet count, or 0 to disable counter
 *
 *	Sets an RX response queue's interrupt hold-off time and packet count.
 *	At least one of the two needs to be enabled for the queue to generate
 *	interrupts.
 */
static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
			       unsigned int us, unsigned int cnt)
{
	unsigned int timer_idx;

	/*
	 * If both the interrupt holdoff timer and count are specified as
	 * zero, default to a holdoff count of 1 ...
	 */
	if ((us | cnt) == 0)
		cnt = 1;

	/*
	 * If an interrupt holdoff count has been specified, then find the
	 * closest configured holdoff count and use that.  If the response
	 * queue has already been created, then update its queue context
	 * parameters ...
	 */
	if (cnt) {
		int err;
		u32 v, pktcnt_idx;

		pktcnt_idx = closest_thres(&adapter->sge, cnt);
		if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
			v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
			    FW_PARAMS_PARAM_X_V(
					FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
			    FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id);
			err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
			if (err)
				return err;
		}
		rspq->pktcnt_idx = pktcnt_idx;
	}

	/*
	 * Compute the closest holdoff timer index from the supplied holdoff
	 * timer value.
	 */
	timer_idx = (us == 0
		     ? SGE_TIMER_RSTRT_CNTR
		     : closest_timer(&adapter->sge, us));

	/*
	 * Update the response queue's interrupt coalescing parameters and
	 * return success.
	 */
	rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
			     QINTR_CNT_EN_V(cnt > 0));
	return 0;
}

/*
 * Return a version number to identify the type of adapter.  The scheme is:
 * - bits 0..9: chip version
 * - bits 10..15: chip revision
 */
static inline unsigned int mk_adap_vers(const struct adapter *adapter)
{
	/*
	 * Chip version 4, revision 0x3f (cxgb4vf).
	 */
	return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
}

/*
 * Execute the specified ioctl command.
 */
static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int ret = 0;

	switch (cmd) {
	/*
	 * The VF Driver doesn't have access to any of the other common
	 * Ethernet device ioctl()'s (like reading/writing PHY registers,
	 * etc.) ...
	 */

	default:
		ret = -EOPNOTSUPP;
		break;
	}
	return ret;
}

/*
 * Change the device's MTU.
 */
static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
{
	int ret;
	struct port_info *pi = netdev_priv(dev);

	/* accommodate SACK */
	if (new_mtu < 81)
		return -EINVAL;

	ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
			      -1, -1, -1, -1, true);
	if (!ret)
		dev->mtu = new_mtu;
	return ret;
}

static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
	netdev_features_t features)
{
	/*
	 * Since there is no support for separate rx/tx vlan accel
	 * enable/disable make sure tx flag is always in same state as rx.
	 */
	if (features & NETIF_F_HW_VLAN_CTAG_RX)
		features |= NETIF_F_HW_VLAN_CTAG_TX;
	else
		features &= ~NETIF_F_HW_VLAN_CTAG_TX;

	return features;
}

static int cxgb4vf_set_features(struct net_device *dev,
	netdev_features_t features)
{
	struct port_info *pi = netdev_priv(dev);
	netdev_features_t changed = dev->features ^ features;

	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
		t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
				features & NETIF_F_HW_VLAN_CTAG_TX, 0);

	return 0;
}

/*
 * Change the device's MAC address.
 */
static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
{
	int ret;
	struct sockaddr *addr = _addr;
	struct port_info *pi = netdev_priv(dev);

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
			      addr->sa_data, true);
	if (ret < 0)
		return ret;

	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
	pi->xact_addr_filt = ret;
	return 0;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Poll all of our receive queues.  This is called outside of normal interrupt
 * context.
 */
static void cxgb4vf_poll_controller(struct net_device *dev)
{
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;

	if (adapter->flags & USING_MSIX) {
		struct sge_eth_rxq *rxq;
		int nqsets;

		rxq = &adapter->sge.ethrxq[pi->first_qset];
		for (nqsets = pi->nqsets; nqsets; nqsets--) {
			t4vf_sge_intr_msix(0, &rxq->rspq);
			rxq++;
		}
	} else
		t4vf_intr_handler(adapter)(0, adapter);
}
#endif

/*
 * Ethtool operations.
 * ===================
 *
 * Note that we don't support any ethtool operations which change the physical
 * state of the port to which we're linked.
 */

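/*
 * Translate the firmware Port Capabilities for a given Port Type into an
 * ethtool link-mode bitmap; the same translation serves both the
 * "supported" and "advertising" fields in cxgb4vf_get_settings() below.
 */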
static unsigned int t4vf_from_fw_linkcaps(enum fw_port_type type,
					  unsigned int caps)
{
	unsigned int v = 0;

	if (type == FW_PORT_TYPE_BT_SGMII || type == FW_PORT_TYPE_BT_XFI ||
	    type == FW_PORT_TYPE_BT_XAUI) {
		v |= SUPPORTED_TP;
		if (caps & FW_PORT_CAP_SPEED_100M)
			v |= SUPPORTED_100baseT_Full;
		if (caps & FW_PORT_CAP_SPEED_1G)
			v |= SUPPORTED_1000baseT_Full;
		if (caps & FW_PORT_CAP_SPEED_10G)
			v |= SUPPORTED_10000baseT_Full;
	} else if (type == FW_PORT_TYPE_KX4 || type == FW_PORT_TYPE_KX) {
		v |= SUPPORTED_Backplane;
		if (caps & FW_PORT_CAP_SPEED_1G)
			v |= SUPPORTED_1000baseKX_Full;
		if (caps & FW_PORT_CAP_SPEED_10G)
			v |= SUPPORTED_10000baseKX4_Full;
	} else if (type == FW_PORT_TYPE_KR)
		v |= SUPPORTED_Backplane | SUPPORTED_10000baseKR_Full;
	else if (type == FW_PORT_TYPE_BP_AP)
		v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
		     SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full;
	else if (type == FW_PORT_TYPE_BP4_AP)
		v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC |
		     SUPPORTED_10000baseKR_Full | SUPPORTED_1000baseKX_Full |
		     SUPPORTED_10000baseKX4_Full;
	else if (type == FW_PORT_TYPE_FIBER_XFI ||
		 type == FW_PORT_TYPE_FIBER_XAUI ||
		 type == FW_PORT_TYPE_SFP ||
		 type == FW_PORT_TYPE_QSFP_10G ||
		 type == FW_PORT_TYPE_QSA) {
		v |= SUPPORTED_FIBRE;
		if (caps & FW_PORT_CAP_SPEED_1G)
			v |= SUPPORTED_1000baseT_Full;
		if (caps & FW_PORT_CAP_SPEED_10G)
			v |= SUPPORTED_10000baseT_Full;
	} else if (type == FW_PORT_TYPE_BP40_BA ||
		   type == FW_PORT_TYPE_QSFP) {
		v |= SUPPORTED_40000baseSR4_Full;
		v |= SUPPORTED_FIBRE;
	}

	if (caps & FW_PORT_CAP_ANEG)
		v |= SUPPORTED_Autoneg;
	return v;
}

static int cxgb4vf_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
	const struct port_info *p = netdev_priv(dev);

	if (p->port_type == FW_PORT_TYPE_BT_SGMII ||
	    p->port_type == FW_PORT_TYPE_BT_XFI ||
	    p->port_type == FW_PORT_TYPE_BT_XAUI)
		cmd->port = PORT_TP;
	else if (p->port_type == FW_PORT_TYPE_FIBER_XFI ||
		 p->port_type == FW_PORT_TYPE_FIBER_XAUI)
		cmd->port = PORT_FIBRE;
	else if (p->port_type == FW_PORT_TYPE_SFP ||
		 p->port_type == FW_PORT_TYPE_QSFP_10G ||
		 p->port_type == FW_PORT_TYPE_QSA ||
		 p->port_type == FW_PORT_TYPE_QSFP) {
		if (p->mod_type == FW_PORT_MOD_TYPE_LR ||
		    p->mod_type == FW_PORT_MOD_TYPE_SR ||
		    p->mod_type == FW_PORT_MOD_TYPE_ER ||
		    p->mod_type == FW_PORT_MOD_TYPE_LRM)
			cmd->port = PORT_FIBRE;
		else if (p->mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
			 p->mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
			cmd->port = PORT_DA;
		else
			cmd->port = PORT_OTHER;
	} else
		cmd->port = PORT_OTHER;

	if (p->mdio_addr >= 0) {
		cmd->phy_address = p->mdio_addr;
		cmd->transceiver = XCVR_EXTERNAL;
		cmd->mdio_support = p->port_type == FW_PORT_TYPE_BT_SGMII ?
			MDIO_SUPPORTS_C22 : MDIO_SUPPORTS_C45;
	} else {
		cmd->phy_address = 0;  /* not really, but no better option */
		cmd->transceiver = XCVR_INTERNAL;
		cmd->mdio_support = 0;
	}

	cmd->supported = t4vf_from_fw_linkcaps(p->port_type,
					       p->link_cfg.supported);
	cmd->advertising = t4vf_from_fw_linkcaps(p->port_type,
					    p->link_cfg.advertising);
	ethtool_cmd_speed_set(cmd,
			      netif_carrier_ok(dev) ? p->link_cfg.speed : 0);
	cmd->duplex = DUPLEX_FULL;
	cmd->autoneg = p->link_cfg.autoneg;
	cmd->maxtxpkt = 0;
	cmd->maxrxpkt = 0;
	return 0;
}

/*
 * Return our driver information.
 */
static void cxgb4vf_get_drvinfo(struct net_device *dev,
				struct ethtool_drvinfo *drvinfo)
{
	struct adapter *adapter = netdev2adap(dev);

	strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
	strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
		sizeof(drvinfo->bus_info));
	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
		 "%u.%u.%u.%u, TP %u.%u.%u.%u",
		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev),
		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev),
		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev),
		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev),
		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev),
		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev),
		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev),
		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev));
}

/*
 * Return current adapter message level.
 */
static u32 cxgb4vf_get_msglevel(struct net_device *dev)
{
	return netdev2adap(dev)->msg_enable;
}

/*
 * Set current adapter message level.
 */
static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
{
	netdev2adap(dev)->msg_enable = msglevel;
}

/*
 * Return the device's current Queue Set ring size parameters along with the
 * allowed maximum values.  Since ethtool doesn't understand the concept of
 * multi-queue devices, we just return the current values associated with the
 * first Queue Set.
 */
static void cxgb4vf_get_ringparam(struct net_device *dev,
				  struct ethtool_ringparam *rp)
{
	const struct port_info *pi = netdev_priv(dev);
	const struct sge *s = &pi->adapter->sge;

	rp->rx_max_pending = MAX_RX_BUFFERS;
	rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
	rp->rx_jumbo_max_pending = 0;
	rp->tx_max_pending = MAX_TXQ_ENTRIES;

	rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
	rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
	rp->rx_jumbo_pending = 0;
	rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
}

/*
 * Set the Queue Set ring size parameters for the device.  Again, since
 * ethtool doesn't allow for the concept of multiple queues per device, we'll
 * apply these new values across all of the Queue Sets associated with the
 * device -- after vetting them of course!
 */
static int cxgb4vf_set_ringparam(struct net_device *dev,
				 struct ethtool_ringparam *rp)
{
	const struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;
	struct sge *s = &adapter->sge;
	int qs;

	if (rp->rx_pending > MAX_RX_BUFFERS ||
	    rp->rx_jumbo_pending ||
	    rp->tx_pending > MAX_TXQ_ENTRIES ||
	    rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
	    rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
	    rp->rx_pending < MIN_FL_ENTRIES ||
	    rp->tx_pending < MIN_TXQ_ENTRIES)
		return -EINVAL;

	if (adapter->flags & FULL_INIT_DONE)
		return -EBUSY;

	for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
		s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
		s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
		s->ethtxq[qs].q.size = rp->tx_pending;
	}
	return 0;
}

/*
 * Return the interrupt holdoff timer and count for the first Queue Set on the
 * device.  Our extension ioctl() (the cxgbtool interface) allows the
 * interrupt holdoff timer to be read on all of the device's Queue Sets.
 */
static int cxgb4vf_get_coalesce(struct net_device *dev,
				struct ethtool_coalesce *coalesce)
{
	const struct port_info *pi = netdev_priv(dev);
	const struct adapter *adapter = pi->adapter;
	const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;

	coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
	coalesce->rx_max_coalesced_frames =
		((rspq->intr_params & QINTR_CNT_EN_F)
		 ? adapter->sge.counter_val[rspq->pktcnt_idx]
		 : 0);
	return 0;
}

/*
 * Set the RX interrupt holdoff timer and count for the first Queue Set on the
 * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
 * the interrupt holdoff timer on any of the device's Queue Sets.
 */
static int cxgb4vf_set_coalesce(struct net_device *dev,
				struct ethtool_coalesce *coalesce)
{
	const struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;

	return set_rxq_intr_params(adapter,
				   &adapter->sge.ethrxq[pi->first_qset].rspq,
				   coalesce->rx_coalesce_usecs,
				   coalesce->rx_max_coalesced_frames);
}

/*
 * Report current port link pause parameter settings.
 */
static void cxgb4vf_get_pauseparam(struct net_device *dev,
				   struct ethtool_pauseparam *pauseparam)
{
	struct port_info *pi = netdev_priv(dev);

	pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
	pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
	pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
}

/*
 * Identify the port by blinking the port's LED.
 */
static int cxgb4vf_phys_id(struct net_device *dev,
			   enum ethtool_phys_id_state state)
{
	unsigned int val;
	struct port_info *pi = netdev_priv(dev);

	if (state == ETHTOOL_ID_ACTIVE)
		val = 0xffff;
	else if (state == ETHTOOL_ID_INACTIVE)
		val = 0;
	else
		return -EINVAL;

	return t4vf_identify_port(pi->adapter, pi->viid, val);
}

/*
 * Port stats maintained per queue of the port.
 */
struct queue_port_stats {
	u64 tso;
	u64 tx_csum;
	u64 rx_csum;
	u64 vlan_ex;
	u64 vlan_ins;
	u64 lro_pkts;
	u64 lro_merged;
};

/*
 * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
 * these need to match the order of statistics returned by
 * t4vf_get_port_stats().
 */
static const char stats_strings[][ETH_GSTRING_LEN] = {
	/*
	 * These must match the layout of the t4vf_port_stats structure.
	 */
	"TxBroadcastBytes  ",
	"TxBroadcastFrames ",
	"TxMulticastBytes  ",
	"TxMulticastFrames ",
	"TxUnicastBytes    ",
	"TxUnicastFrames   ",
	"TxDroppedFrames   ",
	"TxOffloadBytes    ",
	"TxOffloadFrames   ",
	"RxBroadcastBytes  ",
	"RxBroadcastFrames ",
	"RxMulticastBytes  ",
	"RxMulticastFrames ",
	"RxUnicastBytes    ",
	"RxUnicastFrames   ",
	"RxErrorFrames     ",

	/*
	 * These are accumulated per-queue statistics and must match the
	 * order of the fields in the queue_port_stats structure.
	 */
	"TSO               ",
	"TxCsumOffload     ",
	"RxCsumGood        ",
	"VLANextractions   ",
	"VLANinsertions    ",
	"GROPackets        ",
	"GROMerged         ",
};

/*
 * Return the number of statistics in the specified statistics set.
 */
static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return ARRAY_SIZE(stats_strings);
	default:
		return -EOPNOTSUPP;
	}
	/*NOTREACHED*/
}

/*
 * Return the strings for the specified statistics set.
 */
static void cxgb4vf_get_strings(struct net_device *dev,
				u32 sset,
				u8 *data)
{
	switch (sset) {
	case ETH_SS_STATS:
		memcpy(data, stats_strings, sizeof(stats_strings));
		break;
	}
}

/*
 * Small utility routine to accumulate queue statistics across the queues of
 * a "port".
 */
static void collect_sge_port_stats(const struct adapter *adapter,
				   const struct port_info *pi,
				   struct queue_port_stats *stats)
{
	const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
	const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
	int qs;

	memset(stats, 0, sizeof(*stats));
	for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
		stats->tso += txq->tso;
		stats->tx_csum += txq->tx_cso;
		stats->rx_csum += rxq->stats.rx_cso;
		stats->vlan_ex += rxq->stats.vlan_ex;
		stats->vlan_ins += txq->vlan_ins;
		stats->lro_pkts += rxq->stats.lro_pkts;
		stats->lro_merged += rxq->stats.lro_merged;
	}
}

/*
 * Return the ETH_SS_STATS statistics set.
 */
static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
				      struct ethtool_stats *stats,
				      u64 *data)
{
	struct port_info *pi = netdev2pinfo(dev);
	struct adapter *adapter = pi->adapter;
	int err = t4vf_get_port_stats(adapter, pi->pidx,
				      (struct t4vf_port_stats *)data);
	if (err)
		memset(data, 0, sizeof(struct t4vf_port_stats));

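	/*
	 * The per-queue statistics follow the adapter port statistics in
	 * the output array, so step the output pointer past the firmware
	 * statistics we just filled in.
	 */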
	data += sizeof(struct t4vf_port_stats) / sizeof(u64);
	collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
}

/*
 * Return the size of our register map.
 */
static int cxgb4vf_get_regs_len(struct net_device *dev)
{
	return T4VF_REGMAP_SIZE;
}

/*
 * Dump a block of registers, start to end inclusive, into a buffer.
 */
static void reg_block_dump(struct adapter *adapter, void *regbuf,
			   unsigned int start, unsigned int end)
{
	u32 *bp = regbuf + start - T4VF_REGMAP_START;

	for ( ; start <= end; start += sizeof(u32)) {
		/*
		 * Avoid reading the Mailbox Control register since that
		 * can trigger a Mailbox Ownership Arbitration cycle and
		 * interfere with communication with the firmware.
		 */
		if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
			*bp++ = 0xffff;
		else
			*bp++ = t4_read_reg(adapter, start);
	}
}

/*
 * Copy our entire register map into the provided buffer.
 */
static void cxgb4vf_get_regs(struct net_device *dev,
			     struct ethtool_regs *regs,
			     void *regbuf)
{
	struct adapter *adapter = netdev2adap(dev);

	regs->version = mk_adap_vers(adapter);

	/*
	 * Fill in register buffer with our register map.
	 */
	memset(regbuf, 0, T4VF_REGMAP_SIZE);

	reg_block_dump(adapter, regbuf,
		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
	reg_block_dump(adapter, regbuf,
		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);

	/*
	 * T5 adds new registers in the PL Register map.
	 */
	reg_block_dump(adapter, regbuf,
		       T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
		       T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip)
		       ? PL_VF_WHOAMI_A : PL_VF_REVISION_A));
	reg_block_dump(adapter, regbuf,
		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);

	reg_block_dump(adapter, regbuf,
		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
}

/*
 * Report current Wake On LAN settings.
 */
static void cxgb4vf_get_wol(struct net_device *dev,
			    struct ethtool_wolinfo *wol)
{
	wol->supported = 0;
	wol->wolopts = 0;
	memset(&wol->sopass, 0, sizeof(wol->sopass));
}

/*
 * TCP Segmentation Offload flags which we support.
 */
#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)

static const struct ethtool_ops cxgb4vf_ethtool_ops = {
	.get_settings		= cxgb4vf_get_settings,
	.get_drvinfo		= cxgb4vf_get_drvinfo,
	.get_msglevel		= cxgb4vf_get_msglevel,
	.set_msglevel		= cxgb4vf_set_msglevel,
	.get_ringparam		= cxgb4vf_get_ringparam,
	.set_ringparam		= cxgb4vf_set_ringparam,
	.get_coalesce		= cxgb4vf_get_coalesce,
	.set_coalesce		= cxgb4vf_set_coalesce,
	.get_pauseparam		= cxgb4vf_get_pauseparam,
	.get_link		= ethtool_op_get_link,
	.get_strings		= cxgb4vf_get_strings,
	.set_phys_id		= cxgb4vf_phys_id,
	.get_sset_count		= cxgb4vf_get_sset_count,
	.get_ethtool_stats	= cxgb4vf_get_ethtool_stats,
	.get_regs_len		= cxgb4vf_get_regs_len,
	.get_regs		= cxgb4vf_get_regs,
	.get_wol		= cxgb4vf_get_wol,
};

/*
 * /sys/kernel/debug/cxgb4vf support code and data.
 * ================================================
 */

/*
 * Show SGE Queue Set information.  We display QPL Queue Sets per line.
 */
#define QPL	4

static int sge_qinfo_show(struct seq_file *seq, void *v)
{
	struct adapter *adapter = seq->private;
	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
	int qs, r = (uintptr_t)v - 1;

	if (r)
		seq_putc(seq, '\n');

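	/*
	 * Helper macros: S3() emits one labelled row with one column per
	 * Queue Set; T() and R() are shorthands which index the TX and RX
	 * queue arrays respectively.
	 */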
1741 	#define S3(fmt_spec, s, v) \
1742 		do {\
1743 			seq_printf(seq, "%-12s", s); \
1744 			for (qs = 0; qs < n; ++qs) \
1745 				seq_printf(seq, " %16" fmt_spec, v); \
1746 			seq_putc(seq, '\n'); \
1747 		} while (0)
1748 	#define S(s, v)		S3("s", s, v)
1749 	#define T(s, v)		S3("u", s, txq[qs].v)
1750 	#define R(s, v)		S3("u", s, rxq[qs].v)
1751 
1752 	if (r < eth_entries) {
1753 		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1754 		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1755 		int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1756 
1757 		S("QType:", "Ethernet");
1758 		S("Interface:",
1759 		  (rxq[qs].rspq.netdev
1760 		   ? rxq[qs].rspq.netdev->name
1761 		   : "N/A"));
1762 		S3("d", "Port:",
1763 		   (rxq[qs].rspq.netdev
1764 		    ? ((struct port_info *)
1765 		       netdev_priv(rxq[qs].rspq.netdev))->port_id
1766 		    : -1));
1767 		T("TxQ ID:", q.abs_id);
1768 		T("TxQ size:", q.size);
1769 		T("TxQ inuse:", q.in_use);
1770 		T("TxQ PIdx:", q.pidx);
1771 		T("TxQ CIdx:", q.cidx);
1772 		R("RspQ ID:", rspq.abs_id);
1773 		R("RspQ size:", rspq.size);
1774 		R("RspQE size:", rspq.iqe_len);
1775 		S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
1776 		S3("u", "Intr pktcnt:",
1777 		   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
1778 		R("RspQ CIdx:", rspq.cidx);
1779 		R("RspQ Gen:", rspq.gen);
1780 		R("FL ID:", fl.abs_id);
1781 		R("FL size:", fl.size - MIN_FL_RESID);
1782 		R("FL avail:", fl.avail);
1783 		R("FL PIdx:", fl.pidx);
1784 		R("FL CIdx:", fl.cidx);
1785 		return 0;
1786 	}
1787 
1788 	r -= eth_entries;
1789 	if (r == 0) {
1790 		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1791 
1792 		seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
1793 		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
1794 		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1795 			   qtimer_val(adapter, evtq));
1796 		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1797 			   adapter->sge.counter_val[evtq->pktcnt_idx]);
1798 		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
1799 		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
1800 	} else if (r == 1) {
1801 		const struct sge_rspq *intrq = &adapter->sge.intrq;
1802 
1803 		seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
1804 		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
1805 		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1806 			   qtimer_val(adapter, intrq));
1807 		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1808 			   adapter->sge.counter_val[intrq->pktcnt_idx]);
1809 		seq_printf(seq, "%-12s %16u\n", "RspQ CIdx:", intrq->cidx);
1810 		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
1811 	}
1812 
1813 	#undef R
1814 	#undef T
1815 	#undef S
1816 	#undef S3
1817 
1818 	return 0;
1819 }
1820 
1821 /*
1822  * Return the number of "entries" in our "file".  We group the multi-Queue
1823  * sections with QPL Queue Sets per "entry".  The sections of the output are:
1824  *
1825  *     Ethernet RX/TX Queue Sets
1826  *     Firmware Event Queue
1827  *     Forwarded Interrupt Queue (if in MSI mode)
1828  */
1829 static int sge_queue_entries(const struct adapter *adapter)
1830 {
1831 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1832 		((adapter->flags & USING_MSI) != 0);
1833 }
1834 
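/*
 * seq_file iterators use a NULL return to mean "end of sequence", so we
 * bias each position by one: position "pos" is handed out as
 * (void *)(pos + 1) and sge_qinfo_show() recovers it via
 * "r = (uintptr_t)v - 1".
 */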
1835 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
1836 {
1837 	int entries = sge_queue_entries(seq->private);
1838 
1839 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1840 }
1841 
1842 static void sge_queue_stop(struct seq_file *seq, void *v)
1843 {
1844 }
1845 
1846 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
1847 {
1848 	int entries = sge_queue_entries(seq->private);
1849 
1850 	++*pos;
1851 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1852 }
1853 
1854 static const struct seq_operations sge_qinfo_seq_ops = {
1855 	.start = sge_queue_start,
1856 	.next  = sge_queue_next,
1857 	.stop  = sge_queue_stop,
1858 	.show  = sge_qinfo_show
1859 };
1860 
1861 static int sge_qinfo_open(struct inode *inode, struct file *file)
1862 {
1863 	int res = seq_open(file, &sge_qinfo_seq_ops);
1864 
1865 	if (!res) {
1866 		struct seq_file *seq = file->private_data;
1867 		seq->private = inode->i_private;
1868 	}
1869 	return res;
1870 }
1871 
1872 static const struct file_operations sge_qinfo_debugfs_fops = {
1873 	.owner   = THIS_MODULE,
1874 	.open    = sge_qinfo_open,
1875 	.read    = seq_read,
1876 	.llseek  = seq_lseek,
1877 	.release = seq_release,
1878 };
1879 
1880 /*
1881  * Show SGE Queue Set statistics.  We display QPL Queue Sets per line.
1882  */
1883 #define QPL	4
1884 
1885 static int sge_qstats_show(struct seq_file *seq, void *v)
1886 {
1887 	struct adapter *adapter = seq->private;
1888 	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1889 	int qs, r = (uintptr_t)v - 1;
1890 
1891 	if (r)
1892 		seq_putc(seq, '\n');
1893 
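	/*
	 * Same row-emitting helpers as in sge_qinfo_show() above, plus
	 * T3()/R3() variants which take an explicit printf format for
	 * per-queue fields that aren't unsigned longs.
	 */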
1894 	#define S3(fmt, s, v) \
1895 		do { \
1896 			seq_printf(seq, "%-16s", s); \
1897 			for (qs = 0; qs < n; ++qs) \
1898 				seq_printf(seq, " %8" fmt, v); \
1899 			seq_putc(seq, '\n'); \
1900 		} while (0)
1901 	#define S(s, v)		S3("s", s, v)
1902 
1903 	#define T3(fmt, s, v)	S3(fmt, s, txq[qs].v)
1904 	#define T(s, v)		T3("lu", s, v)
1905 
1906 	#define R3(fmt, s, v)	S3(fmt, s, rxq[qs].v)
1907 	#define R(s, v)		R3("lu", s, v)
1908 
1909 	if (r < eth_entries) {
1910 		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1911 		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1912 		int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1913 
1914 		S("QType:", "Ethernet");
1915 		S("Interface:",
1916 		  (rxq[qs].rspq.netdev
1917 		   ? rxq[qs].rspq.netdev->name
1918 		   : "N/A"));
1919 		R3("u", "RspQNullInts:", rspq.unhandled_irqs);
1920 		R("RxPackets:", stats.pkts);
1921 		R("RxCSO:", stats.rx_cso);
1922 		R("VLANxtract:", stats.vlan_ex);
1923 		R("LROmerged:", stats.lro_merged);
1924 		R("LROpackets:", stats.lro_pkts);
1925 		R("RxDrops:", stats.rx_drops);
1926 		T("TSO:", tso);
1927 		T("TxCSO:", tx_cso);
1928 		T("VLANins:", vlan_ins);
1929 		T("TxQFull:", q.stops);
1930 		T("TxQRestarts:", q.restarts);
1931 		T("TxMapErr:", mapping_err);
1932 		R("FLAllocErr:", fl.alloc_failed);
1933 		R("FLLrgAlcErr:", fl.large_alloc_failed);
1934 		R("FLStarving:", fl.starving);
1935 		return 0;
1936 	}
1937 
1938 	r -= eth_entries;
1939 	if (r == 0) {
1940 		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1941 
1942 		seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
1943 		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
1944 			   evtq->unhandled_irqs);
1945 		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
1946 		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
1947 	} else if (r == 1) {
1948 		const struct sge_rspq *intrq = &adapter->sge.intrq;
1949 
1950 		seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
1951 		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
1952 			   intrq->unhandled_irqs);
1953 		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
1954 		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
1955 	}
1956 
1957 	#undef R
1958 	#undef T
1959 	#undef S
1960 	#undef R3
1961 	#undef T3
1962 	#undef S3
1963 
1964 	return 0;
1965 }
1966 
1967 /*
1968  * Return the number of "entries" in our "file".  We group the multi-Queue
1969  * sections with QPL Queue Sets per "entry".  The sections of the output are:
1970  *
1971  *     Ethernet RX/TX Queue Sets
1972  *     Firmware Event Queue
1973  *     Forwarded Interrupt Queue (if in MSI mode)
1974  */
1975 static int sge_qstats_entries(const struct adapter *adapter)
1976 {
1977 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1978 		((adapter->flags & USING_MSI) != 0);
1979 }
1980 
1981 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
1982 {
1983 	int entries = sge_qstats_entries(seq->private);
1984 
1985 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1986 }
1987 
1988 static void sge_qstats_stop(struct seq_file *seq, void *v)
1989 {
1990 }
1991 
1992 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
1993 {
1994 	int entries = sge_qstats_entries(seq->private);
1995 
1996 	(*pos)++;
1997 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1998 }
1999 
2000 static const struct seq_operations sge_qstats_seq_ops = {
2001 	.start = sge_qstats_start,
2002 	.next  = sge_qstats_next,
2003 	.stop  = sge_qstats_stop,
2004 	.show  = sge_qstats_show
2005 };
2006 
2007 static int sge_qstats_open(struct inode *inode, struct file *file)
2008 {
2009 	int res = seq_open(file, &sge_qstats_seq_ops);
2010 
2011 	if (res == 0) {
2012 		struct seq_file *seq = file->private_data;
2013 		seq->private = inode->i_private;
2014 	}
2015 	return res;
2016 }
2017 
2018 static const struct file_operations sge_qstats_proc_fops = {
2019 	.owner   = THIS_MODULE,
2020 	.open    = sge_qstats_open,
2021 	.read    = seq_read,
2022 	.llseek  = seq_lseek,
2023 	.release = seq_release,
2024 };
2025 
2026 /*
2027  * Show PCI-E SR-IOV Virtual Function Resource Limits.
2028  */
2029 static int resources_show(struct seq_file *seq, void *v)
2030 {
2031 	struct adapter *adapter = seq->private;
2032 	struct vf_resources *vfres = &adapter->params.vfres;
2033 
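	/*
	 * Emit one "Description (field_name): value" line per resource; the
	 * "#var" stringizes the vf_resources field name so the output shows
	 * both the human-readable description and the structure member.
	 */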
2034 	#define S(desc, fmt, var) \
2035 		seq_printf(seq, "%-60s " fmt "\n", \
2036 			   desc " (" #var "):", vfres->var)
2037 
2038 	S("Virtual Interfaces", "%d", nvi);
2039 	S("Egress Queues", "%d", neq);
2040 	S("Ethernet Control", "%d", nethctrl);
2041 	S("Ingress Queues w/ Free Lists/Interrupts", "%d", niqflint);
2042 	S("Ingress Queues", "%d", niq);
2043 	S("Traffic Class", "%d", tc);
2044 	S("Port Access Rights Mask", "%#x", pmask);
2045 	S("MAC Address Filters", "%d", nexactf);
2046 	S("Firmware Command Read Capabilities", "%#x", r_caps);
2047 	S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
2048 
2049 	#undef S
2050 
2051 	return 0;
2052 }
2053 
2054 static int resources_open(struct inode *inode, struct file *file)
2055 {
2056 	return single_open(file, resources_show, inode->i_private);
2057 }
2058 
2059 static const struct file_operations resources_proc_fops = {
2060 	.owner   = THIS_MODULE,
2061 	.open    = resources_open,
2062 	.read    = seq_read,
2063 	.llseek  = seq_lseek,
2064 	.release = single_release,
2065 };
2066 
2067 /*
2068  * Show Virtual Interfaces.
2069  */
2070 static int interfaces_show(struct seq_file *seq, void *v)
2071 {
2072 	if (v == SEQ_START_TOKEN) {
2073 		seq_puts(seq, "Interface  Port   VIID\n");
2074 	} else {
2075 		struct adapter *adapter = seq->private;
2076 		int pidx = (uintptr_t)v - 2;
2077 		struct net_device *dev = adapter->port[pidx];
2078 		struct port_info *pi = netdev_priv(dev);
2079 
2080 		seq_printf(seq, "%9s  %4d  %#5x\n",
2081 			   dev->name, pi->port_id, pi->viid);
2082 	}
2083 	return 0;
2084 }
2085 
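/*
 * Iterator positions are again biased so that NULL can signal the end of
 * the sequence: position 0 is SEQ_START_TOKEN (the header line) and
 * position "pos" >= 1 is handed out as (void *)(pos + 1), which
 * interfaces_show() converts back to a port index via "(uintptr_t)v - 2".
 */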
2086 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
2087 {
2088 	return pos <= adapter->params.nports
2089 		? (void *)(uintptr_t)(pos + 1)
2090 		: NULL;
2091 }
2092 
2093 static void *interfaces_start(struct seq_file *seq, loff_t *pos)
2094 {
2095 	return *pos
2096 		? interfaces_get_idx(seq->private, *pos)
2097 		: SEQ_START_TOKEN;
2098 }
2099 
2100 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
2101 {
2102 	(*pos)++;
2103 	return interfaces_get_idx(seq->private, *pos);
2104 }
2105 
2106 static void interfaces_stop(struct seq_file *seq, void *v)
2107 {
2108 }
2109 
2110 static const struct seq_operations interfaces_seq_ops = {
2111 	.start = interfaces_start,
2112 	.next  = interfaces_next,
2113 	.stop  = interfaces_stop,
2114 	.show  = interfaces_show
2115 };
2116 
2117 static int interfaces_open(struct inode *inode, struct file *file)
2118 {
2119 	int res = seq_open(file, &interfaces_seq_ops);
2120 
2121 	if (res == 0) {
2122 		struct seq_file *seq = file->private_data;
2123 		seq->private = inode->i_private;
2124 	}
2125 	return res;
2126 }
2127 
2128 static const struct file_operations interfaces_proc_fops = {
2129 	.owner   = THIS_MODULE,
2130 	.open    = interfaces_open,
2131 	.read    = seq_read,
2132 	.llseek  = seq_lseek,
2133 	.release = seq_release,
2134 };
2135 
2136 /*
2137  * /sys/kernel/debug/cxgb4vf/ files list.
2138  */
2139 struct cxgb4vf_debugfs_entry {
2140 	const char *name;		/* name of debugfs node */
2141 	umode_t mode;			/* file system mode */
2142 	const struct file_operations *fops;
2143 };
2144 
2145 static struct cxgb4vf_debugfs_entry debugfs_files[] = {
2146 	{ "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
2147 	{ "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
2148 	{ "resources",  S_IRUGO, &resources_proc_fops },
2149 	{ "interfaces", S_IRUGO, &interfaces_proc_fops },
2150 };
2151 
2152 /*
2153  * Module and device initialization and cleanup code.
2154  * ==================================================
2155  */
2156 
2157 /*
2158  * Set up our /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2159  * directory (debugfs_root) has already been set up.
2160  */
2161 static int setup_debugfs(struct adapter *adapter)
2162 {
2163 	int i;
2164 
2165 	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2166 
2167 	/*
2168 	 * Debugfs support is best effort.
2169 	 */
2170 	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2171 		(void)debugfs_create_file(debugfs_files[i].name,
2172 				  debugfs_files[i].mode,
2173 				  adapter->debugfs_root,
2174 				  (void *)adapter,
2175 				  debugfs_files[i].fops);
2176 
2177 	return 0;
2178 }
2179 
2180 /*
2181  * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
2182  * it to our caller to tear down the directory (debugfs_root).
2183  */
2184 static void cleanup_debugfs(struct adapter *adapter)
2185 {
2186 	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2187 
2188 	/*
2189 	 * Unlike procfs-style cleanup code, we don't need to remove
2190 	 * individual entries because a call will be made to
2191 	 * debugfs_remove_recursive().  We just need to clean up any ancillary
2192 	 * persistent state.
2193 	 */
2194 	/* nothing to do */
2195 }
2196 
2197 /*
2198  * Perform early "adapter" initialization.  This is where we discover what
2199  * adapter parameters we're going to be using and initialize basic adapter
2200  * hardware support.
2201  */
2202 static int adap_init0(struct adapter *adapter)
2203 {
2204 	struct vf_resources *vfres = &adapter->params.vfres;
2205 	struct sge_params *sge_params = &adapter->params.sge;
2206 	struct sge *s = &adapter->sge;
2207 	unsigned int ethqsets;
2208 	int err;
2209 	u32 param, val = 0;
2210 
2211 	/*
2212 	 * Wait for the device to become ready before proceeding ...
2213 	 */
2214 	err = t4vf_wait_dev_ready(adapter);
2215 	if (err) {
2216 		dev_err(adapter->pdev_dev, "device didn't become ready:"
2217 			" err=%d\n", err);
2218 		return err;
2219 	}
2220 
2221 	/*
2222 	 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2223 	 * 2.6.31 and later we can't call pci_reset_function() in order to
2224 	 * issue an FLR because of a self-deadlock on the device semaphore.
2225 	 * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2226 	 * cases where they're needed -- for instance, some versions of KVM
2227 	 * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2228 	 * use the firmware based reset in order to reset any per function
2229 	 * state.
2230 	 */
2231 	err = t4vf_fw_reset(adapter);
2232 	if (err < 0) {
2233 		dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2234 		return err;
2235 	}
2236 
2237 	/*
2238 	 * Grab basic operational parameters.  These will predominantly have
2239 	 * been set up by the Physical Function Driver or will be hard coded
2240 	 * into the adapter.  We just have to live with them ...  Note that
2241 	 * we _must_ get our VPD parameters before our SGE parameters because
2242 	 * we need to know the adapter's core clock from the VPD in order to
2243 	 * properly decode the SGE Timer Values.
2244 	 */
2245 	err = t4vf_get_dev_params(adapter);
2246 	if (err) {
2247 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2248 			" device parameters: err=%d\n", err);
2249 		return err;
2250 	}
2251 	err = t4vf_get_vpd_params(adapter);
2252 	if (err) {
2253 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2254 			" VPD parameters: err=%d\n", err);
2255 		return err;
2256 	}
2257 	err = t4vf_get_sge_params(adapter);
2258 	if (err) {
2259 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2260 			" SGE parameters: err=%d\n", err);
2261 		return err;
2262 	}
2263 	err = t4vf_get_rss_glb_config(adapter);
2264 	if (err) {
2265 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2266 			" RSS parameters: err=%d\n", err);
2267 		return err;
2268 	}
2269 	if (adapter->params.rss.mode !=
2270 	    FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2271 		dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2272 			" mode %d\n", adapter->params.rss.mode);
2273 		return -EINVAL;
2274 	}
2275 	err = t4vf_sge_init(adapter);
2276 	if (err) {
2277 		dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2278 			" err=%d\n", err);
2279 		return err;
2280 	}
2281 
2282 	/* If we're running on newer firmware, let it know that we're
2283 	 * prepared to deal with encapsulated CPL messages.  Older
2284 	 * firmware won't understand this and we'll just get
2285 	 * unencapsulated messages ...
2286 	 */
2287 	param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
2288 		FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
2289 	val = 1;
2290 	(void) t4vf_set_params(adapter, 1, &param, &val);
2291 
2292 	/*
2293 	 * Retrieve our RX interrupt holdoff timer values and counter
2294 	 * threshold values from the SGE parameters.
2295 	 */
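	/*
	 * Each 32-bit SGE Timer Value register packs two holdoff timers, so
	 * the TIMERVALUE0_G()/TIMERVALUE1_G() field macros extract the
	 * even/odd entries and core_ticks_to_us() converts them from core
	 * clock ticks (using the core clock obtained from the VPD above)
	 * into microseconds.
	 */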
2296 	s->timer_val[0] = core_ticks_to_us(adapter,
2297 		TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1));
2298 	s->timer_val[1] = core_ticks_to_us(adapter,
2299 		TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1));
2300 	s->timer_val[2] = core_ticks_to_us(adapter,
2301 		TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3));
2302 	s->timer_val[3] = core_ticks_to_us(adapter,
2303 		TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3));
2304 	s->timer_val[4] = core_ticks_to_us(adapter,
2305 		TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5));
2306 	s->timer_val[5] = core_ticks_to_us(adapter,
2307 		TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5));
2308 
2309 	s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold);
2310 	s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold);
2311 	s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold);
2312 	s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold);
2313 
2314 	/*
2315 	 * Grab our Virtual Interface resource allocation, extract the
2316 	 * features that we're interested in and do a bit of sanity testing on
2317 	 * what we discover.
2318 	 */
2319 	err = t4vf_get_vfres(adapter);
2320 	if (err) {
2321 		dev_err(adapter->pdev_dev, "unable to get virtual interface"
2322 			" resources: err=%d\n", err);
2323 		return err;
2324 	}
2325 
2326 	/*
2327 	 * The number of "ports" which we support is equal to the number of
2328 	 * Virtual Interfaces with which we've been provisioned.
2329 	 */
2330 	adapter->params.nports = vfres->nvi;
2331 	if (adapter->params.nports > MAX_NPORTS) {
2332 		dev_warn(adapter->pdev_dev, "only using %d of %d allowed"
2333 			 " virtual interfaces\n", MAX_NPORTS,
2334 			 adapter->params.nports);
2335 		adapter->params.nports = MAX_NPORTS;
2336 	}
2337 
2338 	/*
2339 	 * We need to reserve a number of the ingress queues with Free List
2340 	 * and Interrupt capabilities for special interrupt purposes (like
2341 	 * asynchronous firmware messages, or forwarded interrupts if we're
2342 	 * using MSI).  The rest of the FL/Intr-capable ingress queues will be
2343 	 * matched up one-for-one with Ethernet/Control egress queues in order
2344 	 * to form "Queue Sets" which will be apportioned between the "ports".
2345 	 * For each Queue Set, we'll need the ability to allocate two Egress
2346 	 * Contexts -- one for the Ingress Queue Free List and one for the TX
2347 	 * Ethernet Queue.
2348 	 */
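	/*
	 * A worked example with illustrative numbers only: niqflint = 34
	 * and INGQ_EXTRAS = 2 would leave 34 - 2 = 32 prospective Queue
	 * Sets, which the checks below may further clamp to nethctrl and
	 * to neq/2 (each Queue Set needs two Egress Contexts).
	 */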
2349 	ethqsets = vfres->niqflint - INGQ_EXTRAS;
2350 	if (vfres->nethctrl != ethqsets) {
2351 		dev_warn(adapter->pdev_dev, "unequal number of [available]"
2352 			 " ingress/egress queues (%d/%d); using minimum for"
2353 			 " number of Queue Sets\n", ethqsets, vfres->nethctrl);
2354 		ethqsets = min(vfres->nethctrl, ethqsets);
2355 	}
2356 	if (vfres->neq < ethqsets*2) {
2357 		dev_warn(adapter->pdev_dev, "Not enough Egress Contexts (%d)"
2358 			 " to support Queue Sets (%d); reducing allowed Queue"
2359 			 " Sets\n", vfres->neq, ethqsets);
2360 		ethqsets = vfres->neq/2;
2361 	}
2362 	if (ethqsets > MAX_ETH_QSETS) {
2363 		dev_warn(adapter->pdev_dev, "only using %d of %d allowed Queue"
2364 			 " Sets\n", MAX_ETH_QSETS, ethqsets);
2365 		ethqsets = MAX_ETH_QSETS;
2366 	}
2367 	if (vfres->niq != 0 || vfres->neq > ethqsets*2) {
2368 		dev_warn(adapter->pdev_dev, "unused resources niq/neq (%d/%d)"
2369 			 " ignored\n", vfres->niq, vfres->neq - ethqsets*2);
2370 	}
2371 	adapter->sge.max_ethqsets = ethqsets;
2372 
2373 	/*
2374 	 * Check for various parameter sanity issues.  Most checks simply
2375 	 * result in us using fewer resources than our provisioning but we
2376 	 * do need at least one "port" with which to work ...
2377 	 */
2378 	if (adapter->sge.max_ethqsets < adapter->params.nports) {
2379 		dev_warn(adapter->pdev_dev, "only using %d of %d available"
2380 			 " virtual interfaces (too few Queue Sets)\n",
2381 			 adapter->sge.max_ethqsets, adapter->params.nports);
2382 		adapter->params.nports = adapter->sge.max_ethqsets;
2383 	}
2384 	if (adapter->params.nports == 0) {
2385 		dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2386 			"usable!\n");
2387 		return -EINVAL;
2388 	}
2389 	return 0;
2390 }
2391 
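/*
 * Fill in the common parameters of a Response Queue: the interrupt
 * holdoff timer index, an optional interrupt packet-count threshold
 * (enabled only when pkt_cnt_idx is a valid SGE counter index), the
 * queue size, and the Ingress Queue Entry size.
 */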
2392 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2393 			     u8 pkt_cnt_idx, unsigned int size,
2394 			     unsigned int iqe_size)
2395 {
2396 	rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
2397 			     (pkt_cnt_idx < SGE_NCOUNTERS ?
2398 			      QINTR_CNT_EN_F : 0));
2399 	rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2400 			    ? pkt_cnt_idx
2401 			    : 0);
2402 	rspq->iqe_len = iqe_size;
2403 	rspq->size = size;
2404 }
2405 
2406 /*
2407  * Perform default configuration of DMA queues depending on the number and
2408  * type of ports we found and the number of available CPUs.  Most settings can
2409  * be modified by the admin via ethtool and cxgbtool prior to the adapter
2410  * being brought up for the first time.
2411  */
2412 static void cfg_queues(struct adapter *adapter)
2413 {
2414 	struct sge *s = &adapter->sge;
2415 	int q10g, n10g, qidx, pidx, qs;
2416 	size_t iqe_size;
2417 
2418 	/*
2419 	 * We should not be called till we know how many Queue Sets we can
2420 	 * support.  In particular, this means that we need to know what kind
2421 	 * of interrupts we'll be using ...
2422 	 */
2423 	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2424 
2425 	/*
2426 	 * Count the number of 10GbE Virtual Interfaces that we have.
2427 	 */
2428 	n10g = 0;
2429 	for_each_port(adapter, pidx)
2430 		n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2431 
2432 	/*
2433 	 * We default to 1 queue per non-10G port and, for each 10G port, up
2434 	 * to one queue per online CPU.
2435 	 */
2436 	if (n10g == 0)
2437 		q10g = 0;
2438 	else {
2439 		int n1g = (adapter->params.nports - n10g);
2440 		q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2441 		if (q10g > num_online_cpus())
2442 			q10g = num_online_cpus();
2443 	}
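
	/*
	 * A worked example with illustrative numbers only: with 4 ports of
	 * which 2 are 10G, and max_ethqsets = 16, the two non-10G ports get
	 * one Queue Set each and q10g = (16 - 2) / 2 = 7, further capped by
	 * num_online_cpus().
	 */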
2444 
2445 	/*
2446 	 * Allocate the "Queue Sets" to the various Virtual Interfaces.
2447 	 * The layout will be established in setup_sge_queues() when the
2448 	 * adapter is brought up for the first time.
2449 	 */
2450 	qidx = 0;
2451 	for_each_port(adapter, pidx) {
2452 		struct port_info *pi = adap2pinfo(adapter, pidx);
2453 
2454 		pi->first_qset = qidx;
2455 		pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
2456 		qidx += pi->nqsets;
2457 	}
2458 	s->ethqsets = qidx;
2459 
2460 	/*
2461 	 * The Ingress Queue Entry Size for our various Response Queues needs
2462 	 * to be big enough to accommodate the largest message we can receive
2463 	 * from the chip/firmware; which is 64 bytes ...
2464 	 */
2465 	iqe_size = 64;
2466 
2467 	/*
2468 	 * Set up default Queue Set parameters ...  Start off with the
2469 	 * shortest interrupt holdoff timer.
2470 	 */
2471 	for (qs = 0; qs < s->max_ethqsets; qs++) {
2472 		struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2473 		struct sge_eth_txq *txq = &s->ethtxq[qs];
2474 
2475 		init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2476 		rxq->fl.size = 72;
2477 		txq->q.size = 1024;
2478 	}
2479 
2480 	/*
2481 	 * The firmware event queue is used for link state changes and
2482 	 * notifications of TX DMA completions.
2483 	 */
2484 	init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2485 
2486 	/*
2487 	 * The forwarded interrupt queue is used when we're in MSI interrupt
2488 	 * mode.  In this mode all interrupts associated with RX queues will
2489 	 * be forwarded to a single queue which we'll associate with our MSI
2490 	 * interrupt vector.  The messages dropped in the forwarded interrupt
2491 	 * queue will indicate which ingress queue needs servicing ...  This
2492 	 * queue needs to be large enough to accommodate all of the ingress
2493 	 * queues which are forwarding their interrupt (+1 to prevent the PIDX
2494 	 * from equalling the CIDX if every ingress queue has an outstanding
2495 	 * interrupt).  The queue doesn't need to be any larger because no
2496 	 * ingress queue will ever have more than one outstanding interrupt at
2497 	 * any time ...
2498 	 */
2499 	init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2500 		  iqe_size);
2501 }
2502 
2503 /*
2504  * Reduce the number of Ethernet queues across all ports to at most n.
2505  * The caller guarantees n >= nports, so each port keeps at least one queue.
2506  */
2507 static void reduce_ethqs(struct adapter *adapter, int n)
2508 {
2509 	int i;
2510 	struct port_info *pi;
2511 
2512 	/*
2513 	 * While we have too many active Ethernet Queue Sets, iterate across the
2514 	 * "ports" and reduce their individual Queue Set allocations.
2515 	 */
2516 	BUG_ON(n < adapter->params.nports);
2517 	while (n < adapter->sge.ethqsets)
2518 		for_each_port(adapter, i) {
2519 			pi = adap2pinfo(adapter, i);
2520 			if (pi->nqsets > 1) {
2521 				pi->nqsets--;
2522 				adapter->sge.ethqsets--;
2523 				if (adapter->sge.ethqsets <= n)
2524 					break;
2525 			}
2526 		}
2527 
2528 	/*
2529 	 * Reassign the starting Queue Sets for each of the "ports" ...
2530 	 */
2531 	n = 0;
2532 	for_each_port(adapter, i) {
2533 		pi = adap2pinfo(adapter, i);
2534 		pi->first_qset = n;
2535 		n += pi->nqsets;
2536 	}
2537 }
2538 
2539 /*
2540  * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2541  * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2542  * need.  Minimally we need one for every Virtual Interface plus those needed
2543  * for our "extras".  Note that this process may lower the maximum number of
2544  * allowed Queue Sets ...
2545  */
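/*
 * A worked example with illustrative numbers only: with max_ethqsets = 16,
 * nports = 2 and MSIX_EXTRAS = 1, we ask pci_enable_msix_range() for
 * want = 17 vectors but accept as few as need = 3; if fewer than 17 are
 * granted, the shortfall reduces max_ethqsets (and, if necessary, the
 * active Queue Sets via reduce_ethqs()).
 */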
2546 static int enable_msix(struct adapter *adapter)
2547 {
2548 	int i, want, need, nqsets;
2549 	struct msix_entry entries[MSIX_ENTRIES];
2550 	struct sge *s = &adapter->sge;
2551 
2552 	for (i = 0; i < MSIX_ENTRIES; ++i)
2553 		entries[i].entry = i;
2554 
2555 	/*
2556 	 * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2557 	 * plus those needed for our "extras" (for example, the firmware
2558 	 * message queue).  We _need_ at least one "Queue Set" per Virtual
2559 	 * Interface plus those needed for our "extras".  So now we get to see
2560 	 * if the song is right ...
2561 	 */
2562 	want = s->max_ethqsets + MSIX_EXTRAS;
2563 	need = adapter->params.nports + MSIX_EXTRAS;
2564 
2565 	want = pci_enable_msix_range(adapter->pdev, entries, need, want);
2566 	if (want < 0)
2567 		return want;
2568 
2569 	nqsets = want - MSIX_EXTRAS;
2570 	if (nqsets < s->max_ethqsets) {
2571 		dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2572 			 " for %d Queue Sets\n", nqsets);
2573 		s->max_ethqsets = nqsets;
2574 		if (nqsets < s->ethqsets)
2575 			reduce_ethqs(adapter, nqsets);
2576 	}
2577 	for (i = 0; i < want; ++i)
2578 		adapter->msix_info[i].vec = entries[i].vector;
2579 
2580 	return 0;
2581 }
2582 
2583 static const struct net_device_ops cxgb4vf_netdev_ops	= {
2584 	.ndo_open		= cxgb4vf_open,
2585 	.ndo_stop		= cxgb4vf_stop,
2586 	.ndo_start_xmit		= t4vf_eth_xmit,
2587 	.ndo_get_stats		= cxgb4vf_get_stats,
2588 	.ndo_set_rx_mode	= cxgb4vf_set_rxmode,
2589 	.ndo_set_mac_address	= cxgb4vf_set_mac_addr,
2590 	.ndo_validate_addr	= eth_validate_addr,
2591 	.ndo_do_ioctl		= cxgb4vf_do_ioctl,
2592 	.ndo_change_mtu		= cxgb4vf_change_mtu,
2593 	.ndo_fix_features	= cxgb4vf_fix_features,
2594 	.ndo_set_features	= cxgb4vf_set_features,
2595 #ifdef CONFIG_NET_POLL_CONTROLLER
2596 	.ndo_poll_controller	= cxgb4vf_poll_controller,
2597 #endif
2598 };
2599 
2600 /*
2601  * "Probe" a device: initialize a device and construct all kernel and driver
2602  * state needed to manage the device.  This routine is called "init_one" in
2603  * the PF Driver ...
2604  */
2605 static int cxgb4vf_pci_probe(struct pci_dev *pdev,
2606 			     const struct pci_device_id *ent)
2607 {
2608 	int pci_using_dac;
2609 	int err, pidx;
2610 	unsigned int pmask;
2611 	struct adapter *adapter;
2612 	struct port_info *pi;
2613 	struct net_device *netdev;
2614 
2615 	/*
2616 	 * Print our driver banner the first time we're called to initialize a
2617 	 * device.
2618 	 */
2619 	pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION);
2620 
2621 	/*
2622 	 * Initialize generic PCI device state.
2623 	 */
2624 	err = pci_enable_device(pdev);
2625 	if (err) {
2626 		dev_err(&pdev->dev, "cannot enable PCI device\n");
2627 		return err;
2628 	}
2629 
2630 	/*
2631 	 * Reserve PCI resources for the device.  If we can't get them some
2632 	 * other driver may have already claimed the device ...
2633 	 */
2634 	err = pci_request_regions(pdev, KBUILD_MODNAME);
2635 	if (err) {
2636 		dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2637 		goto err_disable_device;
2638 	}
2639 
2640 	/*
2641 	 * Set up our DMA mask: try for 64-bit address masking first and
2642 	 * fall back to 32-bit if we can't get 64 bits ...
2643 	 */
2644 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2645 	if (err == 0) {
2646 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2647 		if (err) {
2648 			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2649 				" coherent allocations\n");
2650 			goto err_release_regions;
2651 		}
2652 		pci_using_dac = 1;
2653 	} else {
2654 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2655 		if (err != 0) {
2656 			dev_err(&pdev->dev, "no usable DMA configuration\n");
2657 			goto err_release_regions;
2658 		}
2659 		pci_using_dac = 0;
2660 	}
2661 
2662 	/*
2663 	 * Enable bus mastering for the device ...
2664 	 */
2665 	pci_set_master(pdev);
2666 
2667 	/*
2668 	 * Allocate our adapter data structure and attach it to the device.
2669 	 */
2670 	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2671 	if (!adapter) {
2672 		err = -ENOMEM;
2673 		goto err_release_regions;
2674 	}
2675 	pci_set_drvdata(pdev, adapter);
2676 	adapter->pdev = pdev;
2677 	adapter->pdev_dev = &pdev->dev;
2678 
2679 	/*
2680 	 * Initialize SMP data synchronization resources.
2681 	 */
2682 	spin_lock_init(&adapter->stats_lock);
2683 
2684 	/*
2685 	 * Map our I/O registers in BAR0.
2686 	 */
2687 	adapter->regs = pci_ioremap_bar(pdev, 0);
2688 	if (!adapter->regs) {
2689 		dev_err(&pdev->dev, "cannot map device registers\n");
2690 		err = -ENOMEM;
2691 		goto err_free_adapter;
2692 	}
2693 
2694 	/* Wait for the device to become ready before proceeding ...
2695 	 */
2696 	err = t4vf_prep_adapter(adapter);
2697 	if (err) {
2698 		dev_err(adapter->pdev_dev, "device didn't become ready:"
2699 			" err=%d\n", err);
2700 		goto err_unmap_bar0;
2701 	}
2702 
2703 	/* For T5 and later we want to use the new BAR-based User Doorbells,
2704 	 * so we need to map BAR2 here ...
2705 	 */
2706 	if (!is_t4(adapter->params.chip)) {
2707 		adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
2708 					   pci_resource_len(pdev, 2));
2709 		if (!adapter->bar2) {
2710 			dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
2711 			err = -ENOMEM;
2712 			goto err_unmap_bar0;
2713 		}
2714 	}
2715 	/*
2716 	 * Initialize adapter level features.
2717 	 */
2718 	adapter->name = pci_name(pdev);
2719 	adapter->msg_enable = dflt_msg_enable;
2720 	err = adap_init0(adapter);
2721 	if (err)
2722 		goto err_unmap_bar;
2723 
2724 	/*
2725 	 * Allocate our "adapter ports" and stitch everything together.
2726 	 */
2727 	pmask = adapter->params.vfres.pmask;
2728 	for_each_port(adapter, pidx) {
2729 		int port_id, viid;
2730 
2731 		/*
2732 		 * We simplistically allocate our virtual interfaces
2733 		 * sequentially across the port numbers to which we have
2734 		 * access rights.  This should be configurable in some manner
2735 		 * ...
2736 		 */
2737 		if (pmask == 0)
2738 			break;
2739 		port_id = ffs(pmask) - 1;
2740 		pmask &= ~(1 << port_id);
2741 		viid = t4vf_alloc_vi(adapter, port_id);
2742 		if (viid < 0) {
2743 			dev_err(&pdev->dev, "cannot allocate VI for port %d:"
2744 				" err=%d\n", port_id, viid);
2745 			err = viid;
2746 			goto err_free_dev;
2747 		}
2748 
2749 		/*
2750 		 * Allocate our network device and stitch things together.
2751 		 */
2752 		netdev = alloc_etherdev_mq(sizeof(struct port_info),
2753 					   MAX_PORT_QSETS);
2754 		if (netdev == NULL) {
2755 			t4vf_free_vi(adapter, viid);
2756 			err = -ENOMEM;
2757 			goto err_free_dev;
2758 		}
2759 		adapter->port[pidx] = netdev;
2760 		SET_NETDEV_DEV(netdev, &pdev->dev);
2761 		pi = netdev_priv(netdev);
2762 		pi->adapter = adapter;
2763 		pi->pidx = pidx;
2764 		pi->port_id = port_id;
2765 		pi->viid = viid;
2766 
2767 		/*
2768 		 * Initialize the starting state of our "port" and register
2769 		 * it.
2770 		 */
2771 		pi->xact_addr_filt = -1;
2772 		netif_carrier_off(netdev);
2773 		netdev->irq = pdev->irq;
2774 
2775 		netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
2776 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2777 			NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM;
2778 		netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
2779 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2780 			NETIF_F_HIGHDMA;
2781 		netdev->features = netdev->hw_features |
2782 				   NETIF_F_HW_VLAN_CTAG_TX;
2783 		if (pci_using_dac)
2784 			netdev->features |= NETIF_F_HIGHDMA;
2785 
2786 		netdev->priv_flags |= IFF_UNICAST_FLT;
2787 
2788 		netdev->netdev_ops = &cxgb4vf_netdev_ops;
2789 		netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
2790 
2791 		/*
2792 		 * Initialize the hardware/software state for the port.
2793 		 */
2794 		err = t4vf_port_init(adapter, pidx);
2795 		if (err) {
2796 			dev_err(&pdev->dev, "cannot initialize port %d\n",
2797 				pidx);
2798 			goto err_free_dev;
2799 		}
2800 	}
2801 
2802 	/*
2803 	 * The "card" is now ready to go.  If any errors occur during device
2804 	 * registration we do not fail the whole "card" but rather proceed
2805 	 * only with the ports we manage to register successfully.  However we
2806 	 * must register at least one net device.
2807 	 */
2808 	for_each_port(adapter, pidx) {
2809 		netdev = adapter->port[pidx];
2810 		if (netdev == NULL)
2811 			continue;
2812 
2813 		err = register_netdev(netdev);
2814 		if (err) {
2815 			dev_warn(&pdev->dev, "cannot register net device %s,"
2816 				 " skipping\n", netdev->name);
2817 			continue;
2818 		}
2819 
2820 		set_bit(pidx, &adapter->registered_device_map);
2821 	}
2822 	if (adapter->registered_device_map == 0) {
2823 		dev_err(&pdev->dev, "could not register any net devices\n");
2824 		goto err_free_dev;
2825 	}
2826 
2827 	/*
2828 	 * Set up our debugfs entries.
2829 	 */
2830 	if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
2831 		adapter->debugfs_root =
2832 			debugfs_create_dir(pci_name(pdev),
2833 					   cxgb4vf_debugfs_root);
2834 		if (IS_ERR_OR_NULL(adapter->debugfs_root))
2835 			dev_warn(&pdev->dev, "could not create debugfs"
2836 				 " directory\n");
2837 		else
2838 			setup_debugfs(adapter);
2839 	}
2840 
2841 	/*
2842 	 * See what interrupts we'll be using.  If we've been configured to
2843 	 * use MSI-X interrupts, try to enable them but fall back to using
2844 	 * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
2845 	 * get MSI interrupts we bail with the error.
2846 	 */
2847 	if (msi == MSI_MSIX && enable_msix(adapter) == 0)
2848 		adapter->flags |= USING_MSIX;
2849 	else {
2850 		err = pci_enable_msi(pdev);
2851 		if (err) {
2852 			dev_err(&pdev->dev, "Unable to allocate %s interrupts;"
2853 				" err=%d\n",
2854 				msi == MSI_MSIX ? "MSI-X or MSI" : "MSI", err);
2855 			goto err_free_debugfs;
2856 		}
2857 		adapter->flags |= USING_MSI;
2858 	}
2859 
2860 	/*
2861 	 * Now that we know how many "ports" we have and what their types are,
2862 	 * and how many Queue Sets we can support, we can configure our queue
2863 	 * resources.
2864 	 */
2865 	cfg_queues(adapter);
2866 
2867 	/*
2868 	 * Print a short notice on the existence and configuration of the new
2869 	 * VF network device ...
2870 	 */
2871 	for_each_port(adapter, pidx) {
2872 		dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
2873 			 adapter->port[pidx]->name,
2874 			 (adapter->flags & USING_MSIX) ? "MSI-X" :
2875 			 (adapter->flags & USING_MSI)  ? "MSI" : "");
2876 	}
2877 
2878 	/*
2879 	 * Return success!
2880 	 */
2881 	return 0;
2882 
2883 	/*
2884 	 * Error recovery and exit code.  Unwind state that's been created
2885 	 * so far and return the error.
2886 	 */
2887 
2888 err_free_debugfs:
2889 	if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2890 		cleanup_debugfs(adapter);
2891 		debugfs_remove_recursive(adapter->debugfs_root);
2892 	}
2893 
2894 err_free_dev:
2895 	for_each_port(adapter, pidx) {
2896 		netdev = adapter->port[pidx];
2897 		if (netdev == NULL)
2898 			continue;
2899 		pi = netdev_priv(netdev);
2900 		t4vf_free_vi(adapter, pi->viid);
2901 		if (test_bit(pidx, &adapter->registered_device_map))
2902 			unregister_netdev(netdev);
2903 		free_netdev(netdev);
2904 	}
2905 
2906 err_unmap_bar:
2907 	if (!is_t4(adapter->params.chip))
2908 		iounmap(adapter->bar2);
2909 
2910 err_unmap_bar0:
2911 	iounmap(adapter->regs);
2912 
2913 err_free_adapter:
2914 	kfree(adapter);
2915 
2916 err_release_regions:
2917 	pci_release_regions(pdev);
2918 	pci_clear_master(pdev);
2919 
2920 err_disable_device:
2921 	pci_disable_device(pdev);
2922 
2923 	return err;
2924 }
2925 
2926 /*
2927  * "Remove" a device: tear down all kernel and driver state created in the
2928  * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
2929  * that this is called "remove_one" in the PF Driver.)
2930  */
2931 static void cxgb4vf_pci_remove(struct pci_dev *pdev)
2932 {
2933 	struct adapter *adapter = pci_get_drvdata(pdev);
2934 
2935 	/*
2936 	 * Tear down driver state associated with device.
2937 	 */
2938 	if (adapter) {
2939 		int pidx;
2940 
2941 		/*
2942 		 * Stop all of our activity.  Unregister network port,
2943 		 * disable interrupts, etc.
2944 		 */
2945 		for_each_port(adapter, pidx)
2946 			if (test_bit(pidx, &adapter->registered_device_map))
2947 				unregister_netdev(adapter->port[pidx]);
2948 		t4vf_sge_stop(adapter);
2949 		if (adapter->flags & USING_MSIX) {
2950 			pci_disable_msix(adapter->pdev);
2951 			adapter->flags &= ~USING_MSIX;
2952 		} else if (adapter->flags & USING_MSI) {
2953 			pci_disable_msi(adapter->pdev);
2954 			adapter->flags &= ~USING_MSI;
2955 		}
2956 
2957 		/*
2958 		 * Tear down our debugfs entries.
2959 		 */
2960 		if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2961 			cleanup_debugfs(adapter);
2962 			debugfs_remove_recursive(adapter->debugfs_root);
2963 		}
2964 
2965 		/*
2966 		 * Free all of the various resources which we've acquired ...
2967 		 */
2968 		t4vf_free_sge_resources(adapter);
2969 		for_each_port(adapter, pidx) {
2970 			struct net_device *netdev = adapter->port[pidx];
2971 			struct port_info *pi;
2972 
2973 			if (netdev == NULL)
2974 				continue;
2975 
2976 			pi = netdev_priv(netdev);
2977 			t4vf_free_vi(adapter, pi->viid);
2978 			free_netdev(netdev);
2979 		}
2980 		iounmap(adapter->regs);
2981 		if (!is_t4(adapter->params.chip))
2982 			iounmap(adapter->bar2);
2983 		kfree(adapter);
2984 	}
2985 
2986 	/*
2987 	 * Disable the device and release its PCI resources.
2988 	 */
2989 	pci_disable_device(pdev);
2990 	pci_clear_master(pdev);
2991 	pci_release_regions(pdev);
2992 }
2993 
2994 /*
2995  * "Shutdown" the device: quiesce it, stopping Ingress Packet and Interrupt
2996  * delivery.
2997  */
2998 static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
2999 {
3000 	struct adapter *adapter;
3001 	int pidx;
3002 
3003 	adapter = pci_get_drvdata(pdev);
3004 	if (!adapter)
3005 		return;
3006 
3007 	/* Disable all Virtual Interfaces.  This will shut down the
3008 	 * delivery of all ingress packets into the chip for these
3009 	 * Virtual Interfaces.
3010 	 */
3011 	for_each_port(adapter, pidx)
3012 		if (test_bit(pidx, &adapter->registered_device_map))
3013 			unregister_netdev(adapter->port[pidx]);
3014 
3015 	/* Stop all SGE activity and disable our interrupts, allowing the
3016 	 * various internal pathways to drain.
3017 	 */
3018 	t4vf_sge_stop(adapter);
3019 	if (adapter->flags & USING_MSIX) {
3020 		pci_disable_msix(adapter->pdev);
3021 		adapter->flags &= ~USING_MSIX;
3022 	} else if (adapter->flags & USING_MSI) {
3023 		pci_disable_msi(adapter->pdev);
3024 		adapter->flags &= ~USING_MSI;
3025 	}
3026 
3027 	/*
3028 	 * Free up all Queues which will prevent further DMA and
3029 	 * Interrupts allowing various internal pathways to drain.
3030 	 */
3031 	t4vf_free_sge_resources(adapter);
3032 	pci_set_drvdata(pdev, NULL);
3033 }
3034 
3035 /* Macros needed to support the PCI Device ID Table ...
3036  */
3037 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
3038 	static const struct pci_device_id cxgb4vf_pci_tbl[] = {
3039 #define CH_PCI_DEVICE_ID_FUNCTION	0x8
3040 
3041 #define CH_PCI_ID_TABLE_ENTRY(devid) \
3042 		{ PCI_VDEVICE(CHELSIO, (devid)), 0 }
3043 
3044 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } }
3045 
3046 #include "../cxgb4/t4_pci_id_tbl.h"
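
/*
 * The header included above expands the CH_PCI_* macros into the body of
 * cxgb4vf_pci_tbl[], generating one PCI_VDEVICE() entry per supported
 * Chelsio device ID, with CH_PCI_DEVICE_ID_FUNCTION (0x8, the VF
 * function) used by the header to select the VF variants.
 */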
3047 
3048 MODULE_DESCRIPTION(DRV_DESC);
3049 MODULE_AUTHOR("Chelsio Communications");
3050 MODULE_LICENSE("Dual BSD/GPL");
3051 MODULE_VERSION(DRV_VERSION);
3052 MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
3053 
3054 static struct pci_driver cxgb4vf_driver = {
3055 	.name		= KBUILD_MODNAME,
3056 	.id_table	= cxgb4vf_pci_tbl,
3057 	.probe		= cxgb4vf_pci_probe,
3058 	.remove		= cxgb4vf_pci_remove,
3059 	.shutdown	= cxgb4vf_pci_shutdown,
3060 };
3061 
3062 /*
3063  * Initialize global driver state.
3064  */
3065 static int __init cxgb4vf_module_init(void)
3066 {
3067 	int ret;
3068 
3069 	/*
3070 	 * Vet our module parameters.
3071 	 */
3072 	if (msi != MSI_MSIX && msi != MSI_MSI) {
3073 		pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n",
3074 			msi, MSI_MSIX, MSI_MSI);
3075 		return -EINVAL;
3076 	}
3077 
3078 	/* Debugfs support is optional, just warn if this fails */
3079 	cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
3080 	if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3081 		pr_warn("could not create debugfs entry, continuing\n");
3082 
3083 	ret = pci_register_driver(&cxgb4vf_driver);
3084 	if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3085 		debugfs_remove(cxgb4vf_debugfs_root);
3086 	return ret;
3087 }
3088 
3089 /*
3090  * Tear down global driver state.
3091  */
3092 static void __exit cxgb4vf_module_exit(void)
3093 {
3094 	pci_unregister_driver(&cxgb4vf_driver);
3095 	debugfs_remove(cxgb4vf_debugfs_root);
3096 }
3097 
3098 module_init(cxgb4vf_module_init);
3099 module_exit(cxgb4vf_module_exit);
3100