xref: /linux/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c (revision a5d9265e017f081f0dc133c0e2f45103d027b874)
1 /*
2  * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3  * driver for Linux.
4  *
5  * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35 
36 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
37 
38 #include <linux/module.h>
39 #include <linux/moduleparam.h>
40 #include <linux/init.h>
41 #include <linux/pci.h>
42 #include <linux/dma-mapping.h>
43 #include <linux/netdevice.h>
44 #include <linux/etherdevice.h>
45 #include <linux/debugfs.h>
46 #include <linux/ethtool.h>
47 #include <linux/mdio.h>
48 
49 #include "t4vf_common.h"
50 #include "t4vf_defs.h"
51 
52 #include "../cxgb4/t4_regs.h"
53 #include "../cxgb4/t4_msg.h"
54 
55 /*
56  * Generic information about the driver.
57  */
58 #define DRV_VERSION "2.0.0-ko"
59 #define DRV_DESC "Chelsio T4/T5/T6 Virtual Function (VF) Network Driver"
60 
61 /*
62  * Module Parameters.
63  * ==================
64  */
65 
66 /*
67  * Default ethtool "message level" for adapters.
68  */
69 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
70 			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
71 			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
72 
73 /*
74  * The driver uses the best interrupt scheme available on a platform in the
75  * order MSI-X then MSI.  This parameter determines which of these schemes the
76  * driver may consider as follows:
77  *
78  *     msi = 2: choose from among MSI-X and MSI
79  *     msi = 1: only consider MSI interrupts
80  *
81  * Note that unlike the Physical Function driver, this Virtual Function driver
82  * does _not_ support legacy INTx interrupts (this limitation is mandated by
83  * the PCI-E SR-IOV standard).
84  */
85 #define MSI_MSIX	2
86 #define MSI_MSI		1
87 #define MSI_DEFAULT	MSI_MSIX
88 
89 static int msi = MSI_DEFAULT;
90 
91 module_param(msi, int, 0644);
92 MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
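
/*
 * Illustrative sketch (not the driver's actual probe-path code) of the
 * fallback behavior the "msi" parameter selects: with msi == MSI_MSIX we
 * first try to allocate a full set of MSI-X vectors and fall back to MSI
 * only if that fails; with msi == MSI_MSI we go straight to MSI.  The
 * helper name and the caller-supplied msix_entry array are hypothetical.
 */
static int example_enable_interrupts(struct pci_dev *pdev,
				     struct msix_entry *entries, int nvecs)
{
	if (msi == MSI_MSIX &&
	    pci_enable_msix_range(pdev, entries, nvecs, nvecs) > 0)
		return 0;	/* got all nvecs MSI-X vectors */

	/* MSI fallback; legacy INTx is not an option for SR-IOV VFs */
	return pci_enable_msi(pdev);
}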
93 
94 /*
95  * Fundamental constants.
96  * ======================
97  */
98 
99 enum {
100 	MAX_TXQ_ENTRIES		= 16384,
101 	MAX_RSPQ_ENTRIES	= 16384,
102 	MAX_RX_BUFFERS		= 16384,
103 
104 	MIN_TXQ_ENTRIES		= 32,
105 	MIN_RSPQ_ENTRIES	= 128,
106 	MIN_FL_ENTRIES		= 16,
107 
108 	/*
109 	 * For purposes of manipulating the Free List size we need to
110 	 * recognize that Free Lists are actually Egress Queues (the host
111 	 * produces free buffers which the hardware consumes), that Egress
112 	 * Queue indices are all in units of Egress Context Units (bytes), and free
113 	 * list entries are 64-bit PCI DMA addresses.  And since the state of
114 	 * the Producer Index == the Consumer Index implies an EMPTY list, we
115 	 * always have at least one Egress Unit's worth of Free List entries
116 	 * unused.  See sge.c for more details ...
117 	 */
118 	EQ_UNIT = SGE_EQ_IDXSIZE,
119 	FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
120 	MIN_FL_RESID = FL_PER_EQ_UNIT,
121 };
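
/*
 * Worked example of the Free List sizing constants above, assuming the
 * usual Egress Context Unit size of 64 bytes: FL_PER_EQ_UNIT works out
 * to 64 / sizeof(__be64) = 8, so MIN_FL_RESID = 8 and a Free List of
 * size N holds at most N - 8 usable buffer pointers.  The helper below
 * is a hypothetical illustration, not part of the driver.
 */
static inline unsigned int example_fl_capacity(unsigned int fl_size)
{
	/* one Egress Queue Unit's worth of entries always stays unused */
	return fl_size - MIN_FL_RESID;
}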
122 
123 /*
124  * Global driver state.
125  * ====================
126  */
127 
128 static struct dentry *cxgb4vf_debugfs_root;
129 
130 /*
131  * OS "Callback" functions.
132  * ========================
133  */
134 
135 /*
136  * The link status has changed on the indicated "port" (Virtual Interface).
137  */
138 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
139 {
140 	struct net_device *dev = adapter->port[pidx];
141 
142 	/*
143 	 * If the port is disabled or the current recorded "link up"
144 	 * status matches the new status, just return.
145 	 */
146 	if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
147 		return;
148 
149 	/*
150 	 * Tell the OS that the link status has changed and print a short
151 	 * informative message on the console about the event.
152 	 */
153 	if (link_ok) {
154 		const char *s;
155 		const char *fc;
156 		const struct port_info *pi = netdev_priv(dev);
157 
158 		switch (pi->link_cfg.speed) {
159 		case 100:
160 			s = "100Mbps";
161 			break;
162 		case 1000:
163 			s = "1Gbps";
164 			break;
165 		case 10000:
166 			s = "10Gbps";
167 			break;
168 		case 25000:
169 			s = "25Gbps";
170 			break;
171 		case 40000:
172 			s = "40Gbps";
173 			break;
174 		case 100000:
175 			s = "100Gbps";
176 			break;
177 
178 		default:
179 			s = "unknown";
180 			break;
181 		}
182 
183 		switch ((int)pi->link_cfg.fc) {
184 		case PAUSE_RX:
185 			fc = "RX";
186 			break;
187 
188 		case PAUSE_TX:
189 			fc = "TX";
190 			break;
191 
192 		case PAUSE_RX | PAUSE_TX:
193 			fc = "RX/TX";
194 			break;
195 
196 		default:
197 			fc = "no";
198 			break;
199 		}
200 
201 		netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
202 	} else {
203 		netdev_info(dev, "link down\n");
204 	}
205 }
206 
207 /*
208  * The port module type has changed on the indicated "port" (Virtual
209  * Interface).
210  */
211 void t4vf_os_portmod_changed(struct adapter *adapter, int pidx)
212 {
213 	static const char * const mod_str[] = {
214 		NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
215 	};
216 	const struct net_device *dev = adapter->port[pidx];
217 	const struct port_info *pi = netdev_priv(dev);
218 
219 	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
220 		dev_info(adapter->pdev_dev, "%s: port module unplugged\n",
221 			 dev->name);
222 	else if (pi->mod_type < ARRAY_SIZE(mod_str))
223 		dev_info(adapter->pdev_dev, "%s: %s port module inserted\n",
224 			 dev->name, mod_str[pi->mod_type]);
225 	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
226 		dev_info(adapter->pdev_dev, "%s: unsupported optical port "
227 			 "module inserted\n", dev->name);
228 	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
229 		dev_info(adapter->pdev_dev, "%s: unknown port module inserted, "
230 			 "forcing TWINAX\n", dev->name);
231 	else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
232 		dev_info(adapter->pdev_dev, "%s: transceiver module error\n",
233 			 dev->name);
234 	else
235 		dev_info(adapter->pdev_dev, "%s: unknown module type %d "
236 			 "inserted\n", dev->name, pi->mod_type);
237 }
238 
239 static int cxgb4vf_set_addr_hash(struct port_info *pi)
240 {
241 	struct adapter *adapter = pi->adapter;
242 	u64 vec = 0;
243 	bool ucast = false;
244 	struct hash_mac_addr *entry;
245 
246 	/* Calculate the hash vector for the updated list and program it */
247 	list_for_each_entry(entry, &adapter->mac_hlist, list) {
248 		ucast |= is_unicast_ether_addr(entry->addr);
249 		vec |= (1ULL << hash_mac_addr(entry->addr));
250 	}
251 	return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
252 }
253 
254 /**
255  *	cxgb4vf_change_mac - Update match filter for a MAC address.
256  *	@pi: the port_info
257  *	@viid: the VI id
258  *	@tcam_idx: TCAM index of existing filter for old value of MAC address,
259  *		   or -1
260  *	@addr: the new MAC address value
261  *	@persistent: whether a new MAC allocation should be persistent
262  *
263  *	Modifies an MPS filter and sets it to the new MAC address if
264  *	@tcam_idx >= 0, or adds the MAC address to a new filter if
265  *	@tcam_idx < 0.  In the latter case the address is added persistently
266  *	if @persistent is %true.
267  *
268  *	If the TCAM runs out of entries, the address is programmed into
269  *	the hash region instead.
270  */
271 static int cxgb4vf_change_mac(struct port_info *pi, unsigned int viid,
272 			      int *tcam_idx, const u8 *addr, bool persistent)
273 {
274 	struct hash_mac_addr *new_entry, *entry;
275 	struct adapter *adapter = pi->adapter;
276 	int ret;
277 
278 	ret = t4vf_change_mac(adapter, viid, *tcam_idx, addr, persistent);
279 	/* We ran out of TCAM entries.  Try programming the hash region. */
280 	if (ret == -ENOMEM) {
281 		/* If the MAC address to be updated is in the hash addr
282 		 * list, update it from the list
283 		 */
284 		list_for_each_entry(entry, &adapter->mac_hlist, list) {
285 			if (entry->iface_mac) {
286 				ether_addr_copy(entry->addr, addr);
287 				goto set_hash;
288 			}
289 		}
290 		new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
291 		if (!new_entry)
292 			return -ENOMEM;
293 		ether_addr_copy(new_entry->addr, addr);
294 		new_entry->iface_mac = true;
295 		list_add_tail(&new_entry->list, &adapter->mac_hlist);
296 set_hash:
297 		ret = cxgb4vf_set_addr_hash(pi);
298 	} else if (ret >= 0) {
299 		*tcam_idx = ret;
300 		ret = 0;
301 	}
302 
303 	return ret;
304 }
305 
306 /*
307  * Net device operations.
308  * ======================
309  */
310 
311 
312 
313 
314 /*
315  * Perform the MAC and PHY actions needed to enable a "port" (Virtual
316  * Interface).
317  */
318 static int link_start(struct net_device *dev)
319 {
320 	int ret;
321 	struct port_info *pi = netdev_priv(dev);
322 
323 	/*
324 	 * We do not set address filters and promiscuity here; the stack does
325 	 * that step explicitly.  Enable VLAN acceleration.
326 	 */
327 	ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
328 			      true);
329 	if (ret == 0)
330 		ret = cxgb4vf_change_mac(pi, pi->viid,
331 					 &pi->xact_addr_filt,
332 					 dev->dev_addr, true);
333 
334 	/*
335 	 * We don't need to actually "start the link" itself since the
336 	 * firmware will do that for us when the first Virtual Interface
337 	 * is enabled on a port.
338 	 */
339 	if (ret == 0)
340 		ret = t4vf_enable_pi(pi->adapter, pi, true, true);
341 
342 	/* The Virtual Interfaces are connected to an internal switch on the
343 	 * chip which allows VIs attached to the same port to talk to each
344 	 * other even when the port link is down.  As a result, we generally
345 	 * want to always report a VI's link as being "up", provided there are
346 	 * no errors in enabling the VI.
347 	 */
348 
349 	if (ret == 0)
350 		netif_carrier_on(dev);
351 
352 	return ret;
353 }
354 
355 /*
356  * Name the MSI-X interrupts.
357  */
358 static void name_msix_vecs(struct adapter *adapter)
359 {
360 	int namelen = sizeof(adapter->msix_info[0].desc) - 1;
361 	int pidx;
362 
363 	/*
364 	 * Firmware events.
365 	 */
366 	snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
367 		 "%s-FWeventq", adapter->name);
368 	adapter->msix_info[MSIX_FW].desc[namelen] = 0;
369 
370 	/*
371 	 * Ethernet queues.
372 	 */
373 	for_each_port(adapter, pidx) {
374 		struct net_device *dev = adapter->port[pidx];
375 		const struct port_info *pi = netdev_priv(dev);
376 		int qs, msi;
377 
378 		for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
379 			snprintf(adapter->msix_info[msi].desc, namelen,
380 				 "%s-%d", dev->name, qs);
381 			adapter->msix_info[msi].desc[namelen] = 0;
382 		}
383 	}
384 }
385 
386 /*
387  * Request all of our MSI-X resources.
388  */
389 static int request_msix_queue_irqs(struct adapter *adapter)
390 {
391 	struct sge *s = &adapter->sge;
392 	int rxq, msi, err;
393 
394 	/*
395 	 * Firmware events.
396 	 */
397 	err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
398 			  0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
399 	if (err)
400 		return err;
401 
402 	/*
403 	 * Ethernet queues.
404 	 */
405 	msi = MSIX_IQFLINT;
406 	for_each_ethrxq(s, rxq) {
407 		err = request_irq(adapter->msix_info[msi].vec,
408 				  t4vf_sge_intr_msix, 0,
409 				  adapter->msix_info[msi].desc,
410 				  &s->ethrxq[rxq].rspq);
411 		if (err)
412 			goto err_free_irqs;
413 		msi++;
414 	}
415 	return 0;
416 
417 err_free_irqs:
418 	while (--rxq >= 0)
419 		free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
420 	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
421 	return err;
422 }
423 
424 /*
425  * Free our MSI-X resources.
426  */
427 static void free_msix_queue_irqs(struct adapter *adapter)
428 {
429 	struct sge *s = &adapter->sge;
430 	int rxq, msi;
431 
432 	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
433 	msi = MSIX_IQFLINT;
434 	for_each_ethrxq(s, rxq)
435 		free_irq(adapter->msix_info[msi++].vec,
436 			 &s->ethrxq[rxq].rspq);
437 }
438 
439 /*
440  * Turn on NAPI and start up interrupts on a response queue.
441  */
442 static void qenable(struct sge_rspq *rspq)
443 {
444 	napi_enable(&rspq->napi);
445 
446 	/*
447 	 * 0-increment the Going To Sleep register to start the timer and
448 	 * enable interrupts.
449 	 */
450 	t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
451 		     CIDXINC_V(0) |
452 		     SEINTARM_V(rspq->intr_params) |
453 		     INGRESSQID_V(rspq->cntxt_id));
454 }
455 
456 /*
457  * Enable NAPI scheduling and interrupt generation for all Receive Queues.
458  */
459 static void enable_rx(struct adapter *adapter)
460 {
461 	int rxq;
462 	struct sge *s = &adapter->sge;
463 
464 	for_each_ethrxq(s, rxq)
465 		qenable(&s->ethrxq[rxq].rspq);
466 	qenable(&s->fw_evtq);
467 
468 	/*
469 	 * The interrupt queue doesn't use NAPI so we do the 0-increment of
470 	 * its Going To Sleep register here to get it started.
471 	 */
472 	if (adapter->flags & USING_MSI)
473 		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
474 			     CIDXINC_V(0) |
475 			     SEINTARM_V(s->intrq.intr_params) |
476 			     INGRESSQID_V(s->intrq.cntxt_id));
477 
478 }
479 
480 /*
481  * Wait until all NAPI handlers are descheduled.
482  */
483 static void quiesce_rx(struct adapter *adapter)
484 {
485 	struct sge *s = &adapter->sge;
486 	int rxq;
487 
488 	for_each_ethrxq(s, rxq)
489 		napi_disable(&s->ethrxq[rxq].rspq.napi);
490 	napi_disable(&s->fw_evtq.napi);
491 }
492 
493 /*
494  * Response queue handler for the firmware event queue.
495  */
496 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
497 			  const struct pkt_gl *gl)
498 {
499 	/*
500 	 * Extract response opcode and get pointer to CPL message body.
501 	 */
502 	struct adapter *adapter = rspq->adapter;
503 	u8 opcode = ((const struct rss_header *)rsp)->opcode;
504 	void *cpl = (void *)(rsp + 1);
505 
506 	switch (opcode) {
507 	case CPL_FW6_MSG: {
508 		/*
509 		 * We've received an asynchronous message from the firmware.
510 		 */
511 		const struct cpl_fw6_msg *fw_msg = cpl;
512 		if (fw_msg->type == FW6_TYPE_CMD_RPL)
513 			t4vf_handle_fw_rpl(adapter, fw_msg->data);
514 		break;
515 	}
516 
517 	case CPL_FW4_MSG: {
518 		/* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
519 		 */
520 		const struct cpl_sge_egr_update *p = (void *)(rsp + 3);
521 		opcode = CPL_OPCODE_G(ntohl(p->opcode_qid));
522 		if (opcode != CPL_SGE_EGR_UPDATE) {
523 			dev_err(adapter->pdev_dev,
524 				"unexpected FW4/CPL %#x on FW event queue\n", opcode);
525 			break;
526 		}
527 		cpl = (void *)p;
528 		/*FALLTHROUGH*/
529 	}
530 
531 	case CPL_SGE_EGR_UPDATE: {
532 		/*
533 		 * We've received an Egress Queue Status Update message.  We
534 		 * get these, if the SGE is configured to send these when the
535 		 * firmware passes certain points in processing our TX
536 		 * Ethernet Queue or if we make an explicit request for one.
537 		 * We use these updates to determine when we may need to
538 		 * restart a TX Ethernet Queue which was stopped for lack of
539 		 * free TX Queue Descriptors ...
540 		 */
541 		const struct cpl_sge_egr_update *p = cpl;
542 		unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid));
543 		struct sge *s = &adapter->sge;
544 		struct sge_txq *tq;
545 		struct sge_eth_txq *txq;
546 		unsigned int eq_idx;
547 
548 		/*
549 		 * Perform sanity checking on the Queue ID to make sure it
550 		 * really refers to one of our TX Ethernet Egress Queues which
551 		 * is active and matches the queue's ID.  None of these error
552 		 * conditions should ever happen, so we may want to make them
553 		 * fatal and/or conditional under DEBUG.
554 		 */
555 		eq_idx = EQ_IDX(s, qid);
556 		if (unlikely(eq_idx >= MAX_EGRQ)) {
557 			dev_err(adapter->pdev_dev,
558 				"Egress Update QID %d out of range\n", qid);
559 			break;
560 		}
561 		tq = s->egr_map[eq_idx];
562 		if (unlikely(tq == NULL)) {
563 			dev_err(adapter->pdev_dev,
564 				"Egress Update QID %d TXQ=NULL\n", qid);
565 			break;
566 		}
567 		txq = container_of(tq, struct sge_eth_txq, q);
568 		if (unlikely(tq->abs_id != qid)) {
569 			dev_err(adapter->pdev_dev,
570 				"Egress Update QID %d refers to TXQ %d\n",
571 				qid, tq->abs_id);
572 			break;
573 		}
574 
575 		/*
576 		 * Restart a stopped TX Queue which has less than half of its
577 		 * TX ring in use ...
578 		 */
579 		txq->q.restarts++;
580 		netif_tx_wake_queue(txq->txq);
581 		break;
582 	}
583 
584 	default:
585 		dev_err(adapter->pdev_dev,
586 			"unexpected CPL %#x on FW event queue\n", opcode);
587 	}
588 
589 	return 0;
590 }
591 
592 /*
593  * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
594  * to use and initialize them.  We support multiple "Queue Sets" per port if
595  * we have MSI-X, otherwise just one queue set per port.
596  */
597 static int setup_sge_queues(struct adapter *adapter)
598 {
599 	struct sge *s = &adapter->sge;
600 	int err, pidx, msix;
601 
602 	/*
603 	 * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
604 	 * state.
605 	 */
606 	bitmap_zero(s->starving_fl, MAX_EGRQ);
607 
608 	/*
609 	 * If we're using MSI interrupt mode we need to set up a "forwarded
610 	 * interrupt" queue which we'll set up with our MSI vector.  The rest
611 	 * of the ingress queues will be set up to forward their interrupts to
612 	 * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
613 	 * the intrq's queue ID as the interrupt forwarding queue for the
614 	 * subsequent calls ...
615 	 */
616 	if (adapter->flags & USING_MSI) {
617 		err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
618 					 adapter->port[0], 0, NULL, NULL);
619 		if (err)
620 			goto err_free_queues;
621 	}
622 
623 	/*
624 	 * Allocate our ingress queue for asynchronous firmware messages.
625 	 */
626 	err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
627 				 MSIX_FW, NULL, fwevtq_handler);
628 	if (err)
629 		goto err_free_queues;
630 
631 	/*
632 	 * Allocate each "port"'s initial Queue Sets.  These can be changed
633  * later on ... up to the point where any interface on the adapter is
634  * brought up, at which point lots of things get nailed down
635 	 * permanently ...
636 	 */
637 	msix = MSIX_IQFLINT;
638 	for_each_port(adapter, pidx) {
639 		struct net_device *dev = adapter->port[pidx];
640 		struct port_info *pi = netdev_priv(dev);
641 		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
642 		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
643 		int qs;
644 
645 		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
646 			err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
647 						 dev, msix++,
648 						 &rxq->fl, t4vf_ethrx_handler);
649 			if (err)
650 				goto err_free_queues;
651 
652 			err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
653 					     netdev_get_tx_queue(dev, qs),
654 					     s->fw_evtq.cntxt_id);
655 			if (err)
656 				goto err_free_queues;
657 
658 			rxq->rspq.idx = qs;
659 			memset(&rxq->stats, 0, sizeof(rxq->stats));
660 		}
661 	}
662 
663 	/*
664 	 * Create the reverse mappings for the queues.
665 	 */
666 	s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
667 	s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
668 	IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
669 	for_each_port(adapter, pidx) {
670 		struct net_device *dev = adapter->port[pidx];
671 		struct port_info *pi = netdev_priv(dev);
672 		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
673 		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
674 		int qs;
675 
676 		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
677 			IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
678 			EQ_MAP(s, txq->q.abs_id) = &txq->q;
679 
680 			/*
681 			 * The FW_IQ_CMD doesn't return the Absolute Queue IDs
682 			 * for Free Lists but since all of the Egress Queues
683 			 * (including Free Lists) have Relative Queue IDs
684 			 * which are computed as Absolute - Base Queue ID, we
685 			 * can synthesize the Absolute Queue IDs for the Free
686 			 * Lists.  This is useful for debugging purposes when
687 			 * we want to dump Queue Contexts via the PF Driver.
688 			 */
689 			rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
690 			EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
691 		}
692 	}
693 	return 0;
694 
695 err_free_queues:
696 	t4vf_free_sge_resources(adapter);
697 	return err;
698 }
699 
700 /*
701  * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
702  * queues.  We configure the RSS CPU lookup table to distribute to the number
703  * of HW receive queues, and the response queue lookup table to narrow that
704  * down to the response queues actually configured for each "port" (Virtual
705  * Interface).  We always configure the RSS mapping for all ports since the
706  * mapping table has plenty of entries.
707  */
708 static int setup_rss(struct adapter *adapter)
709 {
710 	int pidx;
711 
712 	for_each_port(adapter, pidx) {
713 		struct port_info *pi = adap2pinfo(adapter, pidx);
714 		struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
715 		u16 rss[MAX_PORT_QSETS];
716 		int qs, err;
717 
718 		for (qs = 0; qs < pi->nqsets; qs++)
719 			rss[qs] = rxq[qs].rspq.abs_id;
720 
721 		err = t4vf_config_rss_range(adapter, pi->viid,
722 					    0, pi->rss_size, rss, pi->nqsets);
723 		if (err)
724 			return err;
725 
726 		/*
727 		 * Perform Global RSS Mode-specific initialization.
728 		 */
729 		switch (adapter->params.rss.mode) {
730 		case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
731 			/*
732 			 * If Tunnel All Lookup isn't specified in the global
733 			 * RSS Configuration, then we need to specify a
734 			 * default Ingress Queue for any ingress packets which
735 			 * aren't hashed.  We'll use our first ingress queue
736 			 * ...
737 			 */
738 			if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
739 				union rss_vi_config config;
740 				err = t4vf_read_rss_vi_config(adapter,
741 							      pi->viid,
742 							      &config);
743 				if (err)
744 					return err;
745 				config.basicvirtual.defaultq =
746 					rxq[0].rspq.abs_id;
747 				err = t4vf_write_rss_vi_config(adapter,
748 							       pi->viid,
749 							       &config);
750 				if (err)
751 					return err;
752 			}
753 			break;
754 		}
755 	}
756 
757 	return 0;
758 }
759 
760 /*
761  * Bring the adapter up.  Called whenever we go from no "ports" open to having
762  * one open.  This function performs the actions necessary to make an adapter
763  * operational, such as completing the initialization of HW modules, and
764  * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
765  * this is called "cxgb_up" in the PF Driver.)
766  */
767 static int adapter_up(struct adapter *adapter)
768 {
769 	int err;
770 
771 	/*
772 	 * If this is the first time we've been called, perform basic
773 	 * adapter setup.  Once we've done this, many of our adapter
774 	 * parameters can no longer be changed ...
775 	 */
776 	if ((adapter->flags & FULL_INIT_DONE) == 0) {
777 		err = setup_sge_queues(adapter);
778 		if (err)
779 			return err;
780 		err = setup_rss(adapter);
781 		if (err) {
782 			t4vf_free_sge_resources(adapter);
783 			return err;
784 		}
785 
786 		if (adapter->flags & USING_MSIX)
787 			name_msix_vecs(adapter);
788 
789 		adapter->flags |= FULL_INIT_DONE;
790 	}
791 
792 	/*
793 	 * Acquire our interrupt resources.  We only support MSI-X and MSI.
794 	 */
795 	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
796 	if (adapter->flags & USING_MSIX)
797 		err = request_msix_queue_irqs(adapter);
798 	else
799 		err = request_irq(adapter->pdev->irq,
800 				  t4vf_intr_handler(adapter), 0,
801 				  adapter->name, adapter);
802 	if (err) {
803 		dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
804 			err);
805 		return err;
806 	}
807 
808 	/*
809 	 * Enable NAPI ingress processing and return success.
810 	 */
811 	enable_rx(adapter);
812 	t4vf_sge_start(adapter);
813 
814 	return 0;
815 }
816 
817 /*
818  * Bring the adapter down.  Called whenever the last "port" (Virtual
819  * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
820  * Driver.)
821  */
822 static void adapter_down(struct adapter *adapter)
823 {
824 	/*
825 	 * Free interrupt resources.
826 	 */
827 	if (adapter->flags & USING_MSIX)
828 		free_msix_queue_irqs(adapter);
829 	else
830 		free_irq(adapter->pdev->irq, adapter);
831 
832 	/*
833 	 * Wait for NAPI handlers to finish.
834 	 */
835 	quiesce_rx(adapter);
836 }
837 
838 /*
839  * Start up a net device.
840  */
841 static int cxgb4vf_open(struct net_device *dev)
842 {
843 	int err;
844 	struct port_info *pi = netdev_priv(dev);
845 	struct adapter *adapter = pi->adapter;
846 
847 	/*
848 	 * If this is the first interface that we're opening on the "adapter",
849 	 * bring the "adapter" up now.
850 	 */
851 	if (adapter->open_device_map == 0) {
852 		err = adapter_up(adapter);
853 		if (err)
854 			return err;
855 	}
856 
857 	/* It's possible that the basic port information could have
858 	 * changed since we first read it.
859 	 */
860 	err = t4vf_update_port_info(pi);
861 	if (err < 0)
862 		return err;
863 
864 	/*
865 	 * Note that this interface is up and start everything up ...
866 	 */
867 	err = link_start(dev);
868 	if (err)
869 		goto err_unwind;
870 
871 	pi->vlan_id = t4vf_get_vf_vlan_acl(adapter);
872 
873 	netif_tx_start_all_queues(dev);
874 	set_bit(pi->port_id, &adapter->open_device_map);
875 	return 0;
876 
877 err_unwind:
878 	if (adapter->open_device_map == 0)
879 		adapter_down(adapter);
880 	return err;
881 }
882 
883 /*
884  * Shut down a net device.  This routine is called "cxgb_close" in the PF
885  * Driver ...
886  */
887 static int cxgb4vf_stop(struct net_device *dev)
888 {
889 	struct port_info *pi = netdev_priv(dev);
890 	struct adapter *adapter = pi->adapter;
891 
892 	netif_tx_stop_all_queues(dev);
893 	netif_carrier_off(dev);
894 	t4vf_enable_pi(adapter, pi, false, false);
895 
896 	clear_bit(pi->port_id, &adapter->open_device_map);
897 	if (adapter->open_device_map == 0)
898 		adapter_down(adapter);
899 	return 0;
900 }
901 
902 /*
903  * Translate our basic statistics into the standard "ifconfig" statistics.
904  */
905 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
906 {
907 	struct t4vf_port_stats stats;
908 	struct port_info *pi = netdev2pinfo(dev);
909 	struct adapter *adapter = pi->adapter;
910 	struct net_device_stats *ns = &dev->stats;
911 	int err;
912 
913 	spin_lock(&adapter->stats_lock);
914 	err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
915 	spin_unlock(&adapter->stats_lock);
916 
917 	memset(ns, 0, sizeof(*ns));
918 	if (err)
919 		return ns;
920 
921 	ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
922 			stats.tx_ucast_bytes + stats.tx_offload_bytes);
923 	ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
924 			  stats.tx_ucast_frames + stats.tx_offload_frames);
925 	ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
926 			stats.rx_ucast_bytes);
927 	ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
928 			  stats.rx_ucast_frames);
929 	ns->multicast = stats.rx_mcast_frames;
930 	ns->tx_errors = stats.tx_drop_frames;
931 	ns->rx_errors = stats.rx_err_frames;
932 
933 	return ns;
934 }
935 
936 static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr)
937 {
938 	struct port_info *pi = netdev_priv(netdev);
939 	struct adapter *adapter = pi->adapter;
940 	int ret;
941 	u64 mhash = 0;
942 	u64 uhash = 0;
943 	bool free = false;
944 	bool ucast = is_unicast_ether_addr(mac_addr);
945 	const u8 *maclist[1] = {mac_addr};
946 	struct hash_mac_addr *new_entry;
947 
948 	ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist,
949 				  NULL, ucast ? &uhash : &mhash, false);
950 	if (ret < 0)
951 		goto out;
952 	/* If the hash is non-zero, add the address to the hash address
953 	 * list so that in the end we can calculate the hash for the
954 	 * whole list and program it.
955 	 */
956 	if (uhash || mhash) {
957 		new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
958 		if (!new_entry)
959 			return -ENOMEM;
960 		ether_addr_copy(new_entry->addr, mac_addr);
961 		list_add_tail(&new_entry->list, &adapter->mac_hlist);
962 		ret = cxgb4vf_set_addr_hash(pi);
963 	}
964 out:
965 	return ret < 0 ? ret : 0;
966 }
967 
968 static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
969 {
970 	struct port_info *pi = netdev_priv(netdev);
971 	struct adapter *adapter = pi->adapter;
972 	int ret;
973 	const u8 *maclist[1] = {mac_addr};
974 	struct hash_mac_addr *entry, *tmp;
975 
976 	/* If the MAC address to be removed is in the hash addr
977 	 * list, delete it from the list and update hash vector
978 	 */
979 	list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) {
980 		if (ether_addr_equal(entry->addr, mac_addr)) {
981 			list_del(&entry->list);
982 			kfree(entry);
983 			return cxgb4vf_set_addr_hash(pi);
984 		}
985 	}
986 
987 	ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false);
988 	return ret < 0 ? -EINVAL : 0;
989 }
990 
991 /*
992  * Set RX properties of a port, such as promiscuity, address filters, and MTU.
993  * If @mtu is -1 it is left unchanged.
994  */
995 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
996 {
997 	struct port_info *pi = netdev_priv(dev);
998 
999 	__dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
1000 	__dev_mc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
1001 	return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
1002 			       (dev->flags & IFF_PROMISC) != 0,
1003 			       (dev->flags & IFF_ALLMULTI) != 0,
1004 			       1, -1, sleep_ok);
1005 }
1006 
1007 /*
1008  * Set the current receive modes on the device.
1009  */
1010 static void cxgb4vf_set_rxmode(struct net_device *dev)
1011 {
1012 	/* unfortunately we can't return errors to the stack */
1013 	set_rxmode(dev, -1, false);
1014 }
1015 
1016 /*
1017  * Find the entry in the interrupt holdoff timer value array which comes
1018  * closest to the specified interrupt holdoff value.
1019  */
1020 static int closest_timer(const struct sge *s, int us)
1021 {
1022 	int i, timer_idx = 0, min_delta = INT_MAX;
1023 
1024 	for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
1025 		int delta = us - s->timer_val[i];
1026 		if (delta < 0)
1027 			delta = -delta;
1028 		if (delta < min_delta) {
1029 			min_delta = delta;
1030 			timer_idx = i;
1031 		}
1032 	}
1033 	return timer_idx;
1034 }
1035 
1036 static int closest_thres(const struct sge *s, int thres)
1037 {
1038 	int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
1039 
1040 	for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
1041 		delta = thres - s->counter_val[i];
1042 		if (delta < 0)
1043 			delta = -delta;
1044 		if (delta < min_delta) {
1045 			min_delta = delta;
1046 			pktcnt_idx = i;
1047 		}
1048 	}
1049 	return pktcnt_idx;
1050 }
1051 
1052 /*
1053  * Return a queue's interrupt hold-off time in us.  0 means no timer.
1054  */
1055 static unsigned int qtimer_val(const struct adapter *adapter,
1056 			       const struct sge_rspq *rspq)
1057 {
1058 	unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params);
1059 
1060 	return timer_idx < SGE_NTIMERS
1061 		? adapter->sge.timer_val[timer_idx]
1062 		: 0;
1063 }
1064 
1065 /**
1066  *	set_rxq_intr_params - set a queue's interrupt holdoff parameters
1067  *	@adapter: the adapter
1068  *	@rspq: the RX response queue
1069  *	@us: the hold-off time in us, or 0 to disable timer
1070  *	@cnt: the hold-off packet count, or 0 to disable counter
1071  *
1072  *	Sets an RX response queue's interrupt hold-off time and packet count.
1073  *	At least one of the two needs to be enabled for the queue to generate
1074  *	interrupts.
1075  */
1076 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
1077 			       unsigned int us, unsigned int cnt)
1078 {
1079 	unsigned int timer_idx;
1080 
1081 	/*
1082 	 * If both the interrupt holdoff timer and count are specified as
1083 	 * zero, default to a holdoff count of 1 ...
1084 	 */
1085 	if ((us | cnt) == 0)
1086 		cnt = 1;
1087 
1088 	/*
1089 	 * If an interrupt holdoff count has been specified, then find the
1090 	 * closest configured holdoff count and use that.  If the response
1091 	 * queue has already been created, then update its queue context
1092 	 * parameters ...
1093 	 */
1094 	if (cnt) {
1095 		int err;
1096 		u32 v, pktcnt_idx;
1097 
1098 		pktcnt_idx = closest_thres(&adapter->sge, cnt);
1099 		if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1100 			v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
1101 			    FW_PARAMS_PARAM_X_V(
1102 					FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1103 			    FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id);
1104 			err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1105 			if (err)
1106 				return err;
1107 		}
1108 		rspq->pktcnt_idx = pktcnt_idx;
1109 	}
1110 
1111 	/*
1112 	 * Compute the closest holdoff timer index from the supplied holdoff
1113 	 * timer value.
1114 	 */
1115 	timer_idx = (us == 0
1116 		     ? SGE_TIMER_RSTRT_CNTR
1117 		     : closest_timer(&adapter->sge, us));
1118 
1119 	/*
1120 	 * Update the response queue's interrupt coalescing parameters and
1121 	 * return success.
1122 	 */
1123 	rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
1124 			     QINTR_CNT_EN_V(cnt > 0));
1125 	return 0;
1126 }
1127 
1128 /*
1129  * Return a version number to identify the type of adapter.  The scheme is:
1130  * - bits 0..9: chip version
1131  * - bits 10..15: chip revision
1132  */
1133 static inline unsigned int mk_adap_vers(const struct adapter *adapter)
1134 {
1135 	/*
1136 	 * Chip version 4, revision 0x3f (cxgb4vf).
1137 	 */
1138 	return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
1139 }
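
/*
 * For example, a T4 VF (chip version 4) reports (0x3f << 10) | 4 = 0xfc04
 * as the version of its register dump.
 */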
1140 
1141 /*
1142  * Execute the specified ioctl command.
1143  */
1144 static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1145 {
1146 	int ret = 0;
1147 
1148 	switch (cmd) {
1149 	    /*
1150 	     * The VF Driver doesn't have access to any of the other
1151 	     * common Ethernet device ioctl()'s (like reading/writing
1152 	     * PHY registers, etc.).
1153 	     */
1154 
1155 	default:
1156 		ret = -EOPNOTSUPP;
1157 		break;
1158 	}
1159 	return ret;
1160 }
1161 
1162 /*
1163  * Change the device's MTU.
1164  */
1165 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1166 {
1167 	int ret;
1168 	struct port_info *pi = netdev_priv(dev);
1169 
1170 	ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1171 			      -1, -1, -1, -1, true);
1172 	if (!ret)
1173 		dev->mtu = new_mtu;
1174 	return ret;
1175 }
1176 
1177 static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
1178 	netdev_features_t features)
1179 {
1180 	/*
1181 	 * Since there is no support for separate RX/TX VLAN acceleration
1182 	 * enable/disable, keep the TX flag in the same state as the RX flag.
1183 	 */
1184 	if (features & NETIF_F_HW_VLAN_CTAG_RX)
1185 		features |= NETIF_F_HW_VLAN_CTAG_TX;
1186 	else
1187 		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
1188 
1189 	return features;
1190 }
1191 
1192 static int cxgb4vf_set_features(struct net_device *dev,
1193 	netdev_features_t features)
1194 {
1195 	struct port_info *pi = netdev_priv(dev);
1196 	netdev_features_t changed = dev->features ^ features;
1197 
1198 	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
1199 		t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1200 				features & NETIF_F_HW_VLAN_CTAG_TX, 0);
1201 
1202 	return 0;
1203 }
1204 
1205 /*
1206  * Change the device's MAC address.
1207  */
1208 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1209 {
1210 	int ret;
1211 	struct sockaddr *addr = _addr;
1212 	struct port_info *pi = netdev_priv(dev);
1213 
1214 	if (!is_valid_ether_addr(addr->sa_data))
1215 		return -EADDRNOTAVAIL;
1216 
1217 	ret = cxgb4vf_change_mac(pi, pi->viid, &pi->xact_addr_filt,
1218 				 addr->sa_data, true);
1219 	if (ret < 0)
1220 		return ret;
1221 
1222 	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1223 	return 0;
1224 }
1225 
1226 #ifdef CONFIG_NET_POLL_CONTROLLER
1227 /*
1228  * Poll all of our receive queues.  This is called outside of normal interrupt
1229  * context.
1230  */
1231 static void cxgb4vf_poll_controller(struct net_device *dev)
1232 {
1233 	struct port_info *pi = netdev_priv(dev);
1234 	struct adapter *adapter = pi->adapter;
1235 
1236 	if (adapter->flags & USING_MSIX) {
1237 		struct sge_eth_rxq *rxq;
1238 		int nqsets;
1239 
1240 		rxq = &adapter->sge.ethrxq[pi->first_qset];
1241 		for (nqsets = pi->nqsets; nqsets; nqsets--) {
1242 			t4vf_sge_intr_msix(0, &rxq->rspq);
1243 			rxq++;
1244 		}
1245 	} else
1246 		t4vf_intr_handler(adapter)(0, adapter);
1247 }
1248 #endif
1249 
1250 /*
1251  * Ethtool operations.
1252  * ===================
1253  *
1254  * Note that we don't support any ethtool operations which change the physical
1255  * state of the port to which we're linked.
1256  */
1257 
1258 /**
1259  *	from_fw_port_mod_type - translate Firmware Port/Module type to Ethtool
1260  *	@port_type: Firmware Port Type
1261  *	@mod_type: Firmware Module Type
1262  *
1263  *	Translate Firmware Port/Module type to Ethtool Port Type.
1264  */
1265 static int from_fw_port_mod_type(enum fw_port_type port_type,
1266 				 enum fw_port_module_type mod_type)
1267 {
1268 	if (port_type == FW_PORT_TYPE_BT_SGMII ||
1269 	    port_type == FW_PORT_TYPE_BT_XFI ||
1270 	    port_type == FW_PORT_TYPE_BT_XAUI) {
1271 		return PORT_TP;
1272 	} else if (port_type == FW_PORT_TYPE_FIBER_XFI ||
1273 		   port_type == FW_PORT_TYPE_FIBER_XAUI) {
1274 		return PORT_FIBRE;
1275 	} else if (port_type == FW_PORT_TYPE_SFP ||
1276 		   port_type == FW_PORT_TYPE_QSFP_10G ||
1277 		   port_type == FW_PORT_TYPE_QSA ||
1278 		   port_type == FW_PORT_TYPE_QSFP ||
1279 		   port_type == FW_PORT_TYPE_CR4_QSFP ||
1280 		   port_type == FW_PORT_TYPE_CR_QSFP ||
1281 		   port_type == FW_PORT_TYPE_CR2_QSFP ||
1282 		   port_type == FW_PORT_TYPE_SFP28) {
1283 		if (mod_type == FW_PORT_MOD_TYPE_LR ||
1284 		    mod_type == FW_PORT_MOD_TYPE_SR ||
1285 		    mod_type == FW_PORT_MOD_TYPE_ER ||
1286 		    mod_type == FW_PORT_MOD_TYPE_LRM)
1287 			return PORT_FIBRE;
1288 		else if (mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
1289 			 mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
1290 			return PORT_DA;
1291 		else
1292 			return PORT_OTHER;
1293 	} else if (port_type == FW_PORT_TYPE_KR4_100G ||
1294 		   port_type == FW_PORT_TYPE_KR_SFP28 ||
1295 		   port_type == FW_PORT_TYPE_KR_XLAUI) {
1296 		return PORT_NONE;
1297 	}
1298 
1299 	return PORT_OTHER;
1300 }
1301 
1302 /**
1303  *	fw_caps_to_lmm - translate Firmware to ethtool Link Mode Mask
1304  *	@port_type: Firmware Port Type
1305  *	@fw_caps: Firmware Port Capabilities
1306  *	@link_mode_mask: ethtool Link Mode Mask
1307  *
1308  *	Translate a Firmware Port Capabilities specification to an ethtool
1309  *	Link Mode Mask.
1310  */
1311 static void fw_caps_to_lmm(enum fw_port_type port_type,
1312 			   unsigned int fw_caps,
1313 			   unsigned long *link_mode_mask)
1314 {
1315 	#define SET_LMM(__lmm_name) \
1316 		__set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \
1317 			  link_mode_mask)
1318 
1319 	#define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \
1320 		do { \
1321 			if (fw_caps & FW_PORT_CAP32_ ## __fw_name) \
1322 				SET_LMM(__lmm_name); \
1323 		} while (0)
1324 
1325 	switch (port_type) {
1326 	case FW_PORT_TYPE_BT_SGMII:
1327 	case FW_PORT_TYPE_BT_XFI:
1328 	case FW_PORT_TYPE_BT_XAUI:
1329 		SET_LMM(TP);
1330 		FW_CAPS_TO_LMM(SPEED_100M, 100baseT_Full);
1331 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1332 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1333 		break;
1334 
1335 	case FW_PORT_TYPE_KX4:
1336 	case FW_PORT_TYPE_KX:
1337 		SET_LMM(Backplane);
1338 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1339 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
1340 		break;
1341 
1342 	case FW_PORT_TYPE_KR:
1343 		SET_LMM(Backplane);
1344 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1345 		break;
1346 
1347 	case FW_PORT_TYPE_BP_AP:
1348 		SET_LMM(Backplane);
1349 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1350 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
1351 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1352 		break;
1353 
1354 	case FW_PORT_TYPE_BP4_AP:
1355 		SET_LMM(Backplane);
1356 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1357 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
1358 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1359 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
1360 		break;
1361 
1362 	case FW_PORT_TYPE_FIBER_XFI:
1363 	case FW_PORT_TYPE_FIBER_XAUI:
1364 	case FW_PORT_TYPE_SFP:
1365 	case FW_PORT_TYPE_QSFP_10G:
1366 	case FW_PORT_TYPE_QSA:
1367 		SET_LMM(FIBRE);
1368 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1369 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1370 		break;
1371 
1372 	case FW_PORT_TYPE_BP40_BA:
1373 	case FW_PORT_TYPE_QSFP:
1374 		SET_LMM(FIBRE);
1375 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1376 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1377 		FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
1378 		break;
1379 
1380 	case FW_PORT_TYPE_CR_QSFP:
1381 	case FW_PORT_TYPE_SFP28:
1382 		SET_LMM(FIBRE);
1383 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1384 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1385 		FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
1386 		break;
1387 
1388 	case FW_PORT_TYPE_KR_SFP28:
1389 		SET_LMM(Backplane);
1390 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1391 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1392 		FW_CAPS_TO_LMM(SPEED_25G, 25000baseKR_Full);
1393 		break;
1394 
1395 	case FW_PORT_TYPE_KR_XLAUI:
1396 		SET_LMM(Backplane);
1397 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1398 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1399 		FW_CAPS_TO_LMM(SPEED_40G, 40000baseKR4_Full);
1400 		break;
1401 
1402 	case FW_PORT_TYPE_CR2_QSFP:
1403 		SET_LMM(FIBRE);
1404 		FW_CAPS_TO_LMM(SPEED_50G, 50000baseSR2_Full);
1405 		break;
1406 
1407 	case FW_PORT_TYPE_KR4_100G:
1408 	case FW_PORT_TYPE_CR4_QSFP:
1409 		SET_LMM(FIBRE);
1410 		FW_CAPS_TO_LMM(SPEED_1G,  1000baseT_Full);
1411 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1412 		FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
1413 		FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
1414 		FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
1415 		FW_CAPS_TO_LMM(SPEED_100G, 100000baseCR4_Full);
1416 		break;
1417 
1418 	default:
1419 		break;
1420 	}
1421 
1422 	if (fw_caps & FW_PORT_CAP32_FEC_V(FW_PORT_CAP32_FEC_M)) {
1423 		FW_CAPS_TO_LMM(FEC_RS, FEC_RS);
1424 		FW_CAPS_TO_LMM(FEC_BASER_RS, FEC_BASER);
1425 	} else {
1426 		SET_LMM(FEC_NONE);
1427 	}
1428 
1429 	FW_CAPS_TO_LMM(ANEG, Autoneg);
1430 	FW_CAPS_TO_LMM(802_3_PAUSE, Pause);
1431 	FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause);
1432 
1433 	#undef FW_CAPS_TO_LMM
1434 	#undef SET_LMM
1435 }
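
/*
 * As a concrete example of the macros above (ignoring the do { } while (0)
 * wrapper), FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full) expands to:
 *
 *	if (fw_caps & FW_PORT_CAP32_SPEED_10G)
 *		__set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
 *			  link_mode_mask);
 */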
1436 
1437 static int cxgb4vf_get_link_ksettings(struct net_device *dev,
1438 				  struct ethtool_link_ksettings *link_ksettings)
1439 {
1440 	struct port_info *pi = netdev_priv(dev);
1441 	struct ethtool_link_settings *base = &link_ksettings->base;
1442 
1443 	/* For now, the Firmware doesn't send up Port State changes when
1444 	 * the Virtual Interface attached to the Port is down.  So if the
1445 	 * interface is down, explicitly poll for any changes.
1446 	 */
1447 	if (!netif_running(dev))
1448 		(void)t4vf_update_port_info(pi);
1449 
1450 	ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
1451 	ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
1452 	ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising);
1453 
1454 	base->port = from_fw_port_mod_type(pi->port_type, pi->mod_type);
1455 
1456 	if (pi->mdio_addr >= 0) {
1457 		base->phy_address = pi->mdio_addr;
1458 		base->mdio_support = (pi->port_type == FW_PORT_TYPE_BT_SGMII
1459 				      ? ETH_MDIO_SUPPORTS_C22
1460 				      : ETH_MDIO_SUPPORTS_C45);
1461 	} else {
1462 		base->phy_address = 255;
1463 		base->mdio_support = 0;
1464 	}
1465 
1466 	fw_caps_to_lmm(pi->port_type, pi->link_cfg.pcaps,
1467 		       link_ksettings->link_modes.supported);
1468 	fw_caps_to_lmm(pi->port_type, pi->link_cfg.acaps,
1469 		       link_ksettings->link_modes.advertising);
1470 	fw_caps_to_lmm(pi->port_type, pi->link_cfg.lpacaps,
1471 		       link_ksettings->link_modes.lp_advertising);
1472 
1473 	if (netif_carrier_ok(dev)) {
1474 		base->speed = pi->link_cfg.speed;
1475 		base->duplex = DUPLEX_FULL;
1476 	} else {
1477 		base->speed = SPEED_UNKNOWN;
1478 		base->duplex = DUPLEX_UNKNOWN;
1479 	}
1480 
1481 	if (pi->link_cfg.fc & PAUSE_RX) {
1482 		if (pi->link_cfg.fc & PAUSE_TX) {
1483 			ethtool_link_ksettings_add_link_mode(link_ksettings,
1484 							     advertising,
1485 							     Pause);
1486 		} else {
1487 			ethtool_link_ksettings_add_link_mode(link_ksettings,
1488 							     advertising,
1489 							     Asym_Pause);
1490 		}
1491 	} else if (pi->link_cfg.fc & PAUSE_TX) {
1492 		ethtool_link_ksettings_add_link_mode(link_ksettings,
1493 						     advertising,
1494 						     Asym_Pause);
1495 	}
1496 
1497 	base->autoneg = pi->link_cfg.autoneg;
1498 	if (pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG)
1499 		ethtool_link_ksettings_add_link_mode(link_ksettings,
1500 						     supported, Autoneg);
1501 	if (pi->link_cfg.autoneg)
1502 		ethtool_link_ksettings_add_link_mode(link_ksettings,
1503 						     advertising, Autoneg);
1504 
1505 	return 0;
1506 }
1507 
1508 /* Translate the Firmware FEC value into the ethtool value. */
1509 static inline unsigned int fwcap_to_eth_fec(unsigned int fw_fec)
1510 {
1511 	unsigned int eth_fec = 0;
1512 
1513 	if (fw_fec & FW_PORT_CAP32_FEC_RS)
1514 		eth_fec |= ETHTOOL_FEC_RS;
1515 	if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS)
1516 		eth_fec |= ETHTOOL_FEC_BASER;
1517 
1518 	/* if nothing is set, then FEC is off */
1519 	if (!eth_fec)
1520 		eth_fec = ETHTOOL_FEC_OFF;
1521 
1522 	return eth_fec;
1523 }
1524 
1525 /* Translate Common Code FEC value into ethtool value. */
1526 static inline unsigned int cc_to_eth_fec(unsigned int cc_fec)
1527 {
1528 	unsigned int eth_fec = 0;
1529 
1530 	if (cc_fec & FEC_AUTO)
1531 		eth_fec |= ETHTOOL_FEC_AUTO;
1532 	if (cc_fec & FEC_RS)
1533 		eth_fec |= ETHTOOL_FEC_RS;
1534 	if (cc_fec & FEC_BASER_RS)
1535 		eth_fec |= ETHTOOL_FEC_BASER;
1536 
1537 	/* if nothing is set, then FEC is off */
1538 	if (!eth_fec)
1539 		eth_fec = ETHTOOL_FEC_OFF;
1540 
1541 	return eth_fec;
1542 }
1543 
1544 static int cxgb4vf_get_fecparam(struct net_device *dev,
1545 				struct ethtool_fecparam *fec)
1546 {
1547 	const struct port_info *pi = netdev_priv(dev);
1548 	const struct link_config *lc = &pi->link_cfg;
1549 
1550 	/* Translate the Firmware FEC Support into the ethtool value.  We
1551 	 * always support IEEE 802.3 "automatic" selection of Link FEC type if
1552 	 * any FEC is supported.
1553 	 */
1554 	fec->fec = fwcap_to_eth_fec(lc->pcaps);
1555 	if (fec->fec != ETHTOOL_FEC_OFF)
1556 		fec->fec |= ETHTOOL_FEC_AUTO;
1557 
1558 	/* Translate the current internal FEC parameters into the
1559 	 * ethtool values.
1560 	 */
1561 	fec->active_fec = cc_to_eth_fec(lc->fec);
1562 	return 0;
1563 }
1564 
1565 /*
1566  * Return our driver information.
1567  */
1568 static void cxgb4vf_get_drvinfo(struct net_device *dev,
1569 				struct ethtool_drvinfo *drvinfo)
1570 {
1571 	struct adapter *adapter = netdev2adap(dev);
1572 
1573 	strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1574 	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
1575 	strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1576 		sizeof(drvinfo->bus_info));
1577 	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1578 		 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1579 		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev),
1580 		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev),
1581 		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev),
1582 		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev),
1583 		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev),
1584 		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev),
1585 		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev),
1586 		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev));
1587 }
1588 
1589 /*
1590  * Return current adapter message level.
1591  */
1592 static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1593 {
1594 	return netdev2adap(dev)->msg_enable;
1595 }
1596 
1597 /*
1598  * Set current adapter message level.
1599  */
1600 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1601 {
1602 	netdev2adap(dev)->msg_enable = msglevel;
1603 }
1604 
1605 /*
1606  * Return the device's current Queue Set ring size parameters along with the
1607  * allowed maximum values.  Since ethtool doesn't understand the concept of
1608  * multi-queue devices, we just return the current values associated with the
1609  * first Queue Set.
1610  */
1611 static void cxgb4vf_get_ringparam(struct net_device *dev,
1612 				  struct ethtool_ringparam *rp)
1613 {
1614 	const struct port_info *pi = netdev_priv(dev);
1615 	const struct sge *s = &pi->adapter->sge;
1616 
1617 	rp->rx_max_pending = MAX_RX_BUFFERS;
1618 	rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1619 	rp->rx_jumbo_max_pending = 0;
1620 	rp->tx_max_pending = MAX_TXQ_ENTRIES;
1621 
1622 	rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1623 	rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1624 	rp->rx_jumbo_pending = 0;
1625 	rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1626 }
1627 
1628 /*
1629  * Set the Queue Set ring size parameters for the device.  Again, since
1630  * ethtool doesn't allow for the concept of multiple queues per device, we'll
1631  * apply these new values across all of the Queue Sets associated with the
1632  * device -- after vetting them of course!
1633  */
1634 static int cxgb4vf_set_ringparam(struct net_device *dev,
1635 				 struct ethtool_ringparam *rp)
1636 {
1637 	const struct port_info *pi = netdev_priv(dev);
1638 	struct adapter *adapter = pi->adapter;
1639 	struct sge *s = &adapter->sge;
1640 	int qs;
1641 
1642 	if (rp->rx_pending > MAX_RX_BUFFERS ||
1643 	    rp->rx_jumbo_pending ||
1644 	    rp->tx_pending > MAX_TXQ_ENTRIES ||
1645 	    rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1646 	    rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1647 	    rp->rx_pending < MIN_FL_ENTRIES ||
1648 	    rp->tx_pending < MIN_TXQ_ENTRIES)
1649 		return -EINVAL;
1650 
1651 	if (adapter->flags & FULL_INIT_DONE)
1652 		return -EBUSY;
1653 
1654 	for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1655 		s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1656 		s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1657 		s->ethtxq[qs].q.size = rp->tx_pending;
1658 	}
1659 	return 0;
1660 }
1661 
1662 /*
1663  * Return the interrupt holdoff timer and count for the first Queue Set on the
1664  * device.  Our extension ioctl() (the cxgbtool interface) allows the
1665  * interrupt holdoff timer to be read on all of the device's Queue Sets.
1666  */
1667 static int cxgb4vf_get_coalesce(struct net_device *dev,
1668 				struct ethtool_coalesce *coalesce)
1669 {
1670 	const struct port_info *pi = netdev_priv(dev);
1671 	const struct adapter *adapter = pi->adapter;
1672 	const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1673 
1674 	coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1675 	coalesce->rx_max_coalesced_frames =
1676 		((rspq->intr_params & QINTR_CNT_EN_F)
1677 		 ? adapter->sge.counter_val[rspq->pktcnt_idx]
1678 		 : 0);
1679 	return 0;
1680 }
1681 
1682 /*
1683  * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1684  * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1685  * the interrupt holdoff timer on any of the device's Queue Sets.
1686  */
1687 static int cxgb4vf_set_coalesce(struct net_device *dev,
1688 				struct ethtool_coalesce *coalesce)
1689 {
1690 	const struct port_info *pi = netdev_priv(dev);
1691 	struct adapter *adapter = pi->adapter;
1692 
1693 	return set_rxq_intr_params(adapter,
1694 				   &adapter->sge.ethrxq[pi->first_qset].rspq,
1695 				   coalesce->rx_coalesce_usecs,
1696 				   coalesce->rx_max_coalesced_frames);
1697 }
1698 
1699 /*
1700  * Report current port link pause parameter settings.
1701  */
1702 static void cxgb4vf_get_pauseparam(struct net_device *dev,
1703 				   struct ethtool_pauseparam *pauseparam)
1704 {
1705 	struct port_info *pi = netdev_priv(dev);
1706 
1707 	pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1708 	pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1709 	pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1710 }
1711 
1712 /*
1713  * Identify the port by blinking the port's LED.
1714  */
1715 static int cxgb4vf_phys_id(struct net_device *dev,
1716 			   enum ethtool_phys_id_state state)
1717 {
1718 	unsigned int val;
1719 	struct port_info *pi = netdev_priv(dev);
1720 
1721 	if (state == ETHTOOL_ID_ACTIVE)
1722 		val = 0xffff;
1723 	else if (state == ETHTOOL_ID_INACTIVE)
1724 		val = 0;
1725 	else
1726 		return -EINVAL;
1727 
1728 	return t4vf_identify_port(pi->adapter, pi->viid, val);
1729 }
1730 
1731 /*
1732  * Port stats maintained per queue of the port.
1733  */
1734 struct queue_port_stats {
1735 	u64 tso;
1736 	u64 tx_csum;
1737 	u64 rx_csum;
1738 	u64 vlan_ex;
1739 	u64 vlan_ins;
1740 	u64 lro_pkts;
1741 	u64 lro_merged;
1742 };
1743 
1744 /*
1745  * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
1746  * these need to match the order of statistics returned by
1747  * t4vf_get_port_stats().
1748  */
1749 static const char stats_strings[][ETH_GSTRING_LEN] = {
1750 	/*
1751 	 * These must match the layout of the t4vf_port_stats structure.
1752 	 */
1753 	"TxBroadcastBytes  ",
1754 	"TxBroadcastFrames ",
1755 	"TxMulticastBytes  ",
1756 	"TxMulticastFrames ",
1757 	"TxUnicastBytes    ",
1758 	"TxUnicastFrames   ",
1759 	"TxDroppedFrames   ",
1760 	"TxOffloadBytes    ",
1761 	"TxOffloadFrames   ",
1762 	"RxBroadcastBytes  ",
1763 	"RxBroadcastFrames ",
1764 	"RxMulticastBytes  ",
1765 	"RxMulticastFrames ",
1766 	"RxUnicastBytes    ",
1767 	"RxUnicastFrames   ",
1768 	"RxErrorFrames     ",
1769 
1770 	/*
1771 	 * These are accumulated per-queue statistics and must match the
1772 	 * order of the fields in the queue_port_stats structure.
1773 	 */
1774 	"TSO               ",
1775 	"TxCsumOffload     ",
1776 	"RxCsumGood        ",
1777 	"VLANextractions   ",
1778 	"VLANinsertions    ",
1779 	"GROPackets        ",
1780 	"GROMerged         ",
1781 };
1782 
1783 /*
1784  * Return the number of statistics in the specified statistics set.
1785  */
1786 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1787 {
1788 	switch (sset) {
1789 	case ETH_SS_STATS:
1790 		return ARRAY_SIZE(stats_strings);
1791 	default:
1792 		return -EOPNOTSUPP;
1793 	}
1794 	/*NOTREACHED*/
1795 }
1796 
1797 /*
1798  * Return the strings for the specified statistics set.
1799  */
1800 static void cxgb4vf_get_strings(struct net_device *dev,
1801 				u32 sset,
1802 				u8 *data)
1803 {
1804 	switch (sset) {
1805 	case ETH_SS_STATS:
1806 		memcpy(data, stats_strings, sizeof(stats_strings));
1807 		break;
1808 	}
1809 }
1810 
1811 /*
1812  * Small utility routine to accumulate queue statistics across the queues of
1813  * a "port".
1814  */
1815 static void collect_sge_port_stats(const struct adapter *adapter,
1816 				   const struct port_info *pi,
1817 				   struct queue_port_stats *stats)
1818 {
1819 	const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1820 	const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1821 	int qs;
1822 
1823 	memset(stats, 0, sizeof(*stats));
1824 	for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1825 		stats->tso += txq->tso;
1826 		stats->tx_csum += txq->tx_cso;
1827 		stats->rx_csum += rxq->stats.rx_cso;
1828 		stats->vlan_ex += rxq->stats.vlan_ex;
1829 		stats->vlan_ins += txq->vlan_ins;
1830 		stats->lro_pkts += rxq->stats.lro_pkts;
1831 		stats->lro_merged += rxq->stats.lro_merged;
1832 	}
1833 }
1834 
1835 /*
1836  * Return the ETH_SS_STATS statistics set.
1837  */
1838 static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1839 				      struct ethtool_stats *stats,
1840 				      u64 *data)
1841 {
1842 	struct port_info *pi = netdev2pinfo(dev);
1843 	struct adapter *adapter = pi->adapter;
1844 	int err = t4vf_get_port_stats(adapter, pi->pidx,
1845 				      (struct t4vf_port_stats *)data);
1846 	if (err)
1847 		memset(data, 0, sizeof(struct t4vf_port_stats));
1848 
1849 	data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1850 	collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1851 }
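
/*
 * For illustration: the u64 buffer which the ethtool core hands us above
 * is laid out exactly like stats_strings[] -- the firmware-maintained
 * t4vf_port_stats counters first (16 entries, one per "Tx*"/"Rx*"
 * string, assuming that structure holds only u64 counters), followed by
 * the seven accumulated queue_port_stats counters.  That's why the code
 * advances "data" by sizeof(struct t4vf_port_stats)/sizeof(u64) before
 * collecting the per-queue statistics.
 */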
1852 
1853 /*
1854  * Return the size of our register map.
1855  */
1856 static int cxgb4vf_get_regs_len(struct net_device *dev)
1857 {
1858 	return T4VF_REGMAP_SIZE;
1859 }
1860 
1861 /*
1862  * Dump a block of registers, start to end inclusive, into a buffer.
1863  */
1864 static void reg_block_dump(struct adapter *adapter, void *regbuf,
1865 			   unsigned int start, unsigned int end)
1866 {
1867 	u32 *bp = regbuf + start - T4VF_REGMAP_START;
1868 
1869 	for ( ; start <= end; start += sizeof(u32)) {
1870 		/*
1871 		 * Avoid reading the Mailbox Control register since that
1872 		 * can trigger a Mailbox Ownership Arbitration cycle and
1873 		 * interfere with communication with the firmware.
1874 		 */
1875 		if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1876 			*bp++ = 0xffff;
1877 		else
1878 			*bp++ = t4_read_reg(adapter, start);
1879 	}
1880 }
1881 
1882 /*
1883  * Copy our entire register map into the provided buffer.
1884  */
1885 static void cxgb4vf_get_regs(struct net_device *dev,
1886 			     struct ethtool_regs *regs,
1887 			     void *regbuf)
1888 {
1889 	struct adapter *adapter = netdev2adap(dev);
1890 
1891 	regs->version = mk_adap_vers(adapter);
1892 
1893 	/*
1894 	 * Fill in register buffer with our register map.
1895 	 */
1896 	memset(regbuf, 0, T4VF_REGMAP_SIZE);
1897 
1898 	reg_block_dump(adapter, regbuf,
1899 		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1900 		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1901 	reg_block_dump(adapter, regbuf,
1902 		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1903 		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1904 
1905 	/* T5 adds new registers in the PL Register map.
1906 	 */
1907 	reg_block_dump(adapter, regbuf,
1908 		       T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1909 		       T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip)
1910 		       ? PL_VF_WHOAMI_A : PL_VF_REVISION_A));
1911 	reg_block_dump(adapter, regbuf,
1912 		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1913 		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1914 
1915 	reg_block_dump(adapter, regbuf,
1916 		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1917 		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1918 }
1919 
1920 /*
1921  * Report current Wake On LAN settings.
1922  */
1923 static void cxgb4vf_get_wol(struct net_device *dev,
1924 			    struct ethtool_wolinfo *wol)
1925 {
1926 	wol->supported = 0;
1927 	wol->wolopts = 0;
1928 	memset(&wol->sopass, 0, sizeof(wol->sopass));
1929 }
1930 
1931 /*
1932  * TCP Segmentation Offload flags which we support.
1933  */
1934 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1935 
1936 static const struct ethtool_ops cxgb4vf_ethtool_ops = {
1937 	.get_link_ksettings	= cxgb4vf_get_link_ksettings,
1938 	.get_fecparam		= cxgb4vf_get_fecparam,
1939 	.get_drvinfo		= cxgb4vf_get_drvinfo,
1940 	.get_msglevel		= cxgb4vf_get_msglevel,
1941 	.set_msglevel		= cxgb4vf_set_msglevel,
1942 	.get_ringparam		= cxgb4vf_get_ringparam,
1943 	.set_ringparam		= cxgb4vf_set_ringparam,
1944 	.get_coalesce		= cxgb4vf_get_coalesce,
1945 	.set_coalesce		= cxgb4vf_set_coalesce,
1946 	.get_pauseparam		= cxgb4vf_get_pauseparam,
1947 	.get_link		= ethtool_op_get_link,
1948 	.get_strings		= cxgb4vf_get_strings,
1949 	.set_phys_id		= cxgb4vf_phys_id,
1950 	.get_sset_count		= cxgb4vf_get_sset_count,
1951 	.get_ethtool_stats	= cxgb4vf_get_ethtool_stats,
1952 	.get_regs_len		= cxgb4vf_get_regs_len,
1953 	.get_regs		= cxgb4vf_get_regs,
1954 	.get_wol		= cxgb4vf_get_wol,
1955 };
1956 
1957 /*
1958  * /sys/kernel/debug/cxgb4vf support code and data.
1959  * ================================================
1960  */
1961 
1962 /*
1963  * Show Firmware Mailbox Command/Reply Log
1964  *
1965  * Note that we don't do any locking when dumping the Firmware Mailbox Log so
1966  * it's possible that we can catch things during a log update and therefore
1967  * see partially corrupted log entries.  But it's probably Good Enough(tm).
1968  * If we ever decide that we want to make sure that we're dumping a coherent
1969  * log, we'd need to perform locking in the mailbox logging and in
1970  * mboxlog_open() where we'd need to grab the entire mailbox log in one go
1971  * like we do for the Firmware Device Log.  But as stated above, meh ...
1972  */
1973 static int mboxlog_show(struct seq_file *seq, void *v)
1974 {
1975 	struct adapter *adapter = seq->private;
1976 	struct mbox_cmd_log *log = adapter->mbox_log;
1977 	struct mbox_cmd *entry;
1978 	int entry_idx, i;
1979 
1980 	if (v == SEQ_START_TOKEN) {
1981 		seq_printf(seq,
1982 			   "%10s  %15s  %5s  %5s  %s\n",
1983 			   "Seq#", "Tstamp", "Atime", "Etime",
1984 			   "Command/Reply");
1985 		return 0;
1986 	}
1987 
1988 	entry_idx = log->cursor + ((uintptr_t)v - 2);
1989 	if (entry_idx >= log->size)
1990 		entry_idx -= log->size;
1991 	entry = mbox_cmd_log_entry(log, entry_idx);
1992 
1993 	/* skip over unused entries */
1994 	if (entry->timestamp == 0)
1995 		return 0;
1996 
1997 	seq_printf(seq, "%10u  %15llu  %5d  %5d",
1998 		   entry->seqno, entry->timestamp,
1999 		   entry->access, entry->execute);
2000 	for (i = 0; i < MBOX_LEN / 8; i++) {
2001 		u64 flit = entry->cmd[i];
2002 		u32 hi = (u32)(flit >> 32);
2003 		u32 lo = (u32)flit;
2004 
2005 		seq_printf(seq, "  %08x %08x", hi, lo);
2006 	}
2007 	seq_puts(seq, "\n");
2008 	return 0;
2009 }
2010 
2011 static inline void *mboxlog_get_idx(struct seq_file *seq, loff_t pos)
2012 {
2013 	struct adapter *adapter = seq->private;
2014 	struct mbox_cmd_log *log = adapter->mbox_log;
2015 
2016 	return ((pos <= log->size) ? (void *)(uintptr_t)(pos + 1) : NULL);
2017 }
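
/*
 * A sketch of the position encoding used above (values hypothetical):
 * seq_file position 0 maps to SEQ_START_TOKEN (the header line), while
 * position pos in [1, log->size] is encoded as the non-NULL cookie
 * (void *)(uintptr_t)(pos + 1).  mboxlog_show() recovers the 0-based
 * offset as (uintptr_t)v - 2 and adds it to log->cursor, wrapping modulo
 * log->size.  E.g. with log->size = 256 and log->cursor = 250, position
 * 10 displays entry (250 + 9) % 256 = 3, the 10th-oldest in the ring.
 */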
2018 
2019 static void *mboxlog_start(struct seq_file *seq, loff_t *pos)
2020 {
2021 	return *pos ? mboxlog_get_idx(seq, *pos) : SEQ_START_TOKEN;
2022 }
2023 
2024 static void *mboxlog_next(struct seq_file *seq, void *v, loff_t *pos)
2025 {
2026 	++*pos;
2027 	return mboxlog_get_idx(seq, *pos);
2028 }
2029 
2030 static void mboxlog_stop(struct seq_file *seq, void *v)
2031 {
2032 }
2033 
2034 static const struct seq_operations mboxlog_seq_ops = {
2035 	.start = mboxlog_start,
2036 	.next  = mboxlog_next,
2037 	.stop  = mboxlog_stop,
2038 	.show  = mboxlog_show
2039 };
2040 
2041 static int mboxlog_open(struct inode *inode, struct file *file)
2042 {
2043 	int res = seq_open(file, &mboxlog_seq_ops);
2044 
2045 	if (!res) {
2046 		struct seq_file *seq = file->private_data;
2047 
2048 		seq->private = inode->i_private;
2049 	}
2050 	return res;
2051 }
2052 
2053 static const struct file_operations mboxlog_fops = {
2054 	.owner   = THIS_MODULE,
2055 	.open    = mboxlog_open,
2056 	.read    = seq_read,
2057 	.llseek  = seq_lseek,
2058 	.release = seq_release,
2059 };
2060 
2061 /*
2062  * Show SGE Queue Set information.  We display QPL Queue Sets per line.
2063  */
2064 #define QPL	4
2065 
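/*
 * For illustration, with QPL = 4 each "entry" prints up to four Queue
 * Sets side by side, one attribute per row (values hypothetical):
 *
 *     QType:           Ethernet         Ethernet         Ethernet
 *     Interface:           eth0             eth0             eth1
 *     TxQ ID:                 0                1                2
 */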
2066 static int sge_qinfo_show(struct seq_file *seq, void *v)
2067 {
2068 	struct adapter *adapter = seq->private;
2069 	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
2070 	int qs, r = (uintptr_t)v - 1;
2071 
2072 	if (r)
2073 		seq_putc(seq, '\n');
2074 
2075 	#define S3(fmt_spec, s, v) \
2076 		do {\
2077 			seq_printf(seq, "%-12s", s); \
2078 			for (qs = 0; qs < n; ++qs) \
2079 				seq_printf(seq, " %16" fmt_spec, v); \
2080 			seq_putc(seq, '\n'); \
2081 		} while (0)
2082 	#define S(s, v)		S3("s", s, v)
2083 	#define T(s, v)		S3("u", s, txq[qs].v)
2084 	#define R(s, v)		S3("u", s, rxq[qs].v)
2085 
2086 	if (r < eth_entries) {
2087 		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
2088 		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
2089 		int n = min(QPL, adapter->sge.ethqsets - QPL * r);
2090 
2091 		S("QType:", "Ethernet");
2092 		S("Interface:",
2093 		  (rxq[qs].rspq.netdev
2094 		   ? rxq[qs].rspq.netdev->name
2095 		   : "N/A"));
2096 		S3("d", "Port:",
2097 		   (rxq[qs].rspq.netdev
2098 		    ? ((struct port_info *)
2099 		       netdev_priv(rxq[qs].rspq.netdev))->port_id
2100 		    : -1));
2101 		T("TxQ ID:", q.abs_id);
2102 		T("TxQ size:", q.size);
2103 		T("TxQ inuse:", q.in_use);
2104 		T("TxQ PIdx:", q.pidx);
2105 		T("TxQ CIdx:", q.cidx);
2106 		R("RspQ ID:", rspq.abs_id);
2107 		R("RspQ size:", rspq.size);
2108 		R("RspQE size:", rspq.iqe_len);
2109 		S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
2110 		S3("u", "Intr pktcnt:",
2111 		   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
2112 		R("RspQ CIdx:", rspq.cidx);
2113 		R("RspQ Gen:", rspq.gen);
2114 		R("FL ID:", fl.abs_id);
2115 		R("FL size:", fl.size - MIN_FL_RESID);
2116 		R("FL avail:", fl.avail);
2117 		R("FL PIdx:", fl.pidx);
2118 		R("FL CIdx:", fl.cidx);
2119 		return 0;
2120 	}
2121 
2122 	r -= eth_entries;
2123 	if (r == 0) {
2124 		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
2125 
2126 		seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
2127 		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
2128 		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
2129 			   qtimer_val(adapter, evtq));
2130 		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
2131 			   adapter->sge.counter_val[evtq->pktcnt_idx]);
2132 		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
2133 		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
2134 	} else if (r == 1) {
2135 		const struct sge_rspq *intrq = &adapter->sge.intrq;
2136 
2137 		seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
2138 		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
2139 		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
2140 			   qtimer_val(adapter, intrq));
2141 		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
2142 			   adapter->sge.counter_val[intrq->pktcnt_idx]);
2143 		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
2144 		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
2145 	}
2146 
2147 	#undef R
2148 	#undef T
2149 	#undef S
2150 	#undef S3
2151 
2152 	return 0;
2153 }
2154 
2155 /*
2156  * Return the number of "entries" in our "file".  We group the multi-Queue
2157  * sections with QPL Queue Sets per "entry".  The sections of the output are:
2158  *
2159  *     Ethernet RX/TX Queue Sets
2160  *     Firmware Event Queue
2161  *     Forwarded Interrupt Queue (if in MSI mode)
2162  */
2163 static int sge_queue_entries(const struct adapter *adapter)
2164 {
2165 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2166 		((adapter->flags & USING_MSI) != 0);
2167 }
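
/*
 * A worked example (numbers hypothetical): with 10 Ethernet Queue Sets,
 * QPL = 4 and MSI mode, sge_queue_entries() returns
 * DIV_ROUND_UP(10, 4) + 1 + 1 = 5 "entries": three rows of Ethernet
 * Queue Sets, the Firmware Event Queue and the Forwarded Interrupt Queue.
 */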
2168 
2169 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
2170 {
2171 	int entries = sge_queue_entries(seq->private);
2172 
2173 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2174 }
2175 
2176 static void sge_queue_stop(struct seq_file *seq, void *v)
2177 {
2178 }
2179 
2180 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
2181 {
2182 	int entries = sge_queue_entries(seq->private);
2183 
2184 	++*pos;
2185 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2186 }
2187 
2188 static const struct seq_operations sge_qinfo_seq_ops = {
2189 	.start = sge_queue_start,
2190 	.next  = sge_queue_next,
2191 	.stop  = sge_queue_stop,
2192 	.show  = sge_qinfo_show
2193 };
2194 
2195 static int sge_qinfo_open(struct inode *inode, struct file *file)
2196 {
2197 	int res = seq_open(file, &sge_qinfo_seq_ops);
2198 
2199 	if (!res) {
2200 		struct seq_file *seq = file->private_data;
2201 		seq->private = inode->i_private;
2202 	}
2203 	return res;
2204 }
2205 
2206 static const struct file_operations sge_qinfo_debugfs_fops = {
2207 	.owner   = THIS_MODULE,
2208 	.open    = sge_qinfo_open,
2209 	.read    = seq_read,
2210 	.llseek  = seq_lseek,
2211 	.release = seq_release,
2212 };
2213 
2214 /*
2215  * Show SGE Queue Set statistics.  We display QPL Queue Sets per line.
2216  */
2217 #define QPL	4
2218 
2219 static int sge_qstats_show(struct seq_file *seq, void *v)
2220 {
2221 	struct adapter *adapter = seq->private;
2222 	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
2223 	int qs, r = (uintptr_t)v - 1;
2224 
2225 	if (r)
2226 		seq_putc(seq, '\n');
2227 
2228 	#define S3(fmt, s, v) \
2229 		do { \
2230 			seq_printf(seq, "%-16s", s); \
2231 			for (qs = 0; qs < n; ++qs) \
2232 				seq_printf(seq, " %8" fmt, v); \
2233 			seq_putc(seq, '\n'); \
2234 		} while (0)
2235 	#define S(s, v)		S3("s", s, v)
2236 
2237 	#define T3(fmt, s, v)	S3(fmt, s, txq[qs].v)
2238 	#define T(s, v)		T3("lu", s, v)
2239 
2240 	#define R3(fmt, s, v)	S3(fmt, s, rxq[qs].v)
2241 	#define R(s, v)		R3("lu", s, v)
2242 
2243 	if (r < eth_entries) {
2244 		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
2245 		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
2246 		int n = min(QPL, adapter->sge.ethqsets - QPL * r);
2247 
2248 		S("QType:", "Ethernet");
2249 		S("Interface:",
2250 		  (rxq[qs].rspq.netdev
2251 		   ? rxq[qs].rspq.netdev->name
2252 		   : "N/A"));
2253 		R3("u", "RspQNullInts:", rspq.unhandled_irqs);
2254 		R("RxPackets:", stats.pkts);
2255 		R("RxCSO:", stats.rx_cso);
2256 		R("VLANxtract:", stats.vlan_ex);
2257 		R("LROmerged:", stats.lro_merged);
2258 		R("LROpackets:", stats.lro_pkts);
2259 		R("RxDrops:", stats.rx_drops);
2260 		T("TSO:", tso);
2261 		T("TxCSO:", tx_cso);
2262 		T("VLANins:", vlan_ins);
2263 		T("TxQFull:", q.stops);
2264 		T("TxQRestarts:", q.restarts);
2265 		T("TxMapErr:", mapping_err);
2266 		R("FLAllocErr:", fl.alloc_failed);
2267 		R("FLLrgAlcErr:", fl.large_alloc_failed);
2268 		R("FLStarving:", fl.starving);
2269 		return 0;
2270 	}
2271 
2272 	r -= eth_entries;
2273 	if (r == 0) {
2274 		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
2275 
2276 		seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
2277 		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2278 			   evtq->unhandled_irqs);
2279 		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
2280 		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
2281 	} else if (r == 1) {
2282 		const struct sge_rspq *intrq = &adapter->sge.intrq;
2283 
2284 		seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
2285 		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2286 			   intrq->unhandled_irqs);
2287 		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
2288 		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
2289 	}
2290 
2291 	#undef R
2292 	#undef T
2293 	#undef S
2294 	#undef R3
2295 	#undef T3
2296 	#undef S3
2297 
2298 	return 0;
2299 }
2300 
2301 /*
2302  * Return the number of "entries" in our "file".  We group the multi-Queue
2303  * sections with QPL Queue Sets per "entry".  The sections of the output are:
2304  *
2305  *     Ethernet RX/TX Queue Sets
2306  *     Firmware Event Queue
2307  *     Forwarded Interrupt Queue (if in MSI mode)
2308  */
2309 static int sge_qstats_entries(const struct adapter *adapter)
2310 {
2311 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2312 		((adapter->flags & USING_MSI) != 0);
2313 }
2314 
2315 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
2316 {
2317 	int entries = sge_qstats_entries(seq->private);
2318 
2319 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2320 }
2321 
2322 static void sge_qstats_stop(struct seq_file *seq, void *v)
2323 {
2324 }
2325 
2326 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
2327 {
2328 	int entries = sge_qstats_entries(seq->private);
2329 
2330 	(*pos)++;
2331 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2332 }
2333 
2334 static const struct seq_operations sge_qstats_seq_ops = {
2335 	.start = sge_qstats_start,
2336 	.next  = sge_qstats_next,
2337 	.stop  = sge_qstats_stop,
2338 	.show  = sge_qstats_show
2339 };
2340 
2341 static int sge_qstats_open(struct inode *inode, struct file *file)
2342 {
2343 	int res = seq_open(file, &sge_qstats_seq_ops);
2344 
2345 	if (res == 0) {
2346 		struct seq_file *seq = file->private_data;
2347 		seq->private = inode->i_private;
2348 	}
2349 	return res;
2350 }
2351 
2352 static const struct file_operations sge_qstats_proc_fops = {
2353 	.owner   = THIS_MODULE,
2354 	.open    = sge_qstats_open,
2355 	.read    = seq_read,
2356 	.llseek  = seq_lseek,
2357 	.release = seq_release,
2358 };
2359 
2360 /*
2361  * Show PCI-E SR-IOV Virtual Function Resource Limits.
2362  */
2363 static int resources_show(struct seq_file *seq, void *v)
2364 {
2365 	struct adapter *adapter = seq->private;
2366 	struct vf_resources *vfres = &adapter->params.vfres;
2367 
2368 	#define S(desc, fmt, var) \
2369 		seq_printf(seq, "%-60s " fmt "\n", \
2370 			   desc " (" #var "):", vfres->var)
2371 
2372 	S("Virtual Interfaces", "%d", nvi);
2373 	S("Egress Queues", "%d", neq);
2374 	S("Ethernet Control", "%d", nethctrl);
2375 	S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
2376 	S("Ingress Queues w/ Free Lists/Interrupts", "%d", niqflint);
2377 	S("Traffic Class", "%d", tc);
2378 	S("Port Access Rights Mask", "%#x", pmask);
2379 	S("MAC Address Filters", "%d", nexactf);
2380 	S("Firmware Command Read Capabilities", "%#x", r_caps);
2381 	S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
2382 
2383 	#undef S
2384 
2385 	return 0;
2386 }
2387 DEFINE_SHOW_ATTRIBUTE(resources);
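
/*
 * A sketch of the resulting "resources" output (values hypothetical):
 *
 *     Virtual Interfaces (nvi):                                     2
 *     Egress Queues (neq):                                         16
 *     Port Access Rights Mask (pmask):                            0x3
 */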
2388 
2389 /*
2390  * Show Virtual Interfaces.
2391  */
2392 static int interfaces_show(struct seq_file *seq, void *v)
2393 {
2394 	if (v == SEQ_START_TOKEN) {
2395 		seq_puts(seq, "Interface  Port   VIID\n");
2396 	} else {
2397 		struct adapter *adapter = seq->private;
2398 		int pidx = (uintptr_t)v - 2;
2399 		struct net_device *dev = adapter->port[pidx];
2400 		struct port_info *pi = netdev_priv(dev);
2401 
2402 		seq_printf(seq, "%9s  %4d  %#5x\n",
2403 			   dev->name, pi->port_id, pi->viid);
2404 	}
2405 	return 0;
2406 }
2407 
2408 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
2409 {
2410 	return pos <= adapter->params.nports
2411 		? (void *)(uintptr_t)(pos + 1)
2412 		: NULL;
2413 }
2414 
2415 static void *interfaces_start(struct seq_file *seq, loff_t *pos)
2416 {
2417 	return *pos
2418 		? interfaces_get_idx(seq->private, *pos)
2419 		: SEQ_START_TOKEN;
2420 }
2421 
2422 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
2423 {
2424 	(*pos)++;
2425 	return interfaces_get_idx(seq->private, *pos);
2426 }
2427 
2428 static void interfaces_stop(struct seq_file *seq, void *v)
2429 {
2430 }
2431 
2432 static const struct seq_operations interfaces_seq_ops = {
2433 	.start = interfaces_start,
2434 	.next  = interfaces_next,
2435 	.stop  = interfaces_stop,
2436 	.show  = interfaces_show
2437 };
2438 
2439 static int interfaces_open(struct inode *inode, struct file *file)
2440 {
2441 	int res = seq_open(file, &interfaces_seq_ops);
2442 
2443 	if (res == 0) {
2444 		struct seq_file *seq = file->private_data;
2445 		seq->private = inode->i_private;
2446 	}
2447 	return res;
2448 }
2449 
2450 static const struct file_operations interfaces_proc_fops = {
2451 	.owner   = THIS_MODULE,
2452 	.open    = interfaces_open,
2453 	.read    = seq_read,
2454 	.llseek  = seq_lseek,
2455 	.release = seq_release,
2456 };
2457 
2458 /*
2459  * /sys/kernel/debug/cxgb4vf/ files list.
2460  */
2461 struct cxgb4vf_debugfs_entry {
2462 	const char *name;		/* name of debugfs node */
2463 	umode_t mode;			/* file system mode */
2464 	const struct file_operations *fops;
2465 };
2466 
2467 static struct cxgb4vf_debugfs_entry debugfs_files[] = {
2468 	{ "mboxlog",    0444, &mboxlog_fops },
2469 	{ "sge_qinfo",  0444, &sge_qinfo_debugfs_fops },
2470 	{ "sge_qstats", 0444, &sge_qstats_proc_fops },
2471 	{ "resources",  0444, &resources_fops },
2472 	{ "interfaces", 0444, &interfaces_proc_fops },
2473 };
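
/*
 * setup_debugfs() below instantiates each of these nodes under a
 * per-device directory named after the PCI device, e.g. (device name
 * hypothetical):
 *
 *     /sys/kernel/debug/cxgb4vf/0000:04:00.4/mboxlog
 *     /sys/kernel/debug/cxgb4vf/0000:04:00.4/sge_qinfo
 */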
2474 
2475 /*
2476  * Module and device initialization and cleanup code.
2477  * ==================================================
2478  */
2479 
2480 /*
2481  * Set up our /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2482  * directory (debugfs_root) has already been set up.
2483  */
2484 static int setup_debugfs(struct adapter *adapter)
2485 {
2486 	int i;
2487 
2488 	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2489 
2490 	/*
2491 	 * Debugfs support is best effort.
2492 	 */
2493 	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2494 		(void)debugfs_create_file(debugfs_files[i].name,
2495 				  debugfs_files[i].mode,
2496 				  adapter->debugfs_root,
2497 				  (void *)adapter,
2498 				  debugfs_files[i].fops);
2499 
2500 	return 0;
2501 }
2502 
2503 /*
2504  * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
2505  * it to our caller to tear down the directory (debugfs_root).
2506  */
2507 static void cleanup_debugfs(struct adapter *adapter)
2508 {
2509 	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2510 
2511 	/*
2512 	 * Unlike our sister routine cleanup_proc(), we don't need to remove
2513 	 * individual entries because a call will be made to
2514 	 * debugfs_remove_recursive().  We just need to clean up any ancillary
2515 	 * persistent state.
2516 	 */
2517 	/* nothing to do */
2518 }
2519 
2520 /* Figure out how many Ports and Queue Sets we can support.  This depends on
2521  * knowing our Virtual Function Resources and may be called a second time if
2522  * we fall back from MSI-X to MSI Interrupt Mode.
2523  */
2524 static void size_nports_qsets(struct adapter *adapter)
2525 {
2526 	struct vf_resources *vfres = &adapter->params.vfres;
2527 	unsigned int ethqsets, pmask_nports;
2528 
2529 	/* The number of "ports" which we support is equal to the number of
2530 	 * Virtual Interfaces with which we've been provisioned.
2531 	 */
2532 	adapter->params.nports = vfres->nvi;
2533 	if (adapter->params.nports > MAX_NPORTS) {
2534 		dev_warn(adapter->pdev_dev, "only using %d of %d maximum"
2535 			 " allowed virtual interfaces\n", MAX_NPORTS,
2536 			 adapter->params.nports);
2537 		adapter->params.nports = MAX_NPORTS;
2538 	}
2539 
2540 	/* We may have been provisioned with more VIs than the number of
2541 	 * ports we're allowed to access (our Port Access Rights Mask).
2542 	 * This is obviously a configuration conflict but we don't want to
2543 	 * crash the kernel or anything silly just because of that.
2544 	 */
2545 	pmask_nports = hweight32(adapter->params.vfres.pmask);
2546 	if (pmask_nports < adapter->params.nports) {
2547 		dev_warn(adapter->pdev_dev, "only using %d of %d provisioned"
2548 			 " virtual interfaces; limited by Port Access Rights"
2549 			 " mask %#x\n", pmask_nports, adapter->params.nports,
2550 			 adapter->params.vfres.pmask);
2551 		adapter->params.nports = pmask_nports;
2552 	}
2553 
2554 	/* We need to reserve an Ingress Queue for the Asynchronous Firmware
2555 	 * Event Queue.  And if we're using MSI Interrupts, we'll also need to
2556 	 * reserve an Ingress Queue for a Forwarded Interrupts.
2557 	 * reserve an Ingress Queue for Forwarded Interrupts.
2558 	 * The rest of the FL/Intr-capable ingress queues will be matched up
2559 	 * one-for-one with Ethernet/Control egress queues in order to form
2560 	 * "Queue Sets" which will be aportioned between the "ports".  For
2561 	 * "Queue Sets" which will be apportioned between the "ports".  For
2562 	 * Contexts -- one for the Ingress Queue Free List and one for the TX
2563 	 * Ethernet Queue.
2564 	 *
2565 	 * Note that even if we're currently configured to use MSI-X
2566 	 * Interrupts (module variable msi == MSI_MSIX) we may get downgraded
2567 	 * to MSI Interrupts if we can't get enough MSI-X Interrupts.  If that
2568 	 * happens we'll need to adjust things later.
2569 	 */
2570 	ethqsets = vfres->niqflint - 1 - (msi == MSI_MSI);
2571 	if (vfres->nethctrl != ethqsets)
2572 		ethqsets = min(vfres->nethctrl, ethqsets);
2573 	if (vfres->neq < ethqsets*2)
2574 		ethqsets = vfres->neq/2;
2575 	if (ethqsets > MAX_ETH_QSETS)
2576 		ethqsets = MAX_ETH_QSETS;
2577 	adapter->sge.max_ethqsets = ethqsets;
2578 
2579 	if (adapter->sge.max_ethqsets < adapter->params.nports) {
2580 		dev_warn(adapter->pdev_dev, "only using %d of %d available"
2581 			 " virtual interfaces (too few Queue Sets)\n",
2582 			 adapter->sge.max_ethqsets, adapter->params.nports);
2583 		adapter->params.nports = adapter->sge.max_ethqsets;
2584 	}
2585 }
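
/*
 * A worked example of the sizing above (resource values hypothetical):
 * with niqflint = 10, nethctrl = 8, neq = 16 and MSI-X mode, we start
 * from ethqsets = 10 - 1 = 9 (one Ingress Queue reserved for the
 * Firmware Event Queue), clamp to nethctrl giving 8, verify that
 * neq >= 2 * 8 = 16 (one Free List and one TX Queue context per Queue
 * Set) and end up with max_ethqsets = 8.
 */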
2586 
2587 /*
2588  * Perform early "adapter" initialization.  This is where we discover what
2589  * adapter parameters we're going to be using and initialize basic adapter
2590  * hardware support.
2591  */
2592 static int adap_init0(struct adapter *adapter)
2593 {
2594 	struct sge_params *sge_params = &adapter->params.sge;
2595 	struct sge *s = &adapter->sge;
2596 	int err;
2597 	u32 param, val = 0;
2598 
2599 	/*
2600 	 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2601 	 * 2.6.31 and later we can't call pci_reset_function() in order to
2602 	 * issue an FLR because of a self- deadlock on the device semaphore.
2603 	 * issue an FLR because of a self-deadlock on the device semaphore.
2604 	 * cases where they're needed -- for instance, some versions of KVM
2605 	 * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2606 	 * use the firmware based reset in order to reset any per function
2607 	 * state.
2608 	 */
2609 	err = t4vf_fw_reset(adapter);
2610 	if (err < 0) {
2611 		dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2612 		return err;
2613 	}
2614 
2615 	/*
2616 	 * Grab basic operational parameters.  These will predominantly have
2617 	 * been set up by the Physical Function Driver or will be hard coded
2618 	 * into the adapter.  We just have to live with them ...  Note that
2619 	 * we _must_ get our VPD parameters before our SGE parameters because
2620 	 * we need to know the adapter's core clock from the VPD in order to
2621 	 * properly decode the SGE Timer Values.
2622 	 */
2623 	err = t4vf_get_dev_params(adapter);
2624 	if (err) {
2625 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2626 			" device parameters: err=%d\n", err);
2627 		return err;
2628 	}
2629 	err = t4vf_get_vpd_params(adapter);
2630 	if (err) {
2631 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2632 			" VPD parameters: err=%d\n", err);
2633 		return err;
2634 	}
2635 	err = t4vf_get_sge_params(adapter);
2636 	if (err) {
2637 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2638 			" SGE parameters: err=%d\n", err);
2639 		return err;
2640 	}
2641 	err = t4vf_get_rss_glb_config(adapter);
2642 	if (err) {
2643 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2644 			" RSS parameters: err=%d\n", err);
2645 		return err;
2646 	}
2647 	if (adapter->params.rss.mode !=
2648 	    FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2649 		dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2650 			" mode %d\n", adapter->params.rss.mode);
2651 		return -EINVAL;
2652 	}
2653 	err = t4vf_sge_init(adapter);
2654 	if (err) {
2655 		dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2656 			" err=%d\n", err);
2657 		return err;
2658 	}
2659 
2660 	/* If we're running on newer firmware, let it know that we're
2661 	 * prepared to deal with encapsulated CPL messages.  Older
2662 	 * firmware won't understand this and we'll just get
2663 	 * unencapsulated messages ...
2664 	 */
2665 	param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
2666 		FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
2667 	val = 1;
2668 	(void) t4vf_set_params(adapter, 1, &param, &val);
2669 
2670 	/*
2671 	 * Retrieve our RX interrupt holdoff timer values and counter
2672 	 * threshold values from the SGE parameters.
2673 	 */
2674 	s->timer_val[0] = core_ticks_to_us(adapter,
2675 		TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1));
2676 	s->timer_val[1] = core_ticks_to_us(adapter,
2677 		TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1));
2678 	s->timer_val[2] = core_ticks_to_us(adapter,
2679 		TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3));
2680 	s->timer_val[3] = core_ticks_to_us(adapter,
2681 		TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3));
2682 	s->timer_val[4] = core_ticks_to_us(adapter,
2683 		TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5));
2684 	s->timer_val[5] = core_ticks_to_us(adapter,
2685 		TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5));
2686 
2687 	s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold);
2688 	s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold);
2689 	s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold);
2690 	s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold);
2691 
2692 	/*
2693 	 * Grab our Virtual Interface resource allocation, extract the
2694 	 * features that we're interested in and do a bit of sanity testing on
2695 	 * what we discover.
2696 	 */
2697 	err = t4vf_get_vfres(adapter);
2698 	if (err) {
2699 		dev_err(adapter->pdev_dev, "unable to get virtual interface"
2700 			" resources: err=%d\n", err);
2701 		return err;
2702 	}
2703 
2704 	/* Check for various parameter sanity issues */
2705 	if (adapter->params.vfres.pmask == 0) {
2706 		dev_err(adapter->pdev_dev, "no port access configured/"
2707 			"usable!\n");
2708 		return -EINVAL;
2709 	}
2710 	if (adapter->params.vfres.nvi == 0) {
2711 		dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2712 			"usable!\n");
2713 		return -EINVAL;
2714 	}
2715 
2716 	/* Initialize nports and max_ethqsets now that we have our Virtual
2717 	 * Function Resources.
2718 	 */
2719 	size_nports_qsets(adapter);
2720 
2721 	return 0;
2722 }
2723 
2724 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2725 			     u8 pkt_cnt_idx, unsigned int size,
2726 			     unsigned int iqe_size)
2727 {
2728 	rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
2729 			     (pkt_cnt_idx < SGE_NCOUNTERS ?
2730 			      QINTR_CNT_EN_F : 0));
2731 	rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2732 			    ? pkt_cnt_idx
2733 			    : 0);
2734 	rspq->iqe_len = iqe_size;
2735 	rspq->size = size;
2736 }
2737 
2738 /*
2739  * Perform default configuration of DMA queues depending on the number and
2740  * type of ports we found and the number of available CPUs.  Most settings can
2741  * be modified by the admin via ethtool and cxgbtool prior to the adapter
2742  * being brought up for the first time.
2743  */
2744 static void cfg_queues(struct adapter *adapter)
2745 {
2746 	struct sge *s = &adapter->sge;
2747 	int q10g, n10g, qidx, pidx, qs;
2748 	size_t iqe_size;
2749 
2750 	/*
2751 	 * We should not be called till we know how many Queue Sets we can
2752 	 * support.  In particular, this means that we need to know what kind
2753 	 * of interrupts we'll be using ...
2754 	 */
2755 	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2756 
2757 	/*
2758 	 * Count the number of 10GbE Virtual Interfaces that we have.
2759 	 */
2760 	n10g = 0;
2761 	for_each_port(adapter, pidx)
2762 		n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2763 
2764 	/*
2765 	 * We default to 1 queue per non-10G port and up to # of cores queues
2766 	 * per 10G port.
2767 	 */
2768 	if (n10g == 0)
2769 		q10g = 0;
2770 	else {
2771 		int n1g = (adapter->params.nports - n10g);
2772 		q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2773 		if (q10g > num_online_cpus())
2774 			q10g = num_online_cpus();
2775 	}
2776 
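	/*
	 * A worked example (numbers hypothetical): with max_ethqsets = 16,
	 * two 10G ports and one 1G port on an 8-CPU machine, q10g =
	 * (16 - 1) / 2 = 7 (not capped since 7 <= num_online_cpus()), so
	 * each 10G port gets 7 Queue Sets and the 1G port gets 1.
	 */
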
2777 	/*
2778 	 * Allocate the "Queue Sets" to the various Virtual Interfaces.
2779 	 * The layout will be established in setup_sge_queues() when the
2780 	 * adapter is brough up for the first time.
2781 	 * adapter is brought up for the first time.
2782 	qidx = 0;
2783 	for_each_port(adapter, pidx) {
2784 		struct port_info *pi = adap2pinfo(adapter, pidx);
2785 
2786 		pi->first_qset = qidx;
2787 		pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
2788 		qidx += pi->nqsets;
2789 	}
2790 	s->ethqsets = qidx;
2791 
2792 	/*
2793 	 * The Ingress Queue Entry Size for our various Response Queues needs
2794 	 * to be big enough to accommodate the largest message we can receive
2795 	 * from the chip/firmware; which is 64 bytes ...
2796 	 */
2797 	iqe_size = 64;
2798 
2799 	/*
2800 	 * Set up default Queue Set parameters ...  Start off with the
2801 	 * shortest interrupt holdoff timer.
2802 	 */
2803 	for (qs = 0; qs < s->max_ethqsets; qs++) {
2804 		struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2805 		struct sge_eth_txq *txq = &s->ethtxq[qs];
2806 
2807 		init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2808 		rxq->fl.size = 72;
2809 		txq->q.size = 1024;
2810 	}
2811 
2812 	/*
2813 	 * The firmware event queue is used for link state changes and
2814 	 * notifications of TX DMA completions.
2815 	 */
2816 	init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2817 
2818 	/*
2819 	 * The forwarded interrupt queue is used when we're in MSI interrupt
2820 	 * mode.  In this mode all interrupts associated with RX queues will
2821 	 * be forwarded to a single queue which we'll associate with our MSI
2822 	 * interrupt vector.  The messages dropped in the forwarded interrupt
2823 	 * queue will indicate which ingress queue needs servicing ...  This
2824 	 * queue needs to be large enough to accommodate all of the ingress
2825 	 * queues which are forwarding their interrupt (+1 to prevent the PIDX
2826 	 * from equalling the CIDX if every ingress queue has an outstanding
2827 	 * interrupt).  The queue doesn't need to be any larger because no
2828 	 * ingress queue will ever have more than one outstanding interrupt at
2829 	 * any time ...
2830 	 */
2831 	init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2832 		  iqe_size);
2833 }
2834 
2835 /*
2836  * Reduce the number of Ethernet queues across all ports to at most n.
2837  * n provides at least one queue per port.
2838  */
2839 static void reduce_ethqs(struct adapter *adapter, int n)
2840 {
2841 	int i;
2842 	struct port_info *pi;
2843 
2844 	/*
2845 	 * While we have too many active Ethernet Queue Sets, iterate across the
2846 	 * "ports" and reduce their individual Queue Set allocations.
2847 	 */
2848 	BUG_ON(n < adapter->params.nports);
2849 	while (n < adapter->sge.ethqsets)
2850 		for_each_port(adapter, i) {
2851 			pi = adap2pinfo(adapter, i);
2852 			if (pi->nqsets > 1) {
2853 				pi->nqsets--;
2854 				adapter->sge.ethqsets--;
2855 				if (adapter->sge.ethqsets <= n)
2856 					break;
2857 			}
2858 		}
2859 
2860 	/*
2861 	 * Reassign the starting Queue Sets for each of the "ports" ...
2862 	 */
2863 	n = 0;
2864 	for_each_port(adapter, i) {
2865 		pi = adap2pinfo(adapter, i);
2866 		pi->first_qset = n;
2867 		n += pi->nqsets;
2868 	}
2869 }
2870 
2871 /*
2872  * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2873  * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2874  * need.  Minimally we need one for every Virtual Interface plus those needed
2875  * for our "extras".  Note that this process may lower the maximum number of
2876  * allowed Queue Sets ...
2877  */
2878 static int enable_msix(struct adapter *adapter)
2879 {
2880 	int i, want, need, nqsets;
2881 	struct msix_entry entries[MSIX_ENTRIES];
2882 	struct sge *s = &adapter->sge;
2883 
2884 	for (i = 0; i < MSIX_ENTRIES; ++i)
2885 		entries[i].entry = i;
2886 
2887 	/*
2888 	 * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2889 	 * plus those needed for our "extras" (for example, the firmware
2890 	 * message queue).  We _need_ at least one "Queue Set" per Virtual
2891 	 * Interface plus those needed for our "extras".  So now we get to see
2892 	 * if the song is right ...
2893 	 */
2894 	want = s->max_ethqsets + MSIX_EXTRAS;
2895 	need = adapter->params.nports + MSIX_EXTRAS;
2896 
2897 	want = pci_enable_msix_range(adapter->pdev, entries, need, want);
2898 	if (want < 0)
2899 		return want;
2900 
2901 	nqsets = want - MSIX_EXTRAS;
2902 	if (nqsets < s->max_ethqsets) {
2903 		dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2904 			 " for %d Queue Sets\n", nqsets);
2905 		s->max_ethqsets = nqsets;
2906 		if (nqsets < s->ethqsets)
2907 			reduce_ethqs(adapter, nqsets);
2908 	}
2909 	for (i = 0; i < want; ++i)
2910 		adapter->msix_info[i].vec = entries[i].vector;
2911 
2912 	return 0;
2913 }
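
/*
 * A worked example (numbers hypothetical): with max_ethqsets = 8, two
 * ports and the "extras" covering the firmware event queue, we ask
 * pci_enable_msix_range() for between need = 2 + MSIX_EXTRAS and
 * want = 8 + MSIX_EXTRAS vectors.  If we're only granted, say,
 * 6 + MSIX_EXTRAS, max_ethqsets drops to 6 and reduce_ethqs()
 * rebalances the ports.
 */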
2914 
2915 static const struct net_device_ops cxgb4vf_netdev_ops	= {
2916 	.ndo_open		= cxgb4vf_open,
2917 	.ndo_stop		= cxgb4vf_stop,
2918 	.ndo_start_xmit		= t4vf_eth_xmit,
2919 	.ndo_get_stats		= cxgb4vf_get_stats,
2920 	.ndo_set_rx_mode	= cxgb4vf_set_rxmode,
2921 	.ndo_set_mac_address	= cxgb4vf_set_mac_addr,
2922 	.ndo_validate_addr	= eth_validate_addr,
2923 	.ndo_do_ioctl		= cxgb4vf_do_ioctl,
2924 	.ndo_change_mtu		= cxgb4vf_change_mtu,
2925 	.ndo_fix_features	= cxgb4vf_fix_features,
2926 	.ndo_set_features	= cxgb4vf_set_features,
2927 #ifdef CONFIG_NET_POLL_CONTROLLER
2928 	.ndo_poll_controller	= cxgb4vf_poll_controller,
2929 #endif
2930 };
2931 
2932 /*
2933  * "Probe" a device: initialize a device and construct all kernel and driver
2934  * state needed to manage the device.  This routine is called "init_one" in
2935  * the PF Driver ...
2936  */
2937 static int cxgb4vf_pci_probe(struct pci_dev *pdev,
2938 			     const struct pci_device_id *ent)
2939 {
2940 	int pci_using_dac;
2941 	int err, pidx;
2942 	unsigned int pmask;
2943 	struct adapter *adapter;
2944 	struct port_info *pi;
2945 	struct net_device *netdev;
2946 	unsigned int pf;
2947 
2948 	/*
2949 	 * Print our driver banner the first time we're called to initialize a
2950 	 * device.
2951 	 */
2952 	pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION);
2953 
2954 	/*
2955 	 * Initialize generic PCI device state.
2956 	 */
2957 	err = pci_enable_device(pdev);
2958 	if (err) {
2959 		dev_err(&pdev->dev, "cannot enable PCI device\n");
2960 		return err;
2961 	}
2962 
2963 	/*
2964 	 * Reserve PCI resources for the device.  If we can't get them some
2965 	 * other driver may have already claimed the device ...
2966 	 */
2967 	err = pci_request_regions(pdev, KBUILD_MODNAME);
2968 	if (err) {
2969 		dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2970 		goto err_disable_device;
2971 	}
2972 
2973 	/*
2974 	 * Set up our DMA mask: try for 64-bit address masking first and
2975 	 * fall back to 32-bit if we can't get 64 bits ...
2976 	 */
2977 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2978 	if (err == 0) {
2979 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2980 		if (err) {
2981 			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2982 				" coherent allocations\n");
2983 			goto err_release_regions;
2984 		}
2985 		pci_using_dac = 1;
2986 	} else {
2987 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2988 		if (err != 0) {
2989 			dev_err(&pdev->dev, "no usable DMA configuration\n");
2990 			goto err_release_regions;
2991 		}
2992 		pci_using_dac = 0;
2993 	}
2994 
2995 	/*
2996 	 * Enable bus mastering for the device ...
2997 	 */
2998 	pci_set_master(pdev);
2999 
3000 	/*
3001 	 * Allocate our adapter data structure and attach it to the device.
3002 	 */
3003 	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
3004 	if (!adapter) {
3005 		err = -ENOMEM;
3006 		goto err_release_regions;
3007 	}
3008 	pci_set_drvdata(pdev, adapter);
3009 	adapter->pdev = pdev;
3010 	adapter->pdev_dev = &pdev->dev;
3011 
3012 	adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
3013 				    (sizeof(struct mbox_cmd) *
3014 				     T4VF_OS_LOG_MBOX_CMDS),
3015 				    GFP_KERNEL);
3016 	if (!adapter->mbox_log) {
3017 		err = -ENOMEM;
3018 		goto err_free_adapter;
3019 	}
3020 	adapter->mbox_log->size = T4VF_OS_LOG_MBOX_CMDS;
3021 
3022 	/*
3023 	 * Initialize SMP data synchronization resources.
3024 	 */
3025 	spin_lock_init(&adapter->stats_lock);
3026 	spin_lock_init(&adapter->mbox_lock);
3027 	INIT_LIST_HEAD(&adapter->mlist.list);
3028 
3029 	/*
3030 	 * Map our I/O registers in BAR0.
3031 	 */
3032 	adapter->regs = pci_ioremap_bar(pdev, 0);
3033 	if (!adapter->regs) {
3034 		dev_err(&pdev->dev, "cannot map device registers\n");
3035 		err = -ENOMEM;
3036 		goto err_free_adapter;
3037 	}
3038 
3039 	/* Wait for the device to become ready before proceeding ...
3040 	 */
3041 	err = t4vf_prep_adapter(adapter);
3042 	if (err) {
3043 		dev_err(adapter->pdev_dev, "device didn't become ready:"
3044 			" err=%d\n", err);
3045 		goto err_unmap_bar0;
3046 	}
3047 
3048 	/* For T5 and later we want to use the new BAR-based User Doorbells,
3049 	 * so we need to map BAR2 here ...
3050 	 */
3051 	if (!is_t4(adapter->params.chip)) {
3052 		adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
3053 					   pci_resource_len(pdev, 2));
3054 		if (!adapter->bar2) {
3055 			dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
3056 			err = -ENOMEM;
3057 			goto err_unmap_bar0;
3058 		}
3059 	}
3060 	/*
3061 	 * Initialize adapter level features.
3062 	 */
3063 	adapter->name = pci_name(pdev);
3064 	adapter->msg_enable = DFLT_MSG_ENABLE;
3065 
3066 	/* If possible, we use PCIe Relaxed Ordering Attribute to deliver
3067 	 * Ingress Packet Data to Free List Buffers in order to allow for
3068 	 * chipset performance optimizations between the Root Complex and
3069 	 * Memory Controllers.  (Messages to the associated Ingress Queue
3070 	 * notifying new Packet Placement in the Free List Buffers will be
3071 	 * sent without the Relaxed Ordering Attribute thus guaranteeing that
3072 	 * all preceding PCIe Transaction Layer Packets will be processed
3073 	 * first.)  But some Root Complexes have various issues with Upstream
3074 	 * Transaction Layer Packets with the Relaxed Ordering Attribute set.
3075 	 * PCIe devices under such Root Complexes will have the Relaxed
3076 	 * Ordering bit cleared in their configuration space, so we check our
3077 	 * PCIe configuration space to see if it's flagged with advice against
3078 	 * using Relaxed Ordering.
3079 	 */
3080 	if (!pcie_relaxed_ordering_enabled(pdev))
3081 		adapter->flags |= ROOT_NO_RELAXED_ORDERING;
3082 
3083 	err = adap_init0(adapter);
3084 	if (err)
3085 		goto err_unmap_bar;
3086 
3087 	/* Initialize hash mac addr list */
3088 	INIT_LIST_HEAD(&adapter->mac_hlist);
3089 
3090 	/*
3091 	 * Allocate our "adapter ports" and stitch everything together.
3092 	 */
3093 	pmask = adapter->params.vfres.pmask;
3094 	pf = t4vf_get_pf_from_vf(adapter);
3095 	for_each_port(adapter, pidx) {
3096 		int port_id, viid;
3097 		u8 mac[ETH_ALEN];
3098 		unsigned int naddr = 1;
3099 
3100 		/*
3101 		 * We simplistically allocate our virtual interfaces
3102 		 * sequentially across the port numbers to which we have
3103 		 * access rights.  This should be configurable in some manner
3104 		 * ...
3105 		 */
3106 		if (pmask == 0)
3107 			break;
3108 		port_id = ffs(pmask) - 1;
3109 		pmask &= ~(1 << port_id);
3110 		viid = t4vf_alloc_vi(adapter, port_id);
3111 		if (viid < 0) {
3112 			dev_err(&pdev->dev, "cannot allocate VI for port %d:"
3113 				" err=%d\n", port_id, viid);
3114 			err = viid;
3115 			goto err_free_dev;
3116 		}
3117 
3118 		/*
3119 		 * Allocate our network device and stitch things together.
3120 		 */
3121 		netdev = alloc_etherdev_mq(sizeof(struct port_info),
3122 					   MAX_PORT_QSETS);
3123 		if (netdev == NULL) {
3124 			t4vf_free_vi(adapter, viid);
3125 			err = -ENOMEM;
3126 			goto err_free_dev;
3127 		}
3128 		adapter->port[pidx] = netdev;
3129 		SET_NETDEV_DEV(netdev, &pdev->dev);
3130 		pi = netdev_priv(netdev);
3131 		pi->adapter = adapter;
3132 		pi->pidx = pidx;
3133 		pi->port_id = port_id;
3134 		pi->viid = viid;
3135 
3136 		/*
3137 		 * Initialize the starting state of our "port" and register
3138 		 * it.
3139 		 */
3140 		pi->xact_addr_filt = -1;
3141 		netif_carrier_off(netdev);
3142 		netdev->irq = pdev->irq;
3143 
3144 		netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
3145 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3146 			NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM;
3147 		netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
3148 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3149 			NETIF_F_HIGHDMA;
3150 		netdev->features = netdev->hw_features |
3151 				   NETIF_F_HW_VLAN_CTAG_TX;
3152 		if (pci_using_dac)
3153 			netdev->features |= NETIF_F_HIGHDMA;
3154 
3155 		netdev->priv_flags |= IFF_UNICAST_FLT;
3156 		netdev->min_mtu = 81;
3157 		netdev->max_mtu = ETH_MAX_MTU;
3158 
3159 		netdev->netdev_ops = &cxgb4vf_netdev_ops;
3160 		netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
3161 		netdev->dev_port = pi->port_id;
3162 
3163 		/*
3164 		 * Initialize the hardware/software state for the port.
3165 		 */
3166 		err = t4vf_port_init(adapter, pidx);
3167 		if (err) {
3168 			dev_err(&pdev->dev, "cannot initialize port %d\n",
3169 				pidx);
3170 			goto err_free_dev;
3171 		}
3172 
3173 		err = t4vf_get_vf_mac_acl(adapter, pf, &naddr, mac);
3174 		if (err) {
3175 			dev_err(&pdev->dev,
3176 				"unable to determine MAC ACL address, "
3177 				"continuing anyway.. (status %d)\n", err);
3178 		} else if (naddr && adapter->params.vfres.nvi == 1) {
3179 			struct sockaddr addr;
3180 
3181 			ether_addr_copy(addr.sa_data, mac);
3182 			err = cxgb4vf_set_mac_addr(netdev, &addr);
3183 			if (err) {
3184 				dev_err(&pdev->dev,
3185 					"unable to set MAC address %pM\n",
3186 					mac);
3187 				goto err_free_dev;
3188 			}
3189 			dev_info(&pdev->dev,
3190 				 "Using assigned MAC ACL: %pM\n", mac);
3191 		}
3192 	}
3193 
3194 	/* See what interrupts we'll be using.  If we've been configured to
3195 	 * use MSI-X interrupts, try to enable them but fall back to using
3196 	 * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
3197 	 * get MSI interrupts we bail with the error.
3198 	 */
3199 	if (msi == MSI_MSIX && enable_msix(adapter) == 0)
3200 		adapter->flags |= USING_MSIX;
3201 	else {
3202 		if (msi == MSI_MSIX) {
3203 			dev_info(adapter->pdev_dev,
3204 				 "Unable to use MSI-X Interrupts; falling "
3205 				 "back to MSI Interrupts\n");
3206 
3207 			/* We're going to need a Forwarded Interrupt Queue so
3208 			 * that may cut into how many Queue Sets we can
3209 			 * support.
3210 			 */
3211 			msi = MSI_MSI;
3212 			size_nports_qsets(adapter);
3213 		}
3214 		err = pci_enable_msi(pdev);
3215 		if (err) {
3216 			dev_err(&pdev->dev, "Unable to allocate MSI Interrupts;"
3217 				" err=%d\n", err);
3218 			goto err_free_dev;
3219 		}
3220 		adapter->flags |= USING_MSI;
3221 	}
3222 
3223 	/* Now that we know how many "ports" we have and what interrupt
3224 	 * mechanism we're going to use, we can configure our queue resources.
3225 	 */
3226 	cfg_queues(adapter);
3227 
3228 	/*
3229 	 * The "card" is now ready to go.  If any errors occur during device
3230 	 * registration we do not fail the whole "card" but rather proceed
3231 	 * only with the ports we manage to register successfully.  However we
3232 	 * must register at least one net device.
3233 	 */
3234 	for_each_port(adapter, pidx) {
3235 		struct port_info *pi = netdev_priv(adapter->port[pidx]);
3236 		netdev = adapter->port[pidx];
3237 		if (netdev == NULL)
3238 			continue;
3239 
3240 		netif_set_real_num_tx_queues(netdev, pi->nqsets);
3241 		netif_set_real_num_rx_queues(netdev, pi->nqsets);
3242 
3243 		err = register_netdev(netdev);
3244 		if (err) {
3245 			dev_warn(&pdev->dev, "cannot register net device %s,"
3246 				 " skipping\n", netdev->name);
3247 			continue;
3248 		}
3249 
3250 		set_bit(pidx, &adapter->registered_device_map);
3251 	}
3252 	if (adapter->registered_device_map == 0) {
3253 		dev_err(&pdev->dev, "could not register any net devices\n");
3254 		goto err_disable_interrupts;
3255 	}
3256 
3257 	/*
3258 	 * Set up our debugfs entries.
3259 	 */
3260 	if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
3261 		adapter->debugfs_root =
3262 			debugfs_create_dir(pci_name(pdev),
3263 					   cxgb4vf_debugfs_root);
3264 		if (IS_ERR_OR_NULL(adapter->debugfs_root))
3265 			dev_warn(&pdev->dev, "could not create debugfs"
3266 				 " directory");
3267 		else
3268 			setup_debugfs(adapter);
3269 	}
3270 
3271 	/*
3272 	 * Print a short notice on the existence and configuration of the new
3273 	 * VF network device ...
3274 	 */
3275 	for_each_port(adapter, pidx) {
3276 		dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
3277 			 adapter->port[pidx]->name,
3278 			 (adapter->flags & USING_MSIX) ? "MSI-X" :
3279 			 (adapter->flags & USING_MSI)  ? "MSI" : "");
3280 	}
3281 
3282 	/*
3283 	 * Return success!
3284 	 */
3285 	return 0;
3286 
3287 	/*
3288 	 * Error recovery and exit code.  Unwind state that's been created
3289 	 * so far and return the error.
3290 	 */
3291 err_disable_interrupts:
3292 	if (adapter->flags & USING_MSIX) {
3293 		pci_disable_msix(adapter->pdev);
3294 		adapter->flags &= ~USING_MSIX;
3295 	} else if (adapter->flags & USING_MSI) {
3296 		pci_disable_msi(adapter->pdev);
3297 		adapter->flags &= ~USING_MSI;
3298 	}
3299 
3300 err_free_dev:
3301 	for_each_port(adapter, pidx) {
3302 		netdev = adapter->port[pidx];
3303 		if (netdev == NULL)
3304 			continue;
3305 		pi = netdev_priv(netdev);
3306 		t4vf_free_vi(adapter, pi->viid);
3307 		if (test_bit(pidx, &adapter->registered_device_map))
3308 			unregister_netdev(netdev);
3309 		free_netdev(netdev);
3310 	}
3311 
3312 err_unmap_bar:
3313 	if (!is_t4(adapter->params.chip))
3314 		iounmap(adapter->bar2);
3315 
3316 err_unmap_bar0:
3317 	iounmap(adapter->regs);
3318 
3319 err_free_adapter:
3320 	kfree(adapter->mbox_log);
3321 	kfree(adapter);
3322 
3323 err_release_regions:
3324 	pci_release_regions(pdev);
3325 	pci_clear_master(pdev);
3326 
3327 err_disable_device:
3328 	pci_disable_device(pdev);
3329 
3330 	return err;
3331 }
3332 
3333 /*
3334  * "Remove" a device: tear down all kernel and driver state created in the
3335  * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
3336  * that this is called "remove_one" in the PF Driver.)
3337  */
3338 static void cxgb4vf_pci_remove(struct pci_dev *pdev)
3339 {
3340 	struct adapter *adapter = pci_get_drvdata(pdev);
3341 	struct hash_mac_addr *entry, *tmp;
3342 
3343 	/*
3344 	 * Tear down driver state associated with device.
3345 	 */
3346 	if (adapter) {
3347 		int pidx;
3348 
3349 		/*
3350 		 * Stop all of our activity.  Unregister network port,
3351 		 * disable interrupts, etc.
3352 		 */
3353 		for_each_port(adapter, pidx)
3354 			if (test_bit(pidx, &adapter->registered_device_map))
3355 				unregister_netdev(adapter->port[pidx]);
3356 		t4vf_sge_stop(adapter);
3357 		if (adapter->flags & USING_MSIX) {
3358 			pci_disable_msix(adapter->pdev);
3359 			adapter->flags &= ~USING_MSIX;
3360 		} else if (adapter->flags & USING_MSI) {
3361 			pci_disable_msi(adapter->pdev);
3362 			adapter->flags &= ~USING_MSI;
3363 		}
3364 
3365 		/*
3366 		 * Tear down our debugfs entries.
3367 		 */
3368 		if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
3369 			cleanup_debugfs(adapter);
3370 			debugfs_remove_recursive(adapter->debugfs_root);
3371 		}
3372 
3373 		/*
3374 		 * Free all of the various resources which we've acquired ...
3375 		 */
3376 		t4vf_free_sge_resources(adapter);
3377 		for_each_port(adapter, pidx) {
3378 			struct net_device *netdev = adapter->port[pidx];
3379 			struct port_info *pi;
3380 
3381 			if (netdev == NULL)
3382 				continue;
3383 
3384 			pi = netdev_priv(netdev);
3385 			t4vf_free_vi(adapter, pi->viid);
3386 			free_netdev(netdev);
3387 		}
3388 		iounmap(adapter->regs);
3389 		if (!is_t4(adapter->params.chip))
3390 			iounmap(adapter->bar2);
3391 		kfree(adapter->mbox_log);
3392 		list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist,
3393 					 list) {
3394 			list_del(&entry->list);
3395 			kfree(entry);
3396 		}
3397 		kfree(adapter);
3398 	}
3399 
3400 	/*
3401 	 * Disable the device and release its PCI resources.
3402 	 */
3403 	pci_disable_device(pdev);
3404 	pci_clear_master(pdev);
3405 	pci_release_regions(pdev);
3406 }
3407 
3408 /*
3409  * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
3410  * "Shutdown": quiesce the device, stopping Ingress Packet and Interrupt
3411  */
3412 static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
3413 {
3414 	struct adapter *adapter;
3415 	int pidx;
3416 
3417 	adapter = pci_get_drvdata(pdev);
3418 	if (!adapter)
3419 		return;
3420 
3421 	/* Disable all Virtual Interfaces.  This will shut down the
3422 	 * delivery of all ingress packets into the chip for these
3423 	 * Virtual Interfaces.
3424 	 */
3425 	for_each_port(adapter, pidx)
3426 		if (test_bit(pidx, &adapter->registered_device_map))
3427 			unregister_netdev(adapter->port[pidx]);
3428 
3429 	/* Stop all SGE Queue processing to prevent further DMA and
3430 	 * Interrupts, allowing various internal pathways to drain.
3431 	 */
3432 	t4vf_sge_stop(adapter);
3433 	if (adapter->flags & USING_MSIX) {
3434 		pci_disable_msix(adapter->pdev);
3435 		adapter->flags &= ~USING_MSIX;
3436 	} else if (adapter->flags & USING_MSI) {
3437 		pci_disable_msi(adapter->pdev);
3438 		adapter->flags &= ~USING_MSI;
3439 	}
3440 
3441 	/*
3442 	 * Free up all SGE Queue resources, preventing any further DMA on
3443 	 * the now-quiesced queues.
3444 	 */
3445 	t4vf_free_sge_resources(adapter);
3446 	pci_set_drvdata(pdev, NULL);
3447 }
3448 
3449 /* Macros needed to support the PCI Device ID Table ...
3450  */
3451 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
3452 	static const struct pci_device_id cxgb4vf_pci_tbl[] = {
3453 #define CH_PCI_DEVICE_ID_FUNCTION	0x8
3454 
3455 #define CH_PCI_ID_TABLE_ENTRY(devid) \
3456 		{ PCI_VDEVICE(CHELSIO, (devid)), 0 }
3457 
3458 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } }
3459 
3460 #include "../cxgb4/t4_pci_id_tbl.h"
3461 
3462 MODULE_DESCRIPTION(DRV_DESC);
3463 MODULE_AUTHOR("Chelsio Communications");
3464 MODULE_LICENSE("Dual BSD/GPL");
3465 MODULE_VERSION(DRV_VERSION);
3466 MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
3467 
3468 static struct pci_driver cxgb4vf_driver = {
3469 	.name		= KBUILD_MODNAME,
3470 	.id_table	= cxgb4vf_pci_tbl,
3471 	.probe		= cxgb4vf_pci_probe,
3472 	.remove		= cxgb4vf_pci_remove,
3473 	.shutdown	= cxgb4vf_pci_shutdown,
3474 };
3475 
3476 /*
3477  * Initialize global driver state.
3478  */
3479 static int __init cxgb4vf_module_init(void)
3480 {
3481 	int ret;
3482 
3483 	/*
3484 	 * Vet our module parameters.
3485 	 */
3486 	if (msi != MSI_MSIX && msi != MSI_MSI) {
3487 		pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n",
3488 			msi, MSI_MSIX, MSI_MSI);
3489 		return -EINVAL;
3490 	}
3491 
3492 	/* Debugfs support is optional, just warn if this fails */
3493 	cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
3494 	if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3495 		pr_warn("could not create debugfs entry, continuing\n");
3496 
3497 	ret = pci_register_driver(&cxgb4vf_driver);
3498 	if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3499 		debugfs_remove(cxgb4vf_debugfs_root);
3500 	return ret;
3501 }
3502 
3503 /*
3504  * Tear down global driver state.
3505  */
3506 static void __exit cxgb4vf_module_exit(void)
3507 {
3508 	pci_unregister_driver(&cxgb4vf_driver);
3509 	debugfs_remove(cxgb4vf_debugfs_root);
3510 }
3511 
3512 module_init(cxgb4vf_module_init);
3513 module_exit(cxgb4vf_module_exit);
3514