xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision 3ef51c5fb9163f2aafb1c14729e06a8bf0c4d113)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57 
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66 
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78 
79 #include <cxgb_include.h>
80 
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84 
85 static int cxgb_setup_interrupts(adapter_t *);
86 static void cxgb_teardown_interrupts(adapter_t *);
87 static void cxgb_init(void *);
88 static int cxgb_init_locked(struct port_info *);
89 static int cxgb_uninit_locked(struct port_info *);
90 static int cxgb_uninit_synchronized(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static int cxgb_media_change(struct ifnet *);
93 static int cxgb_ifm_type(int);
94 static void cxgb_build_medialist(struct port_info *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_tick_handler(void *, int);
99 static void cxgb_tick(void *);
100 static void link_check_callout(void *);
101 static void check_link_status(void *, int);
102 static void setup_rss(adapter_t *sc);
103 static int alloc_filters(struct adapter *);
104 static int setup_hw_filters(struct adapter *);
105 static int set_filter(struct adapter *, int, const struct filter_info *);
106 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
107     unsigned int, u64, u64);
108 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
109     unsigned int, u64, u64);
110 
111 /* Attachment glue for the PCI controller end of the device.  Each port of
112  * the device is attached separately, as defined later.
113  */
114 static int cxgb_controller_probe(device_t);
115 static int cxgb_controller_attach(device_t);
116 static int cxgb_controller_detach(device_t);
117 static void cxgb_free(struct adapter *);
118 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
119     unsigned int end);
120 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
121 static int cxgb_get_regs_len(void);
122 static int offload_open(struct port_info *pi);
123 static void touch_bars(device_t dev);
124 static int offload_close(struct t3cdev *tdev);
125 static void cxgb_update_mac_settings(struct port_info *p);
126 
127 static device_method_t cxgb_controller_methods[] = {
128 	DEVMETHOD(device_probe,		cxgb_controller_probe),
129 	DEVMETHOD(device_attach,	cxgb_controller_attach),
130 	DEVMETHOD(device_detach,	cxgb_controller_detach),
131 
132 	DEVMETHOD_END
133 };
134 
135 static driver_t cxgb_controller_driver = {
136 	"cxgbc",
137 	cxgb_controller_methods,
138 	sizeof(struct adapter)
139 };
140 
141 static devclass_t	cxgb_controller_devclass;
142 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
143 
144 /*
145  * Attachment glue for the ports.  Attachment is done directly to the
146  * controller device.
147  */
148 static int cxgb_port_probe(device_t);
149 static int cxgb_port_attach(device_t);
150 static int cxgb_port_detach(device_t);
151 
152 static device_method_t cxgb_port_methods[] = {
153 	DEVMETHOD(device_probe,		cxgb_port_probe),
154 	DEVMETHOD(device_attach,	cxgb_port_attach),
155 	DEVMETHOD(device_detach,	cxgb_port_detach),
156 	{ 0, 0 }
157 };
158 
159 static driver_t cxgb_port_driver = {
160 	"cxgb",
161 	cxgb_port_methods,
162 	0
163 };
164 
165 static d_ioctl_t cxgb_extension_ioctl;
166 static d_open_t cxgb_extension_open;
167 static d_close_t cxgb_extension_close;
168 
169 static struct cdevsw cxgb_cdevsw = {
170        .d_version =    D_VERSION,
171        .d_flags =      0,
172        .d_open =       cxgb_extension_open,
173        .d_close =      cxgb_extension_close,
174        .d_ioctl =      cxgb_extension_ioctl,
175        .d_name =       "cxgb",
176 };
177 
178 static devclass_t	cxgb_port_devclass;
179 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
180 
181 /*
182  * The driver uses the best interrupt scheme available on a platform in the
183  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
184  * of these schemes the driver may consider as follows:
185  *
186  * msi = 2: choose from among all three options
187  * msi = 1: only consider MSI and pin interrupts
188  * msi = 0: force pin interrupts
189  */
190 static int msi_allowed = 2;
191 
192 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
193 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
194 SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
195     "MSI-X, MSI, INTx selector");
196 
197 /*
198  * The driver enables offload as a default.
199  * To disable it, use ofld_disable = 1.
200  */
201 static int ofld_disable = 0;
202 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
203 SYSCTL_INT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
204     "disable ULP offload");
205 
206 /*
207  * The driver uses an auto-queue algorithm by default.
208  * To disable it and force a single queue-set per port, use multiq = 0
209  */
210 static int multiq = 1;
211 TUNABLE_INT("hw.cxgb.multiq", &multiq);
212 SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
213     "use min(ncpus/ports, 8) queue-sets per port");
214 
215 /*
216  * By default the driver will not update the firmware unless
217  * it was compiled against a newer version.
219  */
220 static int force_fw_update = 0;
221 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
222 SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
223     "update firmware even if up to date");
224 
225 int cxgb_use_16k_clusters = -1;
226 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
227 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
228     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
229 
230 static int nfilters = -1;
231 TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
232 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
233     &nfilters, 0, "max number of entries in the filter table");
234 
235 enum {
236 	MAX_TXQ_ENTRIES      = 16384,
237 	MAX_CTRL_TXQ_ENTRIES = 1024,
238 	MAX_RSPQ_ENTRIES     = 16384,
239 	MAX_RX_BUFFERS       = 16384,
240 	MAX_RX_JUMBO_BUFFERS = 16384,
241 	MIN_TXQ_ENTRIES      = 4,
242 	MIN_CTRL_TXQ_ENTRIES = 4,
243 	MIN_RSPQ_ENTRIES     = 32,
244 	MIN_FL_ENTRIES       = 32,
245 	MIN_FL_JUMBO_ENTRIES = 32
246 };
247 
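/*
 * Software shadow of a single hardware packet-filter entry.  Entries are
 * allocated by alloc_filters() and pushed to the hardware by
 * setup_hw_filters()/set_filter().
 */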
248 struct filter_info {
249 	u32 sip;
250 	u32 sip_mask;
251 	u32 dip;
252 	u16 sport;
253 	u16 dport;
254 	u32 vlan:12;
255 	u32 vlan_prio:3;
256 	u32 mac_hit:1;
257 	u32 mac_idx:4;
258 	u32 mac_vld:1;
259 	u32 pkt_type:2;
260 	u32 report_filter_id:1;
261 	u32 pass:1;
262 	u32 rss:1;
263 	u32 qset:3;
264 	u32 locked:1;
265 	u32 valid:1;
266 };
267 
268 enum { FILTER_NO_VLAN_PRI = 7 };
269 
270 #define EEPROM_MAGIC 0x38E2F10C
271 
272 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
273 
274 /* Table for probing the cards.  The desc field isn't actually used */
275 struct cxgb_ident {
276 	uint16_t	vendor;
277 	uint16_t	device;
278 	int		index;
279 	char		*desc;
280 } cxgb_identifiers[] = {
281 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
282 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
283 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
284 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
285 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
286 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
287 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
288 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
289 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
290 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
291 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
292 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
293 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
294 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
295 	{0, 0, 0, NULL}
296 };
297 
298 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
299 
300 
301 static __inline char
302 t3rev2char(struct adapter *adapter)
303 {
304 	char rev = 'z';
305 
306 	switch (adapter->params.rev) {
307 	case T3_REV_A:
308 		rev = 'a';
309 		break;
310 	case T3_REV_B:
311 	case T3_REV_B2:
312 		rev = 'b';
313 		break;
314 	case T3_REV_C:
315 		rev = 'c';
316 		break;
317 	}
318 	return rev;
319 }
320 
321 static struct cxgb_ident *
322 cxgb_get_ident(device_t dev)
323 {
324 	struct cxgb_ident *id;
325 
326 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
327 		if ((id->vendor == pci_get_vendor(dev)) &&
328 		    (id->device == pci_get_device(dev))) {
329 			return (id);
330 		}
331 	}
332 	return (NULL);
333 }
334 
335 static const struct adapter_info *
336 cxgb_get_adapter_info(device_t dev)
337 {
338 	struct cxgb_ident *id;
339 	const struct adapter_info *ai;
340 
341 	id = cxgb_get_ident(dev);
342 	if (id == NULL)
343 		return (NULL);
344 
345 	ai = t3_get_adapter_info(id->index);
346 
347 	return (ai);
348 }
349 
350 static int
351 cxgb_controller_probe(device_t dev)
352 {
353 	const struct adapter_info *ai;
354 	char *ports, buf[80];
355 	int nports;
356 
357 	ai = cxgb_get_adapter_info(dev);
358 	if (ai == NULL)
359 		return (ENXIO);
360 
361 	nports = ai->nports0 + ai->nports1;
362 	if (nports == 1)
363 		ports = "port";
364 	else
365 		ports = "ports";
366 
367 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
368 	device_set_desc_copy(dev, buf);
369 	return (BUS_PROBE_DEFAULT);
370 }
371 
372 #define FW_FNAME "cxgb_t3fw"
373 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
374 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
375 
376 static int
377 upgrade_fw(adapter_t *sc)
378 {
379 	const struct firmware *fw;
380 	int status;
381 	u32 vers;
382 
383 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
384 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
385 		return (ENOENT);
386 	} else
387 		device_printf(sc->dev, "installing firmware on card\n");
388 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
389 
390 	if (status != 0) {
391 		device_printf(sc->dev, "failed to install firmware: %d\n",
392 		    status);
393 	} else {
394 		t3_get_fw_version(sc, &vers);
395 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
396 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
397 		    G_FW_VERSION_MICRO(vers));
398 	}
399 
400 	firmware_put(fw, FIRMWARE_UNLOAD);
401 
402 	return (status);
403 }
404 
405 /*
406  * The cxgb_controller_attach function is responsible for the initial
407  * bringup of the device.  Its responsibilities include:
408  *
409  *  1. Determine if the device supports MSI or MSI-X.
410  *  2. Allocate bus resources so that we can access the Base Address Register
411  *  3. Create and initialize mutexes for the controller and its control
412  *     logic such as SGE and MDIO.
413  *  4. Call hardware specific setup routine for the adapter as a whole.
414  *  5. Allocate the BAR for doing MSI-X.
415  *  6. Set up the line interrupt iff MSI-X is not supported.
416  *  7. Create the driver's taskq.
417  *  8. Start one task queue service thread.
418  *  9. Check if the firmware and SRAM are up-to-date.  They will be
419  *     auto-updated later (before FULL_INIT_DONE), if required.
420  * 10. Create a child device for each MAC (port).
421  * 11. Initialize T3 private state.
422  * 12. Trigger the LED.
423  * 13. Set up offload iff supported.
424  * 14. Reset/restart the tick callout.
425  * 15. Attach sysctls
426  *
427  * NOTE: Any modification or deviation from this list MUST be reflected in
428  * the above comment.  Failure to do so will result in problems on various
429  * error conditions including link flapping.
430  */
431 static int
432 cxgb_controller_attach(device_t dev)
433 {
434 	device_t child;
435 	const struct adapter_info *ai;
436 	struct adapter *sc;
437 	int i, error = 0;
438 	uint32_t vers;
439 	int port_qsets = 1;
440 	int msi_needed, reg;
441 	char buf[80];
442 
443 	sc = device_get_softc(dev);
444 	sc->dev = dev;
445 	sc->msi_count = 0;
446 	ai = cxgb_get_adapter_info(dev);
447 
448 	/* Find the PCIe link width and set max read request to 4KB. */
449 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
450 		uint16_t lnk;
451 
452 		lnk = pci_read_config(dev, reg + PCIR_EXPRESS_LINK_STA, 2);
453 		sc->link_width = (lnk & PCIM_LINK_STA_WIDTH) >> 4;
454 		if (sc->link_width < 8 &&
455 		    (ai->caps & SUPPORTED_10000baseT_Full)) {
456 			device_printf(sc->dev,
457 			    "PCIe x%d Link, expect reduced performance\n",
458 			    sc->link_width);
459 		}
460 
461 		pci_set_max_read_req(dev, 4096);
462 	}
463 
464 	touch_bars(dev);
465 	pci_enable_busmaster(dev);
466 	/*
467 	 * Allocate the registers and make them available to the driver.
468 	 * The registers that we care about for NIC mode are in BAR 0
469 	 */
470 	sc->regs_rid = PCIR_BAR(0);
471 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
472 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
473 		device_printf(dev, "Cannot allocate BAR region 0\n");
474 		return (ENXIO);
475 	}
476 
477 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
478 	    device_get_unit(dev));
479 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
480 
481 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
482 	    device_get_unit(dev));
483 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
484 	    device_get_unit(dev));
485 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
486 	    device_get_unit(dev));
487 
488 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
489 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
490 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
491 
492 	sc->bt = rman_get_bustag(sc->regs_res);
493 	sc->bh = rman_get_bushandle(sc->regs_res);
494 	sc->mmio_len = rman_get_size(sc->regs_res);
495 
496 	for (i = 0; i < MAX_NPORTS; i++)
497 		sc->port[i].adapter = sc;
498 
499 	if (t3_prep_adapter(sc, ai, 1) < 0) {
500 		printf("prep adapter failed\n");
501 		error = ENODEV;
502 		goto out;
503 	}
504 
505 	sc->udbs_rid = PCIR_BAR(2);
506 	sc->udbs_res = NULL;
507 	if (is_offload(sc) &&
508 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
509 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
510 		device_printf(dev, "Cannot allocate BAR region 1\n");
511 		error = ENXIO;
512 		goto out;
513 	}
514 
515 	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
516 	 * enough messages for the queue sets.  If that fails, try falling
517 	 * back to MSI.  If that fails, then try falling back to the legacy
518 	 * interrupt pin model.
519 	 */
520 	sc->msix_regs_rid = 0x20;
521 	if ((msi_allowed >= 2) &&
522 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
523 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
524 
525 		if (multiq)
526 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
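		/* One vector per queue set plus one for the async interrupt. */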
527 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
528 
529 		if (pci_msix_count(dev) == 0 ||
530 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
531 		    sc->msi_count != msi_needed) {
532 			device_printf(dev, "alloc msix failed - "
533 				      "msi_count=%d, msi_needed=%d, err=%d; "
534 				      "will try MSI\n", sc->msi_count,
535 				      msi_needed, error);
536 			sc->msi_count = 0;
537 			port_qsets = 1;
538 			pci_release_msi(dev);
539 			bus_release_resource(dev, SYS_RES_MEMORY,
540 			    sc->msix_regs_rid, sc->msix_regs_res);
541 			sc->msix_regs_res = NULL;
542 		} else {
543 			sc->flags |= USING_MSIX;
544 			sc->cxgb_intr = cxgb_async_intr;
545 			device_printf(dev,
546 				      "using MSI-X interrupts (%u vectors)\n",
547 				      sc->msi_count);
548 		}
549 	}
550 
551 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
552 		sc->msi_count = 1;
553 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
554 			device_printf(dev, "alloc msi failed - "
555 				      "err=%d; will try INTx\n", error);
556 			sc->msi_count = 0;
557 			port_qsets = 1;
558 			pci_release_msi(dev);
559 		} else {
560 			sc->flags |= USING_MSI;
561 			sc->cxgb_intr = t3_intr_msi;
562 			device_printf(dev, "using MSI interrupts\n");
563 		}
564 	}
565 	if (sc->msi_count == 0) {
566 		device_printf(dev, "using line interrupts\n");
567 		sc->cxgb_intr = t3b_intr;
568 	}
569 
570 	/* Create a private taskqueue thread for handling driver events */
571 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
572 	    taskqueue_thread_enqueue, &sc->tq);
573 	if (sc->tq == NULL) {
574 		device_printf(dev, "failed to allocate controller task queue\n");
		error = ENOMEM;	/* make attach fail so resources are released */
575 		goto out;
576 	}
577 
578 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
579 	    device_get_nameunit(dev));
580 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
581 
582 
583 	/* Create a periodic callout for checking adapter status */
584 	callout_init(&sc->cxgb_tick_ch, TRUE);
585 
586 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
587 		/*
588 		 * Warn user that a firmware update will be attempted in init.
589 		 */
590 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
591 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
592 		sc->flags &= ~FW_UPTODATE;
593 	} else {
594 		sc->flags |= FW_UPTODATE;
595 	}
596 
597 	if (t3_check_tpsram_version(sc) < 0) {
598 		/*
599 		 * Warn user that a firmware update will be attempted in init.
600 		 */
601 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
602 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
603 		sc->flags &= ~TPS_UPTODATE;
604 	} else {
605 		sc->flags |= TPS_UPTODATE;
606 	}
607 
608 	/*
609 	 * Create a child device for each MAC.  The ethernet attachment
610 	 * will be done in these children.
611 	 */
612 	for (i = 0; i < (sc)->params.nports; i++) {
613 		struct port_info *pi;
614 
615 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
616 			device_printf(dev, "failed to add child port\n");
617 			error = EINVAL;
618 			goto out;
619 		}
620 		pi = &sc->port[i];
621 		pi->adapter = sc;
622 		pi->nqsets = port_qsets;
623 		pi->first_qset = i*port_qsets;
624 		pi->port_id = i;
625 		pi->tx_chan = i >= ai->nports0;
626 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
627 		sc->rxpkt_map[pi->txpkt_intf] = i;
628 		sc->port[i].tx_chan = i >= ai->nports0;
629 		sc->portdev[i] = child;
630 		device_set_softc(child, pi);
631 	}
632 	if ((error = bus_generic_attach(dev)) != 0)
633 		goto out;
634 
635 	/* initialize sge private state */
636 	t3_sge_init_adapter(sc);
637 
638 	t3_led_ready(sc);
639 
640 	cxgb_offload_init();
641 	if (is_offload(sc)) {
642 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
643 		cxgb_adapter_ofld(sc);
644 	}
645 	error = t3_get_fw_version(sc, &vers);
646 	if (error)
647 		goto out;
648 
649 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
650 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
651 	    G_FW_VERSION_MICRO(vers));
652 
653 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
654 		 ai->desc, is_offload(sc) ? "R" : "",
655 		 sc->params.vpd.ec, sc->params.vpd.sn);
656 	device_set_desc_copy(dev, buf);
657 
658 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
659 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
660 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
661 
662 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
663 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
664 	t3_add_attach_sysctls(sc);
665 out:
666 	if (error)
667 		cxgb_free(sc);
668 
669 	return (error);
670 }
671 
672 /*
673  * The cxgb_controller_detach routine is called when the device is
674  * unloaded from the system.
675  */
676 
677 static int
678 cxgb_controller_detach(device_t dev)
679 {
680 	struct adapter *sc;
681 
682 	sc = device_get_softc(dev);
683 
684 	cxgb_free(sc);
685 
686 	return (0);
687 }
688 
689 /*
690  * cxgb_free() is called by the cxgb_controller_detach() routine to tear
691  * down the structures that were built up in cxgb_controller_attach(),
692  * and should be the final piece of work done when fully unloading the
693  * driver.  Its tasks include:
694  *
696  *  1. Shutting down the threads started by the cxgb_controller_attach()
697  *     routine.
698  *  2. Stopping the lower level device and all callouts (cxgb_down()).
699  *  3. Detaching all of the port devices created during the
700  *     cxgb_controller_attach() routine.
701  *  4. Removing the device children created via cxgb_controller_attach().
702  *  5. Releasing PCI resources associated with the device.
703  *  6. Turning off the offload support, iff it was turned on.
704  *  7. Destroying the mutexes created in cxgb_controller_attach().
705  *
706  */
707 static void
708 cxgb_free(struct adapter *sc)
709 {
710 	int i, nqsets = 0;
711 
712 	ADAPTER_LOCK(sc);
713 	sc->flags |= CXGB_SHUTDOWN;
714 	ADAPTER_UNLOCK(sc);
715 
716 	/*
717 	 * Make sure all child devices are gone.
718 	 */
719 	bus_generic_detach(sc->dev);
720 	for (i = 0; i < (sc)->params.nports; i++) {
721 		if (sc->portdev[i] &&
722 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
723 			device_printf(sc->dev, "failed to delete child port\n");
724 		nqsets += sc->port[i].nqsets;
725 	}
726 
727 	/*
728 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
729 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
730 	 * all open devices have been closed.
731 	 */
732 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
733 					   __func__, sc->open_device_map));
734 	for (i = 0; i < sc->params.nports; i++) {
735 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
736 						  __func__, i));
737 	}
738 
739 	/*
740 	 * Finish off the adapter's callouts.
741 	 */
742 	callout_drain(&sc->cxgb_tick_ch);
743 	callout_drain(&sc->sge_timer_ch);
744 
745 	/*
746 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
747 	 * sysctls are cleaned up by the kernel linker.
748 	 */
749 	if (sc->flags & FULL_INIT_DONE) {
750  		t3_free_sge_resources(sc, nqsets);
751  		sc->flags &= ~FULL_INIT_DONE;
752  	}
753 
754 	/*
755 	 * Release all interrupt resources.
756 	 */
757 	cxgb_teardown_interrupts(sc);
758 	if (sc->flags & (USING_MSI | USING_MSIX)) {
759 		device_printf(sc->dev, "releasing msi message(s)\n");
760 		pci_release_msi(sc->dev);
761 	} else {
762 		device_printf(sc->dev, "no msi message to release\n");
763 	}
764 
765 	if (sc->msix_regs_res != NULL) {
766 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
767 		    sc->msix_regs_res);
768 	}
769 
770 	/*
771 	 * Free the adapter's taskqueue.
772 	 */
773 	if (sc->tq != NULL) {
774 		taskqueue_free(sc->tq);
775 		sc->tq = NULL;
776 	}
777 
778 	if (is_offload(sc)) {
779 		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
780 		cxgb_adapter_unofld(sc);
781 	}
782 
783 #ifdef notyet
784 	if (sc->flags & CXGB_OFLD_INIT)
785 		cxgb_offload_deactivate(sc);
786 #endif
787 	free(sc->filters, M_DEVBUF);
788 	t3_sge_free(sc);
789 
790 	cxgb_offload_exit();
791 
792 	if (sc->udbs_res != NULL)
793 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
794 		    sc->udbs_res);
795 
796 	if (sc->regs_res != NULL)
797 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
798 		    sc->regs_res);
799 
800 	MTX_DESTROY(&sc->mdio_lock);
801 	MTX_DESTROY(&sc->sge.reg_lock);
802 	MTX_DESTROY(&sc->elmer_lock);
803 	ADAPTER_LOCK_DEINIT(sc);
804 }
805 
806 /**
807  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
808  *	@sc: the controller softc
809  *
810  *	Determines how many sets of SGE queues to use and initializes them.
811  *	We support multiple queue sets per port if we have MSI-X, otherwise
812  *	just one queue set per port.
813  */
814 static int
815 setup_sge_qsets(adapter_t *sc)
816 {
817 	int i, j, err, irq_idx = 0, qset_idx = 0;
818 	u_int ntxq = SGE_TXQ_PER_SET;
819 
820 	if ((err = t3_sge_alloc(sc)) != 0) {
821 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
822 		return (err);
823 	}
824 
825 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
826 		irq_idx = -1;
827 
828 	for (i = 0; i < (sc)->params.nports; i++) {
829 		struct port_info *pi = &sc->port[i];
830 
831 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
832 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
833 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
834 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
835 			if (err) {
836 				t3_free_sge_resources(sc, qset_idx);
837 				device_printf(sc->dev,
838 				    "t3_sge_alloc_qset failed with %d\n", err);
839 				return (err);
840 			}
841 		}
842 	}
843 
844 	return (0);
845 }
846 
847 static void
848 cxgb_teardown_interrupts(adapter_t *sc)
849 {
850 	int i;
851 
852 	for (i = 0; i < SGE_QSETS; i++) {
853 		if (sc->msix_intr_tag[i] == NULL) {
854 
855 			/* Should have been setup fully or not at all */
856 			KASSERT(sc->msix_irq_res[i] == NULL &&
857 				sc->msix_irq_rid[i] == 0,
858 				("%s: half-done interrupt (%d).", __func__, i));
859 
860 			continue;
861 		}
862 
863 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
864 				  sc->msix_intr_tag[i]);
865 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
866 				     sc->msix_irq_res[i]);
867 
868 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
869 		sc->msix_irq_rid[i] = 0;
870 	}
871 
872 	if (sc->intr_tag) {
873 		KASSERT(sc->irq_res != NULL,
874 			("%s: half-done interrupt.", __func__));
875 
876 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
877 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
878 				     sc->irq_res);
879 
880 		sc->irq_res = sc->intr_tag = NULL;
881 		sc->irq_rid = 0;
882 	}
883 }
884 
885 static int
886 cxgb_setup_interrupts(adapter_t *sc)
887 {
888 	struct resource *res;
889 	void *tag;
890 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
891 
892 	sc->irq_rid = intr_flag ? 1 : 0;
893 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
894 					     RF_SHAREABLE | RF_ACTIVE);
895 	if (sc->irq_res == NULL) {
896 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
897 			      intr_flag, sc->irq_rid);
898 		err = EINVAL;
899 		sc->irq_rid = 0;
900 	} else {
901 		err = bus_setup_intr(sc->dev, sc->irq_res,
902 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
903 		    sc->cxgb_intr, sc, &sc->intr_tag);
904 
905 		if (err) {
906 			device_printf(sc->dev,
907 				      "Cannot set up interrupt (%x, %u, %d)\n",
908 				      intr_flag, sc->irq_rid, err);
909 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
910 					     sc->irq_res);
911 			sc->irq_res = sc->intr_tag = NULL;
912 			sc->irq_rid = 0;
913 		}
914 	}
915 
916 	/* That's all for INTx or MSI */
917 	if (!(intr_flag & USING_MSIX) || err)
918 		return (err);
919 
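	/*
	 * MSI-X: rid 1 (set up above) runs sc->cxgb_intr (cxgb_async_intr);
	 * each qset i gets its own vector at rid i + 2, serviced by
	 * t3_intr_msix.
	 */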
920 	for (i = 0; i < sc->msi_count - 1; i++) {
921 		rid = i + 2;
922 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
923 					     RF_SHAREABLE | RF_ACTIVE);
924 		if (res == NULL) {
925 			device_printf(sc->dev, "Cannot allocate interrupt "
926 				      "for message %d\n", rid);
927 			err = EINVAL;
928 			break;
929 		}
930 
931 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
932 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
933 		if (err) {
934 			device_printf(sc->dev, "Cannot set up interrupt "
935 				      "for message %d (%d)\n", rid, err);
936 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
937 			break;
938 		}
939 
940 		sc->msix_irq_rid[i] = rid;
941 		sc->msix_irq_res[i] = res;
942 		sc->msix_intr_tag[i] = tag;
943 	}
944 
945 	if (err)
946 		cxgb_teardown_interrupts(sc);
947 
948 	return (err);
949 }
950 
951 
952 static int
953 cxgb_port_probe(device_t dev)
954 {
955 	struct port_info *p;
956 	char buf[80];
957 	const char *desc;
958 
959 	p = device_get_softc(dev);
960 	desc = p->phy.desc;
961 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
962 	device_set_desc_copy(dev, buf);
963 	return (0);
964 }
965 
966 
967 static int
968 cxgb_makedev(struct port_info *pi)
969 {
970 
971 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
972 	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
973 
974 	if (pi->port_cdev == NULL)
975 		return (ENOMEM);
976 
977 	pi->port_cdev->si_drv1 = (void *)pi;
978 
979 	return (0);
980 }
981 
982 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
983     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
984     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
985 #define CXGB_CAP_ENABLE CXGB_CAP
986 
987 static int
988 cxgb_port_attach(device_t dev)
989 {
990 	struct port_info *p;
991 	struct ifnet *ifp;
992 	int err;
993 	struct adapter *sc;
994 
995 	p = device_get_softc(dev);
996 	sc = p->adapter;
997 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
998 	    device_get_unit(device_get_parent(dev)), p->port_id);
999 	PORT_LOCK_INIT(p, p->lockbuf);
1000 
1001 	callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1002 	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1003 
1004 	/* Allocate an ifnet object and set it up */
1005 	ifp = p->ifp = if_alloc(IFT_ETHER);
1006 	if (ifp == NULL) {
1007 		device_printf(dev, "Cannot allocate ifnet\n");
1008 		return (ENOMEM);
1009 	}
1010 
1011 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1012 	ifp->if_init = cxgb_init;
1013 	ifp->if_softc = p;
1014 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1015 	ifp->if_ioctl = cxgb_ioctl;
1016 	ifp->if_transmit = cxgb_transmit;
1017 	ifp->if_qflush = cxgb_qflush;
1018 
1019 	ifp->if_capabilities = CXGB_CAP;
1020 	ifp->if_capenable = CXGB_CAP_ENABLE;
1021 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1022 
1023 	/*
1024 	 * Disable TSO on 4-port - it isn't supported by the firmware.
1025 	 */
1026 	if (sc->params.nports > 2) {
1027 		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1028 		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1029 		ifp->if_hwassist &= ~CSUM_TSO;
1030 	}
1031 
1032 	ether_ifattach(ifp, p->hw_addr);
1033 
1034 #ifdef DEFAULT_JUMBO
1035 	if (sc->params.nports <= 2)
1036 		ifp->if_mtu = ETHERMTU_JUMBO;
1037 #endif
1038 	if ((err = cxgb_makedev(p)) != 0) {
1039 		printf("makedev failed %d\n", err);
1040 		return (err);
1041 	}
1042 
1043 	/* Create a list of media supported by this port */
1044 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1045 	    cxgb_media_status);
1046 	cxgb_build_medialist(p);
1047 
1048 	t3_sge_init_port(p);
1049 
1050 	return (err);
1051 }
1052 
1053 /*
1054  * cxgb_port_detach() is called via the device_detach method when
1055  * cxgb_free() calls bus_generic_detach().  It is responsible for
1056  * removing the device from the view of the kernel, i.e. from all
1057  * interface lists etc.  This routine is only called when the driver is
1058  * being unloaded, not when the link goes down.
1059  */
1060 static int
1061 cxgb_port_detach(device_t dev)
1062 {
1063 	struct port_info *p;
1064 	struct adapter *sc;
1065 	int i;
1066 
1067 	p = device_get_softc(dev);
1068 	sc = p->adapter;
1069 
1070 	/* Tell cxgb_ioctl and if_init that the port is going away */
1071 	ADAPTER_LOCK(sc);
1072 	SET_DOOMED(p);
1073 	wakeup(&sc->flags);
1074 	while (IS_BUSY(sc))
1075 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1076 	SET_BUSY(sc);
1077 	ADAPTER_UNLOCK(sc);
1078 
1079 	if (p->port_cdev != NULL)
1080 		destroy_dev(p->port_cdev);
1081 
1082 	cxgb_uninit_synchronized(p);
1083 	ether_ifdetach(p->ifp);
1084 
1085 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1086 		struct sge_qset *qs = &sc->sge.qs[i];
1087 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1088 
1089 		callout_drain(&txq->txq_watchdog);
1090 		callout_drain(&txq->txq_timer);
1091 	}
1092 
1093 	PORT_LOCK_DEINIT(p);
1094 	if_free(p->ifp);
1095 	p->ifp = NULL;
1096 
1097 	ADAPTER_LOCK(sc);
1098 	CLR_BUSY(sc);
1099 	wakeup_one(&sc->flags);
1100 	ADAPTER_UNLOCK(sc);
1101 	return (0);
1102 }
1103 
1104 void
1105 t3_fatal_err(struct adapter *sc)
1106 {
1107 	u_int fw_status[4];
1108 
1109 	if (sc->flags & FULL_INIT_DONE) {
1110 		t3_sge_stop(sc);
1111 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1112 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1113 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1114 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1115 		t3_intr_disable(sc);
1116 	}
1117 	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
1118 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1119 		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1120 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1121 }
1122 
1123 int
1124 t3_os_find_pci_capability(adapter_t *sc, int cap)
1125 {
1126 	device_t dev;
1127 	struct pci_devinfo *dinfo;
1128 	pcicfgregs *cfg;
1129 	uint32_t status;
1130 	uint8_t ptr;
1131 
1132 	dev = sc->dev;
1133 	dinfo = device_get_ivars(dev);
1134 	cfg = &dinfo->cfg;
1135 
1136 	status = pci_read_config(dev, PCIR_STATUS, 2);
1137 	if (!(status & PCIM_STATUS_CAPPRESENT))
1138 		return (0);
1139 
1140 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1141 	case 0:
1142 	case 1:
1143 		ptr = PCIR_CAP_PTR;
1144 		break;
1145 	case 2:
1146 		ptr = PCIR_CAP_PTR_2;
1147 		break;
1148 	default:
1149 		return (0);
1150 		break;
1151 	}
1152 	ptr = pci_read_config(dev, ptr, 1);
1153 
1154 	while (ptr != 0) {
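	/* Walk the capability list until 'cap' is found or the list ends. */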
1155 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1156 			return (ptr);
1157 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1158 	}
1159 
1160 	return (0);
1161 }
1162 
1163 int
1164 t3_os_pci_save_state(struct adapter *sc)
1165 {
1166 	device_t dev;
1167 	struct pci_devinfo *dinfo;
1168 
1169 	dev = sc->dev;
1170 	dinfo = device_get_ivars(dev);
1171 
1172 	pci_cfg_save(dev, dinfo, 0);
1173 	return (0);
1174 }
1175 
1176 int
1177 t3_os_pci_restore_state(struct adapter *sc)
1178 {
1179 	device_t dev;
1180 	struct pci_devinfo *dinfo;
1181 
1182 	dev = sc->dev;
1183 	dinfo = device_get_ivars(dev);
1184 
1185 	pci_cfg_restore(dev, dinfo);
1186 	return (0);
1187 }
1188 
1189 /**
1190  *	t3_os_link_changed - handle link status changes
1191  *	@sc: the adapter associated with the link change
1192  *	@port_id: the port index whose link status has changed
1193  *	@link_status: the new status of the link
1194  *	@speed: the new speed setting
1195  *	@duplex: the new duplex setting
1196  *	@fc: the new flow-control setting
1197  *
1198  *	This is the OS-dependent handler for link status changes.  The OS
1199  *	neutral handler takes care of most of the processing for these events,
1200  *	then calls this handler for any OS-specific processing.
1201  */
1202 void
1203 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1204      int duplex, int fc, int mac_was_reset)
1205 {
1206 	struct port_info *pi = &adapter->port[port_id];
1207 	struct ifnet *ifp = pi->ifp;
1208 
1209 	/* no race with detach, so ifp should always be good */
1210 	KASSERT(ifp, ("%s: if detached.", __func__));
1211 
1212 	/* Reapply mac settings if they were lost due to a reset */
1213 	if (mac_was_reset) {
1214 		PORT_LOCK(pi);
1215 		cxgb_update_mac_settings(pi);
1216 		PORT_UNLOCK(pi);
1217 	}
1218 
1219 	if (link_status) {
1220 		ifp->if_baudrate = IF_Mbps(speed);
1221 		if_link_state_change(ifp, LINK_STATE_UP);
1222 	} else
1223 		if_link_state_change(ifp, LINK_STATE_DOWN);
1224 }
1225 
1226 /**
1227  *	t3_os_phymod_changed - handle PHY module changes
1228  *	@adap: the adapter whose PHY module changed
1229  *	@port_id: the index of the port with the new module
1230  *
1231  *	This is the OS-dependent handler for PHY module changes.  It is
1232  *	invoked when a PHY module is removed or inserted for any OS-specific
1233  *	processing.
1234  */
1235 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1236 {
1237 	static const char *mod_str[] = {
1238 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1239 	};
1240 	struct port_info *pi = &adap->port[port_id];
1241 	int mod = pi->phy.modtype;
1242 
1243 	if (mod != pi->media.ifm_cur->ifm_data)
1244 		cxgb_build_medialist(pi);
1245 
1246 	if (mod == phy_modtype_none)
1247 		if_printf(pi->ifp, "PHY module unplugged\n");
1248 	else {
1249 		KASSERT(mod < ARRAY_SIZE(mod_str),
1250 			("invalid PHY module type %d", mod));
1251 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1252 	}
1253 }
1254 
1255 void
1256 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1257 {
1258 
1259 	/*
1260 	 * The ifnet might not be allocated before this gets called,
1261 	 * as this is called early on in attach by t3_prep_adapter, so
1262 	 * save the address off in the port structure.
1263 	 */
1264 	if (cxgb_debug)
1265 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1266 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1267 }
1268 
1269 /*
1270  * Programs the XGMAC based on the settings in the ifnet.  These settings
1271  * include MTU, MAC address, mcast addresses, etc.
1272  */
1273 static void
1274 cxgb_update_mac_settings(struct port_info *p)
1275 {
1276 	struct ifnet *ifp = p->ifp;
1277 	struct t3_rx_mode rm;
1278 	struct cmac *mac = &p->mac;
1279 	int mtu, hwtagging;
1280 
1281 	PORT_LOCK_ASSERT_OWNED(p);
1282 
1283 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1284 
1285 	mtu = ifp->if_mtu;
1286 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1287 		mtu += ETHER_VLAN_ENCAP_LEN;
1288 
1289 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1290 
1291 	t3_mac_set_mtu(mac, mtu);
1292 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1293 	t3_mac_set_address(mac, 0, p->hw_addr);
1294 	t3_init_rx_mode(&rm, p);
1295 	t3_mac_set_rx_mode(mac, &rm);
1296 }
1297 
1298 
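/*
 * Wait for the offload packet counter on response queue 0 to advance by 'n'
 * past 'init_cnt', polling every 10ms and giving up after a few attempts.
 */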
1299 static int
1300 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1301 			      unsigned long n)
1302 {
1303 	int attempts = 5;
1304 
1305 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1306 		if (!--attempts)
1307 			return (ETIMEDOUT);
1308 		t3_os_sleep(10);
1309 	}
1310 	return 0;
1311 }
1312 
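/*
 * Initialize the parity of the TP's SMT, L2T, and routing-table memories by
 * writing every entry with management CPLs, then wait for all the replies
 * before turning offload mode back off.
 */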
1313 static int
1314 init_tp_parity(struct adapter *adap)
1315 {
1316 	int i;
1317 	struct mbuf *m;
1318 	struct cpl_set_tcb_field *greq;
1319 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1320 
1321 	t3_tp_set_offload_mode(adap, 1);
1322 
1323 	for (i = 0; i < 16; i++) {
1324 		struct cpl_smt_write_req *req;
1325 
1326 		m = m_gethdr(M_WAITOK, MT_DATA);
1327 		req = mtod(m, struct cpl_smt_write_req *);
1328 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1329 		memset(req, 0, sizeof(*req));
1330 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1331 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1332 		req->iff = i;
1333 		t3_mgmt_tx(adap, m);
1334 	}
1335 
1336 	for (i = 0; i < 2048; i++) {
1337 		struct cpl_l2t_write_req *req;
1338 
1339 		m = m_gethdr(M_WAITOK, MT_DATA);
1340 		req = mtod(m, struct cpl_l2t_write_req *);
1341 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1342 		memset(req, 0, sizeof(*req));
1343 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1344 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1345 		req->params = htonl(V_L2T_W_IDX(i));
1346 		t3_mgmt_tx(adap, m);
1347 	}
1348 
1349 	for (i = 0; i < 2048; i++) {
1350 		struct cpl_rte_write_req *req;
1351 
1352 		m = m_gethdr(M_WAITOK, MT_DATA);
1353 		req = mtod(m, struct cpl_rte_write_req *);
1354 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1355 		memset(req, 0, sizeof(*req));
1356 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1357 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1358 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1359 		t3_mgmt_tx(adap, m);
1360 	}
1361 
1362 	m = m_gethdr(M_WAITOK, MT_DATA);
1363 	greq = mtod(m, struct cpl_set_tcb_field *);
1364 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1365 	memset(greq, 0, sizeof(*greq));
1366 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1367 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1368 	greq->mask = htobe64(1);
1369 	t3_mgmt_tx(adap, m);
1370 
1371 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1372 	t3_tp_set_offload_mode(adap, 0);
1373 	return (i);
1374 }
1375 
1376 /**
1377  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1378  *	@adap: the adapter
1379  *
1380  *	Sets up RSS to distribute packets to multiple receive queues.  We
1381  *	configure the RSS CPU lookup table to distribute to the number of HW
1382  *	receive queues, and the response queue lookup table to narrow that
1383  *	down to the response queues actually configured for each port.
1384  *	We always configure the RSS mapping for two ports since the mapping
1385  *	table has plenty of entries.
1386  */
1387 static void
1388 setup_rss(adapter_t *adap)
1389 {
1390 	int i;
1391 	u_int nq[2];
1392 	uint8_t cpus[SGE_QSETS + 1];
1393 	uint16_t rspq_map[RSS_TABLE_SIZE];
1394 
1395 	for (i = 0; i < SGE_QSETS; ++i)
1396 		cpus[i] = i;
1397 	cpus[SGE_QSETS] = 0xff;
1398 
1399 	nq[0] = nq[1] = 0;
1400 	for_each_port(adap, i) {
1401 		const struct port_info *pi = adap2pinfo(adap, i);
1402 
1403 		nq[pi->tx_chan] += pi->nqsets;
1404 	}
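	/*
	 * The first half of the lookup table steers to channel 0's qsets and
	 * the second half to channel 1's (offset past channel 0's qsets).
	 */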
1405 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1406 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1407 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1408 	}
1409 
1410 	/* Calculate the reverse RSS map table */
1411 	for (i = 0; i < SGE_QSETS; ++i)
1412 		adap->rrss_map[i] = 0xff;
1413 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1414 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1415 			adap->rrss_map[rspq_map[i]] = i;
1416 
1417 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1418 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1419 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1420 	              cpus, rspq_map);
1421 
1422 }
1423 
1424 /*
1425  * Sends an mbuf to the offload queue driver.
1427  */
1428 static inline int
1429 offload_tx(struct t3cdev *tdev, struct mbuf *m)
1430 {
1431 	int ret;
1432 
1433 	ret = t3_offload_tx(tdev, m);
1434 	return (ret);
1435 }
1436 
1437 static int
1438 write_smt_entry(struct adapter *adapter, int idx)
1439 {
1440 	struct port_info *pi = &adapter->port[idx];
1441 	struct cpl_smt_write_req *req;
1442 	struct mbuf *m;
1443 
1444 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1445 		return (ENOMEM);
1446 
1447 	req = mtod(m, struct cpl_smt_write_req *);
1448 	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1449 
1450 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1451 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1452 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1453 	req->iff = idx;
1454 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1455 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1456 
1457 	m_set_priority(m, 1);
1458 
1459 	offload_tx(&adapter->tdev, m);
1460 
1461 	return (0);
1462 }
1463 
1464 static int
1465 init_smt(struct adapter *adapter)
1466 {
1467 	int i;
1468 
1469 	for_each_port(adapter, i)
1470 		write_smt_entry(adapter, i);
1471 	return 0;
1472 }
1473 
1474 static void
1475 init_port_mtus(adapter_t *adapter)
1476 {
1477 	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1478 
1479 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1480 }
1481 
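/*
 * Send a FW_MNGTOPCODE_PKTSCHED_SET management request that binds queue
 * 'qidx' of scheduler 'sched' to 'port', with 'lo' and 'hi' as the min/max
 * parameters.
 */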
1482 static void
1483 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1484 			      int hi, int port)
1485 {
1486 	struct mbuf *m;
1487 	struct mngt_pktsched_wr *req;
1488 
1489 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1490 	if (m) {
1491 		req = mtod(m, struct mngt_pktsched_wr *);
1492 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1493 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1494 		req->sched = sched;
1495 		req->idx = qidx;
1496 		req->min = lo;
1497 		req->max = hi;
1498 		req->binding = port;
1499 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1500 		t3_mgmt_tx(adap, m);
1501 	}
1502 }
1503 
1504 static void
1505 bind_qsets(adapter_t *sc)
1506 {
1507 	int i, j;
1508 
1509 	for (i = 0; i < (sc)->params.nports; ++i) {
1510 		const struct port_info *pi = adap2pinfo(sc, i);
1511 
1512 		for (j = 0; j < pi->nqsets; ++j) {
1513 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1514 					  -1, pi->tx_chan);
1515 
1516 		}
1517 	}
1518 }
1519 
1520 static void
1521 update_tpeeprom(struct adapter *adap)
1522 {
1523 	const struct firmware *tpeeprom;
1524 
1525 	uint32_t version;
1526 	unsigned int major, minor;
1527 	int ret, len;
1528 	char rev, name[32];
1529 
1530 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1531 
1532 	major = G_TP_VERSION_MAJOR(version);
1533 	minor = G_TP_VERSION_MINOR(version);
1534 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1535 		return;
1536 
1537 	rev = t3rev2char(adap);
1538 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1539 
1540 	tpeeprom = firmware_get(name);
1541 	if (tpeeprom == NULL) {
1542 		device_printf(adap->dev,
1543 			      "could not load TP EEPROM: unable to load %s\n",
1544 			      name);
1545 		return;
1546 	}
1547 
1548 	len = tpeeprom->datasize - 4;
1549 
1550 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1551 	if (ret)
1552 		goto release_tpeeprom;
1553 
1554 	if (len != TP_SRAM_LEN) {
1555 		device_printf(adap->dev,
1556 			      "%s length is wrong len=%d expected=%d\n", name,
1557 			      len, TP_SRAM_LEN);
1558 		goto release_tpeeprom;
1559 	}
1560 
1561 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1562 	    TP_SRAM_OFFSET);
1563 
1564 	if (!ret) {
1565 		device_printf(adap->dev,
1566 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1567 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1568 	} else
1569 		device_printf(adap->dev,
1570 			      "Protocol SRAM image update in EEPROM failed\n");
1571 
1572 release_tpeeprom:
1573 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1574 
1575 	return;
1576 }
1577 
1578 static int
1579 update_tpsram(struct adapter *adap)
1580 {
1581 	const struct firmware *tpsram;
1582 	int ret;
1583 	char rev, name[32];
1584 
1585 	rev = t3rev2char(adap);
1586 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1587 
1588 	update_tpeeprom(adap);
1589 
1590 	tpsram = firmware_get(name);
1591 	if (tpsram == NULL) {
1592 		device_printf(adap->dev, "could not load TP SRAM\n");
1593 		return (EINVAL);
1594 	} else
1595 		device_printf(adap->dev, "updating TP SRAM\n");
1596 
1597 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1598 	if (ret)
1599 		goto release_tpsram;
1600 
1601 	ret = t3_set_proto_sram(adap, tpsram->data);
1602 	if (ret)
1603 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1604 
1605 release_tpsram:
1606 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1607 
1608 	return ret;
1609 }
1610 
1611 /**
1612  *	cxgb_up - enable the adapter
1613  *	@adap: adapter being enabled
1614  *
1615  *	Called when the first port is enabled, this function performs the
1616  *	actions necessary to make an adapter operational, such as completing
1617  *	the initialization of HW modules, and enabling interrupts.
1618  */
1619 static int
1620 cxgb_up(struct adapter *sc)
1621 {
1622 	int err = 0;
1623 	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1624 
1625 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1626 					   __func__, sc->open_device_map));
1627 
1628 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1629 
1630 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1631 
1632 		if ((sc->flags & FW_UPTODATE) == 0)
1633 			if ((err = upgrade_fw(sc)))
1634 				goto out;
1635 
1636 		if ((sc->flags & TPS_UPTODATE) == 0)
1637 			if ((err = update_tpsram(sc)))
1638 				goto out;
1639 
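		/*
		 * With filtering in use no MC5 space is reserved for servers;
		 * nfilters < 0 means use all the space left after
		 * MC5_MIN_TIDS, otherwise the table is capped at that amount.
		 */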
1640 		if (is_offload(sc) && nfilters != 0) {
1641 			sc->params.mc5.nservers = 0;
1642 
1643 			if (nfilters < 0)
1644 				sc->params.mc5.nfilters = mxf;
1645 			else
1646 				sc->params.mc5.nfilters = min(nfilters, mxf);
1647 		}
1648 
1649 		err = t3_init_hw(sc, 0);
1650 		if (err)
1651 			goto out;
1652 
1653 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1654 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1655 
1656 		err = setup_sge_qsets(sc);
1657 		if (err)
1658 			goto out;
1659 
1660 		alloc_filters(sc);
1661 		setup_rss(sc);
1662 
1663 		t3_intr_clear(sc);
1664 		err = cxgb_setup_interrupts(sc);
1665 		if (err)
1666 			goto out;
1667 
1668 		t3_add_configured_sysctls(sc);
1669 		sc->flags |= FULL_INIT_DONE;
1670 	}
1671 
1672 	t3_intr_clear(sc);
1673 	t3_sge_start(sc);
1674 	t3_intr_enable(sc);
1675 
1676 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1677 	    is_offload(sc) && init_tp_parity(sc) == 0)
1678 		sc->flags |= TP_PARITY_INIT;
1679 
1680 	if (sc->flags & TP_PARITY_INIT) {
1681 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1682 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1683 	}
1684 
1685 	if (!(sc->flags & QUEUES_BOUND)) {
1686 		bind_qsets(sc);
1687 		setup_hw_filters(sc);
1688 		sc->flags |= QUEUES_BOUND;
1689 	}
1690 
1691 	t3_sge_reset_adapter(sc);
1692 out:
1693 	return (err);
1694 }
1695 
1696 /*
1697  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1698  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1699  * during controller_detach, not here.
1700  */
1701 static void
1702 cxgb_down(struct adapter *sc)
1703 {
1704 	t3_sge_stop(sc);
1705 	t3_intr_disable(sc);
1706 }
1707 
1708 static int
1709 offload_open(struct port_info *pi)
1710 {
1711 	struct adapter *sc = pi->adapter;
1712 	struct t3cdev *tdev = &sc->tdev;
1713 
1714 	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1715 
1716 	t3_tp_set_offload_mode(sc, 1);
1717 	tdev->lldev = pi->ifp;
1718 	init_port_mtus(sc);
1719 	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1720 		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1721 	init_smt(sc);
1722 	cxgb_add_clients(tdev);
1723 
1724 	return (0);
1725 }
1726 
1727 static int
1728 offload_close(struct t3cdev *tdev)
1729 {
1730 	struct adapter *adapter = tdev2adap(tdev);
1731 
1732 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1733 		return (0);
1734 
1735 	/* Call back all registered clients */
1736 	cxgb_remove_clients(tdev);
1737 
1738 	tdev->lldev = NULL;
1739 	cxgb_set_dummy_ops(tdev);
1740 	t3_tp_set_offload_mode(adapter, 0);
1741 
1742 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1743 
1744 	return (0);
1745 }
1746 
1747 /*
1748  * if_init for cxgb ports.
1749  */
1750 static void
1751 cxgb_init(void *arg)
1752 {
1753 	struct port_info *p = arg;
1754 	struct adapter *sc = p->adapter;
1755 
1756 	ADAPTER_LOCK(sc);
1757 	cxgb_init_locked(p); /* releases adapter lock */
1758 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1759 }
1760 
1761 static int
1762 cxgb_init_locked(struct port_info *p)
1763 {
1764 	struct adapter *sc = p->adapter;
1765 	struct ifnet *ifp = p->ifp;
1766 	struct cmac *mac = &p->mac;
1767 	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1768 
1769 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1770 
1771 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1772 		gave_up_lock = 1;
1773 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1774 			rc = EINTR;
1775 			goto done;
1776 		}
1777 	}
1778 	if (IS_DOOMED(p)) {
1779 		rc = ENXIO;
1780 		goto done;
1781 	}
1782 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1783 
1784 	/*
1785 	 * The code that runs during one-time adapter initialization can sleep
1786 	 * so it's important not to hold any locks across it.
1787 	 */
1788 	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1789 
1790 	if (may_sleep) {
1791 		SET_BUSY(sc);
1792 		gave_up_lock = 1;
1793 		ADAPTER_UNLOCK(sc);
1794 	}
1795 
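	/* Nothing is open yet: bring the adapter itself up first. */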
1796 	if (sc->open_device_map == 0) {
1797 		if ((rc = cxgb_up(sc)) != 0)
1798 			goto done;
1799 
1800 		if (is_offload(sc) && !ofld_disable && offload_open(p))
1801 			log(LOG_WARNING,
1802 			    "Could not initialize offload capabilities\n");
1803 	}
1804 
1805 	PORT_LOCK(p);
1806 	if (isset(&sc->open_device_map, p->port_id) &&
1807 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1808 		PORT_UNLOCK(p);
1809 		goto done;
1810 	}
1811 	t3_port_intr_enable(sc, p->port_id);
1812 	if (!mac->multiport)
1813 		t3_mac_init(mac);
1814 	cxgb_update_mac_settings(p);
1815 	t3_link_start(&p->phy, mac, &p->link_config);
1816 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1817 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1818 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1819 	PORT_UNLOCK(p);
1820 
1821 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1822 		struct sge_qset *qs = &sc->sge.qs[i];
1823 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1824 
1825 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1826 				 txq->txq_watchdog.c_cpu);
1827 	}
1828 
1829 	/* all ok */
1830 	setbit(&sc->open_device_map, p->port_id);
1831 	callout_reset(&p->link_check_ch,
1832 	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1833 	    link_check_callout, p);
1834 
1835 done:
1836 	if (may_sleep) {
1837 		ADAPTER_LOCK(sc);
1838 		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1839 		CLR_BUSY(sc);
1840 	}
1841 	if (gave_up_lock)
1842 		wakeup_one(&sc->flags);
1843 	ADAPTER_UNLOCK(sc);
1844 	return (rc);
1845 }
1846 
1847 static int
1848 cxgb_uninit_locked(struct port_info *p)
1849 {
1850 	struct adapter *sc = p->adapter;
1851 	int rc;
1852 
1853 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1854 
1855 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1856 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1857 			rc = EINTR;
1858 			goto done;
1859 		}
1860 	}
1861 	if (IS_DOOMED(p)) {
1862 		rc = ENXIO;
1863 		goto done;
1864 	}
1865 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1866 	SET_BUSY(sc);
1867 	ADAPTER_UNLOCK(sc);
1868 
1869 	rc = cxgb_uninit_synchronized(p);
1870 
1871 	ADAPTER_LOCK(sc);
1872 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1873 	CLR_BUSY(sc);
1874 	wakeup_one(&sc->flags);
1875 done:
1876 	ADAPTER_UNLOCK(sc);
1877 	return (rc);
1878 }
1879 
1880 /*
1881  * Called on "ifconfig down" and from port_detach.
1882  */
1883 static int
1884 cxgb_uninit_synchronized(struct port_info *pi)
1885 {
1886 	struct adapter *sc = pi->adapter;
1887 	struct ifnet *ifp = pi->ifp;
1888 
1889 	/*
1890 	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1891 	 */
1892 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1893 
1894 	/*
1895 	 * Clear this port's bit from the open device map, and then drain all
1896 	 * the tasks that can access/manipulate this port's port_info or ifp.
1897 	 * This port's interrupts are disabled here, so the slow/ext interrupt
1898 	 * tasks won't be enqueued on its behalf.  The tick task will continue
1899 	 * to be enqueued every second, but runs after this drain will not see
1900 	 * this port in the open device map.
1901 	 *
1902 	 * A well-behaved task must take open_device_map into account and
1903 	 * ignore ports that are not open.
1904 	 */
1905 	clrbit(&sc->open_device_map, pi->port_id);
1906 	t3_port_intr_disable(sc, pi->port_id);
1907 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1908 	taskqueue_drain(sc->tq, &sc->tick_task);
1909 
1910 	callout_drain(&pi->link_check_ch);
1911 	taskqueue_drain(sc->tq, &pi->link_check_task);
1912 
1913 	PORT_LOCK(pi);
1914 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1915 
1916 	/* disable pause frames */
1917 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1918 
1919 	/* Reset RX FIFO HWM */
1920 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1921 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1922 
1923 	DELAY(100 * 1000);
1924 
1925 	/* Wait for TXFIFO empty */
1926 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1927 			F_TXFIFO_EMPTY, 1, 20, 5);
1928 
1929 	DELAY(100 * 1000);
1930 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1931 
1933 	pi->phy.ops->power_down(&pi->phy, 1);
1934 
1935 	PORT_UNLOCK(pi);
1936 
1937 	pi->link_config.link_ok = 0;
1938 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1939 
1940 	if ((sc->open_device_map & PORT_MASK) == 0)
1941 		offload_close(&sc->tdev);
1942 
1943 	if (sc->open_device_map == 0)
1944 		cxgb_down(pi->adapter);
1945 
1946 	return (0);
1947 }
1948 
1949 /*
1950  * Mark lro enabled or disabled in all qsets for this port
1951  */
1952 static int
1953 cxgb_set_lro(struct port_info *p, int enabled)
1954 {
1955 	int i;
1956 	struct adapter *adp = p->adapter;
1957 	struct sge_qset *q;
1958 
1959 	for (i = 0; i < p->nqsets; i++) {
1960 		q = &adp->sge.qs[p->first_qset + i];
1961 		q->lro.enabled = (enabled != 0);
1962 	}
1963 	return (0);
1964 }
1965 
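/*
 * ioctl handler for cxgb ports.  Most commands take the adapter lock and
 * bail out early with ENXIO if the port is doomed or EBUSY if the adapter
 * is busy with a slow operation.
 */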
1966 static int
1967 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1968 {
1969 	struct port_info *p = ifp->if_softc;
1970 	struct adapter *sc = p->adapter;
1971 	struct ifreq *ifr = (struct ifreq *)data;
1972 	int flags, error = 0, mtu;
1973 	uint32_t mask;
1974 
1975 	switch (command) {
1976 	case SIOCSIFMTU:
1977 		ADAPTER_LOCK(sc);
1978 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1979 		if (error) {
1980 fail:
1981 			ADAPTER_UNLOCK(sc);
1982 			return (error);
1983 		}
1984 
1985 		mtu = ifr->ifr_mtu;
1986 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1987 			error = EINVAL;
1988 		} else {
1989 			ifp->if_mtu = mtu;
1990 			PORT_LOCK(p);
1991 			cxgb_update_mac_settings(p);
1992 			PORT_UNLOCK(p);
1993 		}
1994 		ADAPTER_UNLOCK(sc);
1995 		break;
1996 	case SIOCSIFFLAGS:
1997 		ADAPTER_LOCK(sc);
1998 		if (IS_DOOMED(p)) {
1999 			error = ENXIO;
2000 			goto fail;
2001 		}
2002 		if (ifp->if_flags & IFF_UP) {
2003 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2004 				flags = p->if_flags;
2005 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2006 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2007 					if (IS_BUSY(sc)) {
2008 						error = EBUSY;
2009 						goto fail;
2010 					}
2011 					PORT_LOCK(p);
2012 					cxgb_update_mac_settings(p);
2013 					PORT_UNLOCK(p);
2014 				}
2015 				ADAPTER_UNLOCK(sc);
2016 			} else
2017 				error = cxgb_init_locked(p);
2018 			p->if_flags = ifp->if_flags;
2019 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2020 			error = cxgb_uninit_locked(p);
2021 		else
2022 			ADAPTER_UNLOCK(sc);
2023 
2024 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2025 		break;
2026 	case SIOCADDMULTI:
2027 	case SIOCDELMULTI:
2028 		ADAPTER_LOCK(sc);
2029 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2030 		if (error)
2031 			goto fail;
2032 
2033 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2034 			PORT_LOCK(p);
2035 			cxgb_update_mac_settings(p);
2036 			PORT_UNLOCK(p);
2037 		}
2038 		ADAPTER_UNLOCK(sc);
2039 
2040 		break;
2041 	case SIOCSIFCAP:
2042 		ADAPTER_LOCK(sc);
2043 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2044 		if (error)
2045 			goto fail;
2046 
2047 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2048 		if (mask & IFCAP_TXCSUM) {
2049 			ifp->if_capenable ^= IFCAP_TXCSUM;
2050 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2051 
2052 			if (IFCAP_TSO & ifp->if_capenable &&
2053 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2054 				ifp->if_capenable &= ~IFCAP_TSO;
2055 				ifp->if_hwassist &= ~CSUM_TSO;
2056 				if_printf(ifp,
2057 				    "tso disabled due to -txcsum.\n");
2058 			}
2059 		}
2060 		if (mask & IFCAP_RXCSUM)
2061 			ifp->if_capenable ^= IFCAP_RXCSUM;
2062 		if (mask & IFCAP_TSO) {
2063 			ifp->if_capenable ^= IFCAP_TSO;
2064 
2065 			if (IFCAP_TSO & ifp->if_capenable) {
2066 				if (IFCAP_TXCSUM & ifp->if_capenable)
2067 					ifp->if_hwassist |= CSUM_TSO;
2068 				else {
2069 					ifp->if_capenable &= ~IFCAP_TSO;
2070 					ifp->if_hwassist &= ~CSUM_TSO;
2071 					if_printf(ifp,
2072 					    "enable txcsum first.\n");
2073 					error = EAGAIN;
2074 				}
2075 			} else
2076 				ifp->if_hwassist &= ~CSUM_TSO;
2077 		}
2078 		if (mask & IFCAP_LRO) {
2079 			ifp->if_capenable ^= IFCAP_LRO;
2080 
2081 			/* Safe to do this even if cxgb_up not called yet */
2082 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2083 		}
2084 		if (mask & IFCAP_VLAN_HWTAGGING) {
2085 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2086 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2087 				PORT_LOCK(p);
2088 				cxgb_update_mac_settings(p);
2089 				PORT_UNLOCK(p);
2090 			}
2091 		}
2092 		if (mask & IFCAP_VLAN_MTU) {
2093 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2094 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2095 				PORT_LOCK(p);
2096 				cxgb_update_mac_settings(p);
2097 				PORT_UNLOCK(p);
2098 			}
2099 		}
2100 		if (mask & IFCAP_VLAN_HWTSO)
2101 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2102 		if (mask & IFCAP_VLAN_HWCSUM)
2103 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2104 
2105 #ifdef VLAN_CAPABILITIES
2106 		VLAN_CAPABILITIES(ifp);
2107 #endif
2108 		ADAPTER_UNLOCK(sc);
2109 		break;
2110 	case SIOCSIFMEDIA:
2111 	case SIOCGIFMEDIA:
2112 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2113 		break;
2114 	default:
2115 		error = ether_ioctl(ifp, command, data);
2116 	}
2117 
2118 	return (error);
2119 }
2120 
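/*
 * Manual media changes are not supported; the media list built by
 * cxgb_build_medialist reflects whatever the installed PHY/module can do.
 */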
2121 static int
2122 cxgb_media_change(struct ifnet *ifp)
2123 {
2124 	return (EOPNOTSUPP);
2125 }
2126 
2127 /*
2128  * Translates phy->modtype to the correct Ethernet media subtype.
2129  */
2130 static int
2131 cxgb_ifm_type(int mod)
2132 {
2133 	switch (mod) {
2134 	case phy_modtype_sr:
2135 		return (IFM_10G_SR);
2136 	case phy_modtype_lr:
2137 		return (IFM_10G_LR);
2138 	case phy_modtype_lrm:
2139 		return (IFM_10G_LRM);
2140 	case phy_modtype_twinax:
2141 		return (IFM_10G_TWINAX);
2142 	case phy_modtype_twinax_long:
2143 		return (IFM_10G_TWINAX_LONG);
2144 	case phy_modtype_none:
2145 		return (IFM_NONE);
2146 	case phy_modtype_unknown:
2147 		return (IFM_UNKNOWN);
2148 	}
2149 
2150 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2151 	return (IFM_UNKNOWN);
2152 }
2153 
2154 /*
2155  * Rebuilds the ifmedia list for this port, and sets the current media.
2156  */
2157 static void
2158 cxgb_build_medialist(struct port_info *p)
2159 {
2160 	struct cphy *phy = &p->phy;
2161 	struct ifmedia *media = &p->media;
2162 	int mod = phy->modtype;
2163 	int m = IFM_ETHER | IFM_FDX;
2164 
2165 	PORT_LOCK(p);
2166 
2167 	ifmedia_removeall(media);
2168 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2169 		/* Copper (RJ45) */
2170 
2171 		if (phy->caps & SUPPORTED_10000baseT_Full)
2172 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2173 
2174 		if (phy->caps & SUPPORTED_1000baseT_Full)
2175 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2176 
2177 		if (phy->caps & SUPPORTED_100baseT_Full)
2178 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2179 
2180 		if (phy->caps & SUPPORTED_10baseT_Full)
2181 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2182 
2183 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2184 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2185 
2186 	} else if (phy->caps & SUPPORTED_TP) {
2187 		/* Copper (CX4) */
2188 
2189 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2190 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2191 
2192 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2193 		ifmedia_set(media, m | IFM_10G_CX4);
2194 
2195 	} else if (phy->caps & SUPPORTED_FIBRE &&
2196 		   phy->caps & SUPPORTED_10000baseT_Full) {
2197 		/* 10G optical (but includes SFP+ twinax) */
2198 
2199 		m |= cxgb_ifm_type(mod);
2200 		if (IFM_SUBTYPE(m) == IFM_NONE)
2201 			m &= ~IFM_FDX;
2202 
2203 		ifmedia_add(media, m, mod, NULL);
2204 		ifmedia_set(media, m);
2205 
2206 	} else if (phy->caps & SUPPORTED_FIBRE &&
2207 		   phy->caps & SUPPORTED_1000baseT_Full) {
2208 		/* 1G optical */
2209 
2210 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2211 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2212 		ifmedia_set(media, m | IFM_1000_SX);
2213 
2214 	} else {
2215 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2216 			    phy->caps));
2217 	}
2218 
2219 	PORT_UNLOCK(p);
2220 }
2221 
2222 static void
2223 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2224 {
2225 	struct port_info *p = ifp->if_softc;
2226 	struct ifmedia_entry *cur = p->media.ifm_cur;
2227 	int speed = p->link_config.speed;
2228 
2229 	if (cur->ifm_data != p->phy.modtype) {
2230 		cxgb_build_medialist(p);
2231 		cur = p->media.ifm_cur;
2232 	}
2233 
2234 	ifmr->ifm_status = IFM_AVALID;
2235 	if (!p->link_config.link_ok)
2236 		return;
2237 
2238 	ifmr->ifm_status |= IFM_ACTIVE;
2239 
2240 	/*
2241 	 * active and current will differ iff current media is autoselect.  That
2242 	 * can happen only for copper RJ45.
2243 	 */
2244 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2245 		return;
2246 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2247 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2248 
2249 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2250 	if (speed == SPEED_10000)
2251 		ifmr->ifm_active |= IFM_10G_T;
2252 	else if (speed == SPEED_1000)
2253 		ifmr->ifm_active |= IFM_1000_T;
2254 	else if (speed == SPEED_100)
2255 		ifmr->ifm_active |= IFM_100_TX;
2256 	else if (speed == SPEED_10)
2257 		ifmr->ifm_active |= IFM_10_T;
2258 	else
2259 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2260 			    speed));
2261 }
2262 
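/*
 * Interrupt handler for slow-path events.  Further PL interrupts are
 * masked (the read back flushes the write) and the actual work is
 * deferred to the slow interrupt task.
 */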
2263 static void
2264 cxgb_async_intr(void *data)
2265 {
2266 	adapter_t *sc = data;
2267 
2268 	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2269 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2270 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2271 }
2272 
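/*
 * Callout that defers the link check to a task, but only for ports that
 * are still in the open device map.
 */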
2273 static void
2274 link_check_callout(void *arg)
2275 {
2276 	struct port_info *pi = arg;
2277 	struct adapter *sc = pi->adapter;
2278 
2279 	if (!isset(&sc->open_device_map, pi->port_id))
2280 		return;
2281 
2282 	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2283 }
2284 
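/*
 * Task handler that asks the hardware whether the link state changed.
 * Ports without a working link interrupt (no SUPPORTED_LINK_IRQ, or a
 * pending link fault) are polled again one second later.
 */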
2285 static void
2286 check_link_status(void *arg, int pending)
2287 {
2288 	struct port_info *pi = arg;
2289 	struct adapter *sc = pi->adapter;
2290 
2291 	if (!isset(&sc->open_device_map, pi->port_id))
2292 		return;
2293 
2294 	t3_link_changed(sc, pi->port_id);
2295 
2296 	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2297 		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2298 }
2299 
2300 void
2301 t3_os_link_intr(struct port_info *pi)
2302 {
2303 	/*
2304 	 * Schedule a link check in the near future.  If the link is flapping
2305 	 * rapidly we'll keep resetting the callout and delaying the check until
2306 	 * things stabilize a bit.
2307 	 */
2308 	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2309 }
2310 
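/*
 * MAC watchdog for T3B2 adapters.  Depending on what the watchdog task
 * reports, either count a toggle or restart the MAC and link on the
 * affected port.
 */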
2311 static void
2312 check_t3b2_mac(struct adapter *sc)
2313 {
2314 	int i;
2315 
2316 	if (sc->flags & CXGB_SHUTDOWN)
2317 		return;
2318 
2319 	for_each_port(sc, i) {
2320 		struct port_info *p = &sc->port[i];
2321 		int status;
2322 #ifdef INVARIANTS
2323 		struct ifnet *ifp = p->ifp;
2324 #endif
2325 
2326 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2327 		    !p->link_config.link_ok)
2328 			continue;
2329 
2330 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2331 			("%s: state mismatch (drv_flags %x, device_map %x)",
2332 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2333 
2334 		PORT_LOCK(p);
2335 		status = t3b2_mac_watchdog_task(&p->mac);
2336 		if (status == 1)
2337 			p->mac.stats.num_toggled++;
2338 		else if (status == 2) {
2339 			struct cmac *mac = &p->mac;
2340 
2341 			cxgb_update_mac_settings(p);
2342 			t3_link_start(&p->phy, mac, &p->link_config);
2343 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2344 			t3_port_intr_enable(sc, p->port_id);
2345 			p->mac.stats.num_resets++;
2346 		}
2347 		PORT_UNLOCK(p);
2348 	}
2349 }
2350 
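/*
 * 1Hz adapter callout; defers the real work to the tick task and
 * reschedules itself.
 */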
2351 static void
2352 cxgb_tick(void *arg)
2353 {
2354 	adapter_t *sc = (adapter_t *)arg;
2355 
2356 	if (sc->flags & CXGB_SHUTDOWN)
2357 		return;
2358 
2359 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2360 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2361 }
2362 
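/*
 * Tick task: runs the T3B2 MAC watchdog when applicable, accounts for
 * starved response queues and empty free lists, refreshes the per-port
 * interface statistics from the MAC counters, and counts RX FIFO
 * overflows.
 */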
2363 static void
2364 cxgb_tick_handler(void *arg, int count)
2365 {
2366 	adapter_t *sc = (adapter_t *)arg;
2367 	const struct adapter_params *p = &sc->params;
2368 	int i;
2369 	uint32_t cause, reset;
2370 
2371 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2372 		return;
2373 
2374 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2375 		check_t3b2_mac(sc);
2376 
2377 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2378 	if (cause) {
2379 		struct sge_qset *qs = &sc->sge.qs[0];
2380 		uint32_t mask, v;
2381 
2382 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2383 
2384 		mask = 1;
2385 		for (i = 0; i < SGE_QSETS; i++) {
2386 			if (v & mask)
2387 				qs[i].rspq.starved++;
2388 			mask <<= 1;
2389 		}
2390 
2391 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2392 
2393 		for (i = 0; i < SGE_QSETS * 2; i++) {
2394 			if (v & mask) {
2395 				qs[i / 2].fl[i % 2].empty++;
2396 			}
2397 			mask <<= 1;
2398 		}
2399 
2400 		/* clear */
2401 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2402 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2403 	}
2404 
2405 	for (i = 0; i < sc->params.nports; i++) {
2406 		struct port_info *pi = &sc->port[i];
2407 		struct ifnet *ifp = pi->ifp;
2408 		struct cmac *mac = &pi->mac;
2409 		struct mac_stats *mstats = &mac->stats;
2410 		int drops, j;
2411 
2412 		if (!isset(&sc->open_device_map, pi->port_id))
2413 			continue;
2414 
2415 		PORT_LOCK(pi);
2416 		t3_mac_update_stats(mac);
2417 		PORT_UNLOCK(pi);
2418 
2419 		ifp->if_opackets = mstats->tx_frames;
2420 		ifp->if_ipackets = mstats->rx_frames;
2421 		ifp->if_obytes = mstats->tx_octets;
2422 		ifp->if_ibytes = mstats->rx_octets;
2423 		ifp->if_omcasts = mstats->tx_mcast_frames;
2424 		ifp->if_imcasts = mstats->rx_mcast_frames;
2425 		ifp->if_collisions = mstats->tx_total_collisions;
2426 		ifp->if_iqdrops = mstats->rx_cong_drops;
2427 
2428 		drops = 0;
2429 		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2430 			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2431 		ifp->if_snd.ifq_drops = drops;
2432 
2433 		ifp->if_oerrors =
2434 		    mstats->tx_excess_collisions +
2435 		    mstats->tx_underrun +
2436 		    mstats->tx_len_errs +
2437 		    mstats->tx_mac_internal_errs +
2438 		    mstats->tx_excess_deferral +
2439 		    mstats->tx_fcs_errs;
2440 		ifp->if_ierrors =
2441 		    mstats->rx_jabber +
2442 		    mstats->rx_data_errs +
2443 		    mstats->rx_sequence_errs +
2444 		    mstats->rx_runt +
2445 		    mstats->rx_too_long +
2446 		    mstats->rx_mac_internal_errs +
2447 		    mstats->rx_short +
2448 		    mstats->rx_fcs_errs;
2449 
2450 		if (mac->multiport)
2451 			continue;
2452 
2453 		/* Count rx fifo overflows, once per second */
2454 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2455 		reset = 0;
2456 		if (cause & F_RXFIFO_OVERFLOW) {
2457 			mac->stats.rx_fifo_ovfl++;
2458 			reset |= F_RXFIFO_OVERFLOW;
2459 		}
2460 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2461 	}
2462 }
2463 
2464 static void
2465 touch_bars(device_t dev)
2466 {
2467 	/*
2468 	 * Don't enable yet
2469 	 */
2470 #if !defined(__LP64__) && 0
2471 	u32 v;
2472 
2473 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2474 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2475 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2476 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2477 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2478 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2479 #endif
2480 }
2481 
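/*
 * Write len bytes at offset into the serial EEPROM.  The EEPROM is
 * accessed in 4-byte words, so an unaligned write is turned into a
 * read-modify-write of the first and last words touched.
 */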
2482 static int
2483 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2484 {
2485 	uint8_t *buf;
2486 	int err = 0;
2487 	u32 aligned_offset, aligned_len, *p;
2488 	struct adapter *adapter = pi->adapter;
2489 
2491 	aligned_offset = offset & ~3;
2492 	aligned_len = (len + (offset & 3) + 3) & ~3;
2493 
2494 	if (aligned_offset != offset || aligned_len != len) {
2495 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2496 		if (!buf)
2497 			return (ENOMEM);
2498 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2499 		if (!err && aligned_len > 4)
2500 			err = t3_seeprom_read(adapter,
2501 					      aligned_offset + aligned_len - 4,
2502 					      (u32 *)&buf[aligned_len - 4]);
2503 		if (err)
2504 			goto out;
2505 		memcpy(buf + (offset & 3), data, len);
2506 	} else
2507 		buf = (uint8_t *)(uintptr_t)data;
2508 
2509 	err = t3_seeprom_wp(adapter, 0);
2510 	if (err)
2511 		goto out;
2512 
2513 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2514 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2515 		aligned_offset += 4;
2516 	}
2517 
2518 	if (!err)
2519 		err = t3_seeprom_wp(adapter, 1);
2520 out:
2521 	if (buf != data)
2522 		free(buf, M_DEVBUF);
2523 	return (err);
2524 }
2525 
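/*
 * Range check helper for the ioctls below; negative values mean
 * "unspecified" and always pass.
 */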
2527 static int
2528 in_range(int val, int lo, int hi)
2529 {
2530 	return (val < 0 || (val <= hi && val >= lo));
2531 }
2532 
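/* The control device keeps no per-open state. */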
2533 static int
2534 cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2535 {
2536 	return (0);
2537 }
2538 
2539 static int
2540 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2541 {
2542 	return (0);
2543 }
2544 
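/*
 * Privileged ioctl handler for the per-port control device.  A userland
 * tool issues one of the CHELSIO_* commands against the control node,
 * e.g. (sketch only, the device path and register address are
 * illustrative):
 *
 *	struct ch_reg r = { .addr = 0x6c };
 *	int fd = open("/dev/cxgb0", O_RDWR);
 *	if (fd >= 0 && ioctl(fd, CHELSIO_GETREG, &r) == 0)
 *		printf("reg 0x%x = 0x%x\n", r.addr, r.val);
 */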
2545 static int
2546 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2547     int fflag, struct thread *td)
2548 {
2549 	int mmd, error = 0;
2550 	struct port_info *pi = dev->si_drv1;
2551 	adapter_t *sc = pi->adapter;
2552 
2553 #ifdef PRIV_SUPPORTED
2554 	if (priv_check(td, PRIV_DRIVER)) {
2555 		if (cxgb_debug)
2556 			printf("user does not have access to privileged ioctls\n");
2557 		return (EPERM);
2558 	}
2559 #else
2560 	if (suser(td)) {
2561 		if (cxgb_debug)
2562 			printf("user does not have access to privileged ioctls\n");
2563 		return (EPERM);
2564 	}
2565 #endif
2566 
2567 	switch (cmd) {
2568 	case CHELSIO_GET_MIIREG: {
2569 		uint32_t val;
2570 		struct cphy *phy = &pi->phy;
2571 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2572 
2573 		if (!phy->mdio_read)
2574 			return (EOPNOTSUPP);
2575 		if (is_10G(sc)) {
2576 			mmd = mid->phy_id >> 8;
2577 			if (!mmd)
2578 				mmd = MDIO_DEV_PCS;
2579 			else if (mmd > MDIO_DEV_VEND2)
2580 				return (EINVAL);
2581 
2582 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2583 					     mid->reg_num, &val);
2584 		} else
2585 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2586 					     mid->reg_num & 0x1f, &val);
2587 		if (error == 0)
2588 			mid->val_out = val;
2589 		break;
2590 	}
2591 	case CHELSIO_SET_MIIREG: {
2592 		struct cphy *phy = &pi->phy;
2593 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2594 
2595 		if (!phy->mdio_write)
2596 			return (EOPNOTSUPP);
2597 		if (is_10G(sc)) {
2598 			mmd = mid->phy_id >> 8;
2599 			if (!mmd)
2600 				mmd = MDIO_DEV_PCS;
2601 			else if (mmd > MDIO_DEV_VEND2)
2602 				return (EINVAL);
2603 
2604 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2605 					      mmd, mid->reg_num, mid->val_in);
2606 		} else
2607 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2608 					      mid->reg_num & 0x1f,
2609 					      mid->val_in);
2610 		break;
2611 	}
2612 	case CHELSIO_SETREG: {
2613 		struct ch_reg *edata = (struct ch_reg *)data;
2614 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2615 			return (EFAULT);
2616 		t3_write_reg(sc, edata->addr, edata->val);
2617 		break;
2618 	}
2619 	case CHELSIO_GETREG: {
2620 		struct ch_reg *edata = (struct ch_reg *)data;
2621 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2622 			return (EFAULT);
2623 		edata->val = t3_read_reg(sc, edata->addr);
2624 		break;
2625 	}
2626 	case CHELSIO_GET_SGE_CONTEXT: {
2627 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2628 		mtx_lock_spin(&sc->sge.reg_lock);
2629 		switch (ecntxt->cntxt_type) {
2630 		case CNTXT_TYPE_EGRESS:
2631 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2632 			    ecntxt->data);
2633 			break;
2634 		case CNTXT_TYPE_FL:
2635 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2636 			    ecntxt->data);
2637 			break;
2638 		case CNTXT_TYPE_RSP:
2639 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2640 			    ecntxt->data);
2641 			break;
2642 		case CNTXT_TYPE_CQ:
2643 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2644 			    ecntxt->data);
2645 			break;
2646 		default:
2647 			error = EINVAL;
2648 			break;
2649 		}
2650 		mtx_unlock_spin(&sc->sge.reg_lock);
2651 		break;
2652 	}
2653 	case CHELSIO_GET_SGE_DESC: {
2654 		struct ch_desc *edesc = (struct ch_desc *)data;
2655 		int ret;
2656 		if (edesc->queue_num >= SGE_QSETS * 6)
2657 			return (EINVAL);
2658 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2659 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2660 		if (ret < 0)
2661 			return (EINVAL);
2662 		edesc->size = ret;
2663 		break;
2664 	}
2665 	case CHELSIO_GET_QSET_PARAMS: {
2666 		struct qset_params *q;
2667 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2668 		int q1 = pi->first_qset;
2669 		int nqsets = pi->nqsets;
2670 		int i;
2671 
2672 		if (t->qset_idx >= nqsets)
2673 			return (EINVAL);
2674 
2675 		i = q1 + t->qset_idx;
2676 		q = &sc->params.sge.qset[i];
2677 		t->rspq_size   = q->rspq_size;
2678 		t->txq_size[0] = q->txq_size[0];
2679 		t->txq_size[1] = q->txq_size[1];
2680 		t->txq_size[2] = q->txq_size[2];
2681 		t->fl_size[0]  = q->fl_size;
2682 		t->fl_size[1]  = q->jumbo_size;
2683 		t->polling     = q->polling;
2684 		t->lro         = q->lro;
2685 		t->intr_lat    = q->coalesce_usecs;
2686 		t->cong_thres  = q->cong_thres;
2687 		t->qnum        = i;
2688 
2689 		if ((sc->flags & FULL_INIT_DONE) == 0)
2690 			t->vector = 0;
2691 		else if (sc->flags & USING_MSIX)
2692 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2693 		else
2694 			t->vector = rman_get_start(sc->irq_res);
2695 
2696 		break;
2697 	}
2698 	case CHELSIO_GET_QSET_NUM: {
2699 		struct ch_reg *edata = (struct ch_reg *)data;
2700 		edata->val = pi->nqsets;
2701 		break;
2702 	}
2703 	case CHELSIO_LOAD_FW: {
2704 		uint8_t *fw_data;
2705 		uint32_t vers;
2706 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2707 
2708 		/*
2709 		 * Firmware may be loaded only before FULL_INIT_DONE is set.
2710 		 *
2711 		 * FW_UPTODATE is also set so the rest of the initialization
2712 		 * will not overwrite what was loaded here.  This gives you the
2713 		 * flexibility to load any firmware (and maybe shoot yourself in
2714 		 * the foot).
2715 		 */
2716 
2717 		ADAPTER_LOCK(sc);
2718 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2719 			ADAPTER_UNLOCK(sc);
2720 			return (EBUSY);
2721 		}
2722 
2723 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2724 		if (!fw_data)
2725 			error = ENOMEM;
2726 		else
2727 			error = copyin(t->buf, fw_data, t->len);
2728 
2729 		if (!error)
2730 			error = -t3_load_fw(sc, fw_data, t->len);
2731 
2732 		if (t3_get_fw_version(sc, &vers) == 0) {
2733 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2734 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2735 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2736 		}
2737 
2738 		if (!error)
2739 			sc->flags |= FW_UPTODATE;
2740 
2741 		free(fw_data, M_DEVBUF);
2742 		ADAPTER_UNLOCK(sc);
2743 		break;
2744 	}
2745 	case CHELSIO_LOAD_BOOT: {
2746 		uint8_t *boot_data;
2747 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2748 
2749 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2750 		if (!boot_data)
2751 			return (ENOMEM);
2752 
2753 		error = copyin(t->buf, boot_data, t->len);
2754 		if (!error)
2755 			error = -t3_load_boot(sc, boot_data, t->len);
2756 
2757 		free(boot_data, M_DEVBUF);
2758 		break;
2759 	}
2760 	case CHELSIO_GET_PM: {
2761 		struct ch_pm *m = (struct ch_pm *)data;
2762 		struct tp_params *p = &sc->params.tp;
2763 
2764 		if (!is_offload(sc))
2765 			return (EOPNOTSUPP);
2766 
2767 		m->tx_pg_sz = p->tx_pg_size;
2768 		m->tx_num_pg = p->tx_num_pgs;
2769 		m->rx_pg_sz  = p->rx_pg_size;
2770 		m->rx_num_pg = p->rx_num_pgs;
2771 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2772 
2773 		break;
2774 	}
2775 	case CHELSIO_SET_PM: {
2776 		struct ch_pm *m = (struct ch_pm *)data;
2777 		struct tp_params *p = &sc->params.tp;
2778 
2779 		if (!is_offload(sc))
2780 			return (EOPNOTSUPP);
2781 		if (sc->flags & FULL_INIT_DONE)
2782 			return (EBUSY);
2783 
2784 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2785 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2786 			return (EINVAL);	/* not power of 2 */
2787 		if (!(m->rx_pg_sz & 0x14000))
2788 			return (EINVAL);	/* not 16KB or 64KB */
2789 		if (!(m->tx_pg_sz & 0x1554000))
2790 			return (EINVAL);	/* not 16KB..16MB, x4 steps */
2791 		if (m->tx_num_pg == -1)
2792 			m->tx_num_pg = p->tx_num_pgs;
2793 		if (m->rx_num_pg == -1)
2794 			m->rx_num_pg = p->rx_num_pgs;
2795 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2796 			return (EINVAL);
2797 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2798 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2799 			return (EINVAL);
2800 
2801 		p->rx_pg_size = m->rx_pg_sz;
2802 		p->tx_pg_size = m->tx_pg_sz;
2803 		p->rx_num_pgs = m->rx_num_pg;
2804 		p->tx_num_pgs = m->tx_num_pg;
2805 		break;
2806 	}
2807 	case CHELSIO_SETMTUTAB: {
2808 		struct ch_mtus *m = (struct ch_mtus *)data;
2809 		int i;
2810 
2811 		if (!is_offload(sc))
2812 			return (EOPNOTSUPP);
2813 		if (offload_running(sc))
2814 			return (EBUSY);
2815 		if (m->nmtus != NMTUS)
2816 			return (EINVAL);
2817 		if (m->mtus[0] < 81)         /* accommodate SACK */
2818 			return (EINVAL);
2819 
2820 		/*
2821 		 * MTUs must be in ascending order
2822 		 */
2823 		for (i = 1; i < NMTUS; ++i)
2824 			if (m->mtus[i] < m->mtus[i - 1])
2825 				return (EINVAL);
2826 
2827 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2828 		break;
2829 	}
2830 	case CHELSIO_GETMTUTAB: {
2831 		struct ch_mtus *m = (struct ch_mtus *)data;
2832 
2833 		if (!is_offload(sc))
2834 			return (EOPNOTSUPP);
2835 
2836 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2837 		m->nmtus = NMTUS;
2838 		break;
2839 	}
2840 	case CHELSIO_GET_MEM: {
2841 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2842 		struct mc7 *mem;
2843 		uint8_t *useraddr;
2844 		u64 buf[32];
2845 
2846 		/*
2847 		 * Use these to avoid modifying len/addr in the return
2848 		 * struct
2849 		 */
2850 		uint32_t len = t->len, addr = t->addr;
2851 
2852 		if (!is_offload(sc))
2853 			return (EOPNOTSUPP);
2854 		if (!(sc->flags & FULL_INIT_DONE))
2855 			return (EIO);         /* need the memory controllers */
2856 		if ((addr & 0x7) || (len & 0x7))
2857 			return (EINVAL);
2858 		if (t->mem_id == MEM_CM)
2859 			mem = &sc->cm;
2860 		else if (t->mem_id == MEM_PMRX)
2861 			mem = &sc->pmrx;
2862 		else if (t->mem_id == MEM_PMTX)
2863 			mem = &sc->pmtx;
2864 		else
2865 			return (EINVAL);
2866 
2867 		/*
2868 		 * Version scheme:
2869 		 * bits 0..9: chip version
2870 		 * bits 10..15: chip revision
2871 		 */
2872 		t->version = 3 | (sc->params.rev << 10);
2873 
2874 		/*
2875 		 * Read 256 bytes at a time as len can be large and we don't
2876 		 * want to use huge intermediate buffers.
2877 		 */
2878 		useraddr = (uint8_t *)t->buf;
2879 		while (len) {
2880 			unsigned int chunk = min(len, sizeof(buf));
2881 
2882 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2883 			if (error)
2884 				return (-error);
2885 			if (copyout(buf, useraddr, chunk))
2886 				return (EFAULT);
2887 			useraddr += chunk;
2888 			addr += chunk;
2889 			len -= chunk;
2890 		}
2891 		break;
2892 	}
2893 	case CHELSIO_READ_TCAM_WORD: {
2894 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2895 
2896 		if (!is_offload(sc))
2897 			return (EOPNOTSUPP);
2898 		if (!(sc->flags & FULL_INIT_DONE))
2899 			return (EIO);         /* need MC5 */
2900 		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2901 		break;
2902 	}
2903 	case CHELSIO_SET_TRACE_FILTER: {
2904 		struct ch_trace *t = (struct ch_trace *)data;
2905 		const struct trace_params *tp;
2906 
2907 		tp = (const struct trace_params *)&t->sip;
2908 		if (t->config_tx)
2909 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2910 					       t->trace_tx);
2911 		if (t->config_rx)
2912 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2913 					       t->trace_rx);
2914 		break;
2915 	}
2916 	case CHELSIO_SET_PKTSCHED: {
2917 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2918 		if (sc->open_device_map == 0)
2919 			return (EAGAIN);
2920 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2921 		    p->binding);
2922 		break;
2923 	}
2924 	case CHELSIO_IFCONF_GETREGS: {
2925 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2926 		int reglen = cxgb_get_regs_len();
2927 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2928 		if (buf == NULL) {
2929 			return (ENOMEM);
2930 		}
2931 		if (regs->len > reglen)
2932 			regs->len = reglen;
2933 		else if (regs->len < reglen)
2934 			error = ENOBUFS;
2935 
2936 		if (!error) {
2937 			cxgb_get_regs(sc, regs, buf);
2938 			error = copyout(buf, regs->data, reglen);
2939 		}
2940 		free(buf, M_DEVBUF);
2941 
2942 		break;
2943 	}
2944 	case CHELSIO_SET_HW_SCHED: {
2945 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2946 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2947 
2948 		if ((sc->flags & FULL_INIT_DONE) == 0)
2949 			return (EAGAIN);       /* need TP to be initialized */
2950 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2951 		    !in_range(t->channel, 0, 1) ||
2952 		    !in_range(t->kbps, 0, 10000000) ||
2953 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2954 		    !in_range(t->flow_ipg, 0,
2955 			      dack_ticks_to_usec(sc, 0x7ff)))
2956 			return (EINVAL);
2957 
2958 		if (t->kbps >= 0) {
2959 			error = t3_config_sched(sc, t->kbps, t->sched);
2960 			if (error < 0)
2961 				return (-error);
2962 		}
2963 		if (t->class_ipg >= 0)
2964 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2965 		if (t->flow_ipg >= 0) {
2966 			t->flow_ipg *= 1000;     /* us -> ns */
2967 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2968 		}
2969 		if (t->mode >= 0) {
2970 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2971 
2972 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2973 					 bit, t->mode ? bit : 0);
2974 		}
2975 		if (t->channel >= 0)
2976 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2977 					 1 << t->sched, t->channel << t->sched);
2978 		break;
2979 	}
2980 	case CHELSIO_GET_EEPROM: {
2981 		int i;
2982 		struct ch_eeprom *e = (struct ch_eeprom *)data;
2983 		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2984 
2985 		if (buf == NULL) {
2986 			return (ENOMEM);
2987 		}
2988 		e->magic = EEPROM_MAGIC;
2989 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2990 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2991 
2992 		if (!error)
2993 			error = copyout(buf + e->offset, e->data, e->len);
2994 
2995 		free(buf, M_DEVBUF);
2996 		break;
2997 	}
2998 	case CHELSIO_CLEAR_STATS: {
2999 		if (!(sc->flags & FULL_INIT_DONE))
3000 			return (EAGAIN);
3001 
3002 		PORT_LOCK(pi);
3003 		t3_mac_update_stats(&pi->mac);
3004 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3005 		PORT_UNLOCK(pi);
3006 		break;
3007 	}
3008 	case CHELSIO_GET_UP_LA: {
3009 		struct ch_up_la *la = (struct ch_up_la *)data;
3010 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3011 		if (buf == NULL) {
3012 			return (ENOMEM);
3013 		}
3014 		if (la->bufsize < LA_BUFSIZE)
3015 			error = ENOBUFS;
3016 
3017 		if (!error)
3018 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3019 					      &la->bufsize, buf);
3020 		if (!error)
3021 			error = copyout(buf, la->data, la->bufsize);
3022 
3023 		free(buf, M_DEVBUF);
3024 		break;
3025 	}
3026 	case CHELSIO_GET_UP_IOQS: {
3027 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3028 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3029 		uint32_t *v;
3030 
3031 		if (buf == NULL) {
3032 			return (ENOMEM);
3033 		}
3034 		if (ioqs->bufsize < IOQS_BUFSIZE)
3035 			error = ENOBUFS;
3036 
3037 		if (!error)
3038 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3039 
3040 		if (!error) {
3041 			v = (uint32_t *)buf;
3042 
3043 			ioqs->ioq_rx_enable = *v++;
3044 			ioqs->ioq_tx_enable = *v++;
3045 			ioqs->ioq_rx_status = *v++;
3046 			ioqs->ioq_tx_status = *v++;
3047 
3048 			error = copyout(v, ioqs->data, ioqs->bufsize);
3049 		}
3050 
3051 		free(buf, M_DEVBUF);
3052 		break;
3053 	}
3054 	case CHELSIO_SET_FILTER: {
3055 		struct ch_filter *f = (struct ch_filter *)data;
3056 		struct filter_info *p;
3057 		unsigned int nfilters = sc->params.mc5.nfilters;
3058 
3059 		if (!is_offload(sc))
3060 			return (EOPNOTSUPP);	/* No TCAM */
3061 		if (!(sc->flags & FULL_INIT_DONE))
3062 			return (EAGAIN);	/* mc5 not setup yet */
3063 		if (nfilters == 0)
3064 			return (EBUSY);		/* TOE will use TCAM */
3065 
3066 		/* sanity checks */
3067 		if (f->filter_id >= nfilters ||
3068 		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3069 		    (f->val.sport && f->mask.sport != 0xffff) ||
3070 		    (f->val.dport && f->mask.dport != 0xffff) ||
3071 		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3072 		    (f->val.vlan_prio &&
3073 			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3074 		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3075 		    f->qset >= SGE_QSETS ||
3076 		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3077 			return (EINVAL);
3078 
3079 		/* Was allocated with M_WAITOK */
3080 		KASSERT(sc->filters, ("filter table NULL\n"));
3081 
3082 		p = &sc->filters[f->filter_id];
3083 		if (p->locked)
3084 			return (EPERM);
3085 
3086 		bzero(p, sizeof(*p));
3087 		p->sip = f->val.sip;
3088 		p->sip_mask = f->mask.sip;
3089 		p->dip = f->val.dip;
3090 		p->sport = f->val.sport;
3091 		p->dport = f->val.dport;
3092 		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3093 		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3094 		    FILTER_NO_VLAN_PRI;
3095 		p->mac_hit = f->mac_hit;
3096 		p->mac_vld = f->mac_addr_idx != 0xffff;
3097 		p->mac_idx = f->mac_addr_idx;
3098 		p->pkt_type = f->proto;
3099 		p->report_filter_id = f->want_filter_id;
3100 		p->pass = f->pass;
3101 		p->rss = f->rss;
3102 		p->qset = f->qset;
3103 
3104 		error = set_filter(sc, f->filter_id, p);
3105 		if (error == 0)
3106 			p->valid = 1;
3107 		break;
3108 	}
3109 	case CHELSIO_DEL_FILTER: {
3110 		struct ch_filter *f = (struct ch_filter *)data;
3111 		struct filter_info *p;
3112 		unsigned int nfilters = sc->params.mc5.nfilters;
3113 
3114 		if (!is_offload(sc))
3115 			return (EOPNOTSUPP);
3116 		if (!(sc->flags & FULL_INIT_DONE))
3117 			return (EAGAIN);
3118 		if (nfilters == 0 || sc->filters == NULL)
3119 			return (EINVAL);
3120 		if (f->filter_id >= nfilters)
3121 			return (EINVAL);
3122 
3123 		p = &sc->filters[f->filter_id];
3124 		if (p->locked)
3125 			return (EPERM);
3126 		if (!p->valid)
3127 			return (EFAULT); /* Read "Bad address" as "Bad index" */
3128 
3129 		bzero(p, sizeof(*p));
3130 		p->sip = p->sip_mask = 0xffffffff;
3131 		p->vlan = 0xfff;
3132 		p->vlan_prio = FILTER_NO_VLAN_PRI;
3133 		p->pkt_type = 1;
3134 		error = set_filter(sc, f->filter_id, p);
3135 		break;
3136 	}
3137 	case CHELSIO_GET_FILTER: {
3138 		struct ch_filter *f = (struct ch_filter *)data;
3139 		struct filter_info *p;
3140 		unsigned int i, nfilters = sc->params.mc5.nfilters;
3141 
3142 		if (!is_offload(sc))
3143 			return (EOPNOTSUPP);
3144 		if (!(sc->flags & FULL_INIT_DONE))
3145 			return (EAGAIN);
3146 		if (nfilters == 0 || sc->filters == NULL)
3147 			return (EINVAL);
3148 
3149 		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3150 		for (; i < nfilters; i++) {
3151 			p = &sc->filters[i];
3152 			if (!p->valid)
3153 				continue;
3154 
3155 			bzero(f, sizeof(*f));
3156 
3157 			f->filter_id = i;
3158 			f->val.sip = p->sip;
3159 			f->mask.sip = p->sip_mask;
3160 			f->val.dip = p->dip;
3161 			f->mask.dip = p->dip ? 0xffffffff : 0;
3162 			f->val.sport = p->sport;
3163 			f->mask.sport = p->sport ? 0xffff : 0;
3164 			f->val.dport = p->dport;
3165 			f->mask.dport = p->dport ? 0xffff : 0;
3166 			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3167 			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3168 			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3169 			    0 : p->vlan_prio;
3170 			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3171 			    0 : FILTER_NO_VLAN_PRI;
3172 			f->mac_hit = p->mac_hit;
3173 			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3174 			f->proto = p->pkt_type;
3175 			f->want_filter_id = p->report_filter_id;
3176 			f->pass = p->pass;
3177 			f->rss = p->rss;
3178 			f->qset = p->qset;
3179 
3180 			break;
3181 		}
3182 
3183 		if (i == nfilters)
3184 			f->filter_id = 0xffffffff;
3185 		break;
3186 	}
3187 	default:
3188 		return (EOPNOTSUPP);
3189 		break;
3190 	}
3191 
3192 	return (error);
3193 }
3194 
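/*
 * Copy the register range [start, end] into the snapshot buffer, at the
 * same offset the registers occupy in the register map.
 */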
3195 static __inline void
3196 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3197     unsigned int end)
3198 {
3199 	uint32_t *p = (uint32_t *)(buf + start);
3200 
3201 	for ( ; start <= end; start += sizeof(uint32_t))
3202 		*p++ = t3_read_reg(ap, start);
3203 }
3204 
3205 #define T3_REGMAP_SIZE (3 * 1024)
3206 static int
3207 cxgb_get_regs_len(void)
3208 {
3209 	return (T3_REGMAP_SIZE);
3210 }
3211 
3212 static void
3213 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3214 {
3215 
3216 	/*
3217 	 * Version scheme:
3218 	 * bits 0..9: chip version
3219 	 * bits 10..15: chip revision
3220 	 * bit 31: set for PCIe cards
3221 	 */
3222 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3223 
3224 	/*
3225 	 * We skip the MAC statistics registers because they are clear-on-read.
3226 	 * Also reading multi-register stats would need to synchronize with the
3227 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3228 	 */
3229 	memset(buf, 0, cxgb_get_regs_len());
3230 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3231 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3232 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3233 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3234 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3235 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3236 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3237 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3238 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3239 }
3240 
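/*
 * Allocate the software shadow of the hardware filter table.  The last
 * entry is reserved as a locked default filter (pass to RSS).
 */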
3241 static int
3242 alloc_filters(struct adapter *sc)
3243 {
3244 	struct filter_info *p;
3245 	unsigned int nfilters = sc->params.mc5.nfilters;
3246 
3247 	if (nfilters == 0)
3248 		return (0);
3249 
3250 	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3251 	sc->filters = p;
3252 
3253 	p = &sc->filters[nfilters - 1];
3254 	p->vlan = 0xfff;
3255 	p->vlan_prio = FILTER_NO_VLAN_PRI;
3256 	p->pass = p->rss = p->valid = p->locked = 1;
3257 
3258 	return (0);
3259 }
3260 
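/*
 * Enable filtering and program any locked (default) entries into the
 * hardware.
 */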
3261 static int
3262 setup_hw_filters(struct adapter *sc)
3263 {
3264 	int i, rc;
3265 	unsigned int nfilters = sc->params.mc5.nfilters;
3266 
3267 	if (!sc->filters)
3268 		return (0);
3269 
3270 	t3_enable_filters(sc);
3271 
3272 	for (i = rc = 0; i < nfilters && !rc; i++) {
3273 		if (sc->filters[i].locked)
3274 			rc = set_filter(sc, i, &sc->filters[i]);
3275 	}
3276 
3277 	return (rc);
3278 }
3279 
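/*
 * Install one filter in the TCAM.  The filter is described to the
 * hardware with a CPL_PASS_OPEN_REQ plus TCB field writes, wrapped in a
 * management work request.
 */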
3280 static int
3281 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3282 {
3283 	int len;
3284 	struct mbuf *m;
3285 	struct ulp_txpkt *txpkt;
3286 	struct work_request_hdr *wr;
3287 	struct cpl_pass_open_req *oreq;
3288 	struct cpl_set_tcb_field *sreq;
3289 
3290 	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3291 	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3292 
3293 	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3294 	      sc->params.mc5.nfilters;
3295 
3296 	m = m_gethdr(M_WAITOK, MT_DATA);
3297 	m->m_len = m->m_pkthdr.len = len;
3298 	bzero(mtod(m, char *), len);
3299 
3300 	wr = mtod(m, struct work_request_hdr *);
3301 	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3302 
3303 	oreq = (struct cpl_pass_open_req *)(wr + 1);
3304 	txpkt = (struct ulp_txpkt *)oreq;
3305 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3306 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3307 	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3308 	oreq->local_port = htons(f->dport);
3309 	oreq->peer_port = htons(f->sport);
3310 	oreq->local_ip = htonl(f->dip);
3311 	oreq->peer_ip = htonl(f->sip);
3312 	oreq->peer_netmask = htonl(f->sip_mask);
3313 	oreq->opt0h = 0;
3314 	oreq->opt0l = htonl(F_NO_OFFLOAD);
3315 	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3316 			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3317 			 V_VLAN_PRI(f->vlan_prio >> 1) |
3318 			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3319 			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3320 			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3321 
3322 	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3323 	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3324 			  (f->report_filter_id << 15) | (1 << 23) |
3325 			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3326 	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3327 	t3_mgmt_tx(sc, m);
3328 
3329 	if (f->pass && !f->rss) {
3330 		len = sizeof(*sreq);
3331 		m = m_gethdr(M_WAITOK, MT_DATA);
3332 		m->m_len = m->m_pkthdr.len = len;
3333 		bzero(mtod(m, char *), len);
3334 		sreq = mtod(m, struct cpl_set_tcb_field *);
3335 		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3336 		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3337 				 (u64)sc->rrss_map[f->qset] << 19);
3338 		t3_mgmt_tx(sc, m);
3339 	}
3340 	return (0);
3341 }
3342 
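/*
 * Helpers to build CPL_SET_TCB_FIELD requests, standalone or as part of a
 * ULP_TXPKT sequence.
 */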
3343 static inline void
3344 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3345     unsigned int word, u64 mask, u64 val)
3346 {
3347 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3348 	req->reply = V_NO_REPLY(1);
3349 	req->cpu_idx = 0;
3350 	req->word = htons(word);
3351 	req->mask = htobe64(mask);
3352 	req->val = htobe64(val);
3353 }
3354 
3355 static inline void
3356 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3357     unsigned int word, u64 mask, u64 val)
3358 {
3359 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3360 
3361 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3362 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3363 	mk_set_tcb_field(req, tid, word, mask, val);
3364 }
3365