xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision 1670a1c2a47d10ecccd001970b859caf93cd3b6e)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57 
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66 
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78 
79 #include <cxgb_include.h>
80 
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84 
85 static int cxgb_setup_interrupts(adapter_t *);
86 static void cxgb_teardown_interrupts(adapter_t *);
87 static void cxgb_init(void *);
88 static int cxgb_init_locked(struct port_info *);
89 static int cxgb_uninit_locked(struct port_info *);
90 static int cxgb_uninit_synchronized(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static int cxgb_media_change(struct ifnet *);
93 static int cxgb_ifm_type(int);
94 static void cxgb_build_medialist(struct port_info *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_tick_handler(void *, int);
99 static void cxgb_tick(void *);
100 static void link_check_callout(void *);
101 static void check_link_status(void *, int);
102 static void setup_rss(adapter_t *sc);
103 static int alloc_filters(struct adapter *);
104 static int setup_hw_filters(struct adapter *);
105 static int set_filter(struct adapter *, int, const struct filter_info *);
106 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
107     unsigned int, u64, u64);
108 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
109     unsigned int, u64, u64);
110 
111 /* Attachment glue for the PCI controller end of the device.  Each port of
112  * the device is attached separately, as defined later.
113  */
114 static int cxgb_controller_probe(device_t);
115 static int cxgb_controller_attach(device_t);
116 static int cxgb_controller_detach(device_t);
117 static void cxgb_free(struct adapter *);
118 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
119     unsigned int end);
120 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
121 static int cxgb_get_regs_len(void);
122 static int offload_open(struct port_info *pi);
123 static void touch_bars(device_t dev);
124 static int offload_close(struct t3cdev *tdev);
125 static void cxgb_update_mac_settings(struct port_info *p);
126 
127 static device_method_t cxgb_controller_methods[] = {
128 	DEVMETHOD(device_probe,		cxgb_controller_probe),
129 	DEVMETHOD(device_attach,	cxgb_controller_attach),
130 	DEVMETHOD(device_detach,	cxgb_controller_detach),
131 
132 	/* bus interface */
133 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
134 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
135 
136 	{ 0, 0 }
137 };
138 
139 static driver_t cxgb_controller_driver = {
140 	"cxgbc",
141 	cxgb_controller_methods,
142 	sizeof(struct adapter)
143 };
144 
145 static devclass_t	cxgb_controller_devclass;
146 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
147 
148 /*
149  * Attachment glue for the ports.  Attachment is done directly to the
150  * controller device.
151  */
152 static int cxgb_port_probe(device_t);
153 static int cxgb_port_attach(device_t);
154 static int cxgb_port_detach(device_t);
155 
156 static device_method_t cxgb_port_methods[] = {
157 	DEVMETHOD(device_probe,		cxgb_port_probe),
158 	DEVMETHOD(device_attach,	cxgb_port_attach),
159 	DEVMETHOD(device_detach,	cxgb_port_detach),
160 	{ 0, 0 }
161 };
162 
163 static driver_t cxgb_port_driver = {
164 	"cxgb",
165 	cxgb_port_methods,
166 	0
167 };
168 
169 static d_ioctl_t cxgb_extension_ioctl;
170 static d_open_t cxgb_extension_open;
171 static d_close_t cxgb_extension_close;
172 
173 static struct cdevsw cxgb_cdevsw = {
174        .d_version =    D_VERSION,
175        .d_flags =      0,
176        .d_open =       cxgb_extension_open,
177        .d_close =      cxgb_extension_close,
178        .d_ioctl =      cxgb_extension_ioctl,
179        .d_name =       "cxgb",
180 };
181 
182 static devclass_t	cxgb_port_devclass;
183 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
184 
185 /*
186  * The driver uses the best interrupt scheme available on a platform in the
187  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
188  * of these schemes the driver may consider as follows:
189  *
190  * msi = 2: choose from among all three options
191  * msi = 1 : only consider MSI and pin interrupts
192  * msi = 0: force pin interrupts
193  */
194 static int msi_allowed = 2;
195 
196 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
197 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
198 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
199     "MSI-X, MSI, INTx selector");
200 
201 /*
202  * The driver enables offload as a default.
203  * To disable it, use ofld_disable = 1.
204  */
205 static int ofld_disable = 0;
206 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
207 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
208     "disable ULP offload");
209 
210 /*
211  * The driver uses an auto-queue algorithm by default.
212  * To disable it and force a single queue-set per port, use multiq = 0
213  */
214 static int multiq = 1;
215 TUNABLE_INT("hw.cxgb.multiq", &multiq);
216 SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
217     "use min(ncpus/ports, 8) queue-sets per port");
218 
219 /*
220  * By default the driver will not update the firmware unless
221  * it was compiled against a newer version
222  *
223  */
224 static int force_fw_update = 0;
225 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
226 SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
227     "update firmware even if up to date");
228 
229 int cxgb_use_16k_clusters = -1;
230 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
231 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
232     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue ");
233 
234 /*
235  * Tune the size of the output queue.
236  */
237 int cxgb_snd_queue_len = IFQ_MAXLEN;
238 TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
239 SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
240     &cxgb_snd_queue_len, 0, "send queue size ");
241 
242 static int nfilters = -1;
243 TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
244 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
245     &nfilters, 0, "max number of entries in the filter table");
246 
/*
 * Upper and lower limits on queue and buffer-ring sizes.  The code that
 * applies them is outside this part of the file.
 */
enum {
	/* upper limits */
	MAX_TXQ_ENTRIES      = 16384,
	MAX_CTRL_TXQ_ENTRIES = 1024,
	MAX_RSPQ_ENTRIES     = 16384,
	MAX_RX_BUFFERS       = 16384,
	MAX_RX_JUMBO_BUFFERS = 16384,

	/* lower limits */
	MIN_TXQ_ENTRIES      = 4,
	MIN_CTRL_TXQ_ENTRIES = 4,
	MIN_RSPQ_ENTRIES     = 32,
	MIN_FL_ENTRIES       = 32,
	MIN_FL_JUMBO_ENTRIES = 32
};
259 
260 struct filter_info {
261 	u32 sip;
262 	u32 sip_mask;
263 	u32 dip;
264 	u16 sport;
265 	u16 dport;
266 	u32 vlan:12;
267 	u32 vlan_prio:3;
268 	u32 mac_hit:1;
269 	u32 mac_idx:4;
270 	u32 mac_vld:1;
271 	u32 pkt_type:2;
272 	u32 report_filter_id:1;
273 	u32 pass:1;
274 	u32 rss:1;
275 	u32 qset:3;
276 	u32 locked:1;
277 	u32 valid:1;
278 };
279 
/* 7 is the largest value the 3-bit filter_info.vlan_prio field can hold. */
enum {
	FILTER_NO_VLAN_PRI = 7
};

/* Magic number for EEPROM access; its consumer is not in this section. */
#define EEPROM_MAGIC	0x38E2F10C

/* Bitmask with one bit set for each of the MAX_NPORTS possible ports. */
#define PORT_MASK	((1 << MAX_NPORTS) - 1)
285 
286 /* Table for probing the cards.  The desc field isn't actually used */
287 struct cxgb_ident {
288 	uint16_t	vendor;
289 	uint16_t	device;
290 	int		index;
291 	char		*desc;
292 } cxgb_identifiers[] = {
293 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
294 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
295 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
296 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
297 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
298 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
299 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
300 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
301 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
302 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
303 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
304 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
305 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
306 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
307 	{0, 0, 0, NULL}
308 };
309 
310 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
311 
312 
313 static __inline char
314 t3rev2char(struct adapter *adapter)
315 {
316 	char rev = 'z';
317 
318 	switch(adapter->params.rev) {
319 	case T3_REV_A:
320 		rev = 'a';
321 		break;
322 	case T3_REV_B:
323 	case T3_REV_B2:
324 		rev = 'b';
325 		break;
326 	case T3_REV_C:
327 		rev = 'c';
328 		break;
329 	}
330 	return rev;
331 }
332 
333 static struct cxgb_ident *
334 cxgb_get_ident(device_t dev)
335 {
336 	struct cxgb_ident *id;
337 
338 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
339 		if ((id->vendor == pci_get_vendor(dev)) &&
340 		    (id->device == pci_get_device(dev))) {
341 			return (id);
342 		}
343 	}
344 	return (NULL);
345 }
346 
347 static const struct adapter_info *
348 cxgb_get_adapter_info(device_t dev)
349 {
350 	struct cxgb_ident *id;
351 	const struct adapter_info *ai;
352 
353 	id = cxgb_get_ident(dev);
354 	if (id == NULL)
355 		return (NULL);
356 
357 	ai = t3_get_adapter_info(id->index);
358 
359 	return (ai);
360 }
361 
362 static int
363 cxgb_controller_probe(device_t dev)
364 {
365 	const struct adapter_info *ai;
366 	char *ports, buf[80];
367 	int nports;
368 
369 	ai = cxgb_get_adapter_info(dev);
370 	if (ai == NULL)
371 		return (ENXIO);
372 
373 	nports = ai->nports0 + ai->nports1;
374 	if (nports == 1)
375 		ports = "port";
376 	else
377 		ports = "ports";
378 
379 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
380 	device_set_desc_copy(dev, buf);
381 	return (BUS_PROBE_DEFAULT);
382 }
383 
384 #define FW_FNAME "cxgb_t3fw"
385 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
386 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
387 
388 static int
389 upgrade_fw(adapter_t *sc)
390 {
391 	const struct firmware *fw;
392 	int status;
393 	u32 vers;
394 
395 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
396 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
397 		return (ENOENT);
398 	} else
399 		device_printf(sc->dev, "installing firmware on card\n");
400 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
401 
402 	if (status != 0) {
403 		device_printf(sc->dev, "failed to install firmware: %d\n",
404 		    status);
405 	} else {
406 		t3_get_fw_version(sc, &vers);
407 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
408 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
409 		    G_FW_VERSION_MICRO(vers));
410 	}
411 
412 	firmware_put(fw, FIRMWARE_UNLOAD);
413 
414 	return (status);
415 }
416 
417 /*
418  * The cxgb_controller_attach function is responsible for the initial
419  * bringup of the device.  Its responsibilities include:
420  *
421  *  1. Determine if the device supports MSI or MSI-X.
422  *  2. Allocate bus resources so that we can access the Base Address Register
423  *  3. Create and initialize mutexes for the controller and its control
424  *     logic such as SGE and MDIO.
425  *  4. Call hardware specific setup routine for the adapter as a whole.
426  *  5. Allocate the BAR for doing MSI-X.
427  *  6. Setup the line interrupt iff MSI-X is not supported.
428  *  7. Create the driver's taskq.
429  *  8. Start one task queue service thread.
430  *  9. Check if the firmware and SRAM are up-to-date.  They will be
431  *     auto-updated later (before FULL_INIT_DONE), if required.
432  * 10. Create a child device for each MAC (port)
433  * 11. Initialize T3 private state.
434  * 12. Trigger the LED
435  * 13. Setup offload iff supported.
436  * 14. Reset/restart the tick callout.
437  * 15. Attach sysctls
438  *
439  * NOTE: Any modification or deviation from this list MUST be reflected in
440  * the above comment.  Failure to do so will result in problems on various
441  * error conditions including link flapping.
442  */
443 static int
444 cxgb_controller_attach(device_t dev)
445 {
446 	device_t child;
447 	const struct adapter_info *ai;
448 	struct adapter *sc;
449 	int i, error = 0;
450 	uint32_t vers;
451 	int port_qsets = 1;
452 	int msi_needed, reg;
453 	char buf[80];
454 
455 	sc = device_get_softc(dev);
456 	sc->dev = dev;
457 	sc->msi_count = 0;
458 	ai = cxgb_get_adapter_info(dev);
459 
460 	/* find the PCIe link width and set max read request to 4KB*/
461 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
462 		uint16_t lnk;
463 
464 		lnk = pci_read_config(dev, reg + PCIR_EXPRESS_LINK_STA, 2);
465 		sc->link_width = (lnk & PCIM_LINK_STA_WIDTH) >> 4;
466 		if (sc->link_width < 8 &&
467 		    (ai->caps & SUPPORTED_10000baseT_Full)) {
468 			device_printf(sc->dev,
469 			    "PCIe x%d Link, expect reduced performance\n",
470 			    sc->link_width);
471 		}
472 
473 		pci_set_max_read_req(dev, 4096);
474 	}
475 
476 	touch_bars(dev);
477 	pci_enable_busmaster(dev);
478 	/*
479 	 * Allocate the registers and make them available to the driver.
480 	 * The registers that we care about for NIC mode are in BAR 0
481 	 */
482 	sc->regs_rid = PCIR_BAR(0);
483 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
484 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
485 		device_printf(dev, "Cannot allocate BAR region 0\n");
486 		return (ENXIO);
487 	}
488 	sc->udbs_rid = PCIR_BAR(2);
489 	sc->udbs_res = NULL;
490 	if (is_offload(sc) &&
491 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
492 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
493 		device_printf(dev, "Cannot allocate BAR region 1\n");
494 		error = ENXIO;
495 		goto out;
496 	}
497 
498 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
499 	    device_get_unit(dev));
500 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
501 
502 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
503 	    device_get_unit(dev));
504 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
505 	    device_get_unit(dev));
506 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
507 	    device_get_unit(dev));
508 
509 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
510 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
511 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
512 
513 	sc->bt = rman_get_bustag(sc->regs_res);
514 	sc->bh = rman_get_bushandle(sc->regs_res);
515 	sc->mmio_len = rman_get_size(sc->regs_res);
516 
517 	for (i = 0; i < MAX_NPORTS; i++)
518 		sc->port[i].adapter = sc;
519 
520 	if (t3_prep_adapter(sc, ai, 1) < 0) {
521 		printf("prep adapter failed\n");
522 		error = ENODEV;
523 		goto out;
524 	}
525         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
526 	 * enough messages for the queue sets.  If that fails, try falling
527 	 * back to MSI.  If that fails, then try falling back to the legacy
528 	 * interrupt pin model.
529 	 */
530 	sc->msix_regs_rid = 0x20;
531 	if ((msi_allowed >= 2) &&
532 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
533 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
534 
535 		if (multiq)
536 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
537 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
538 
539 		if (pci_msix_count(dev) == 0 ||
540 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
541 		    sc->msi_count != msi_needed) {
542 			device_printf(dev, "alloc msix failed - "
543 				      "msi_count=%d, msi_needed=%d, err=%d; "
544 				      "will try MSI\n", sc->msi_count,
545 				      msi_needed, error);
546 			sc->msi_count = 0;
547 			port_qsets = 1;
548 			pci_release_msi(dev);
549 			bus_release_resource(dev, SYS_RES_MEMORY,
550 			    sc->msix_regs_rid, sc->msix_regs_res);
551 			sc->msix_regs_res = NULL;
552 		} else {
553 			sc->flags |= USING_MSIX;
554 			sc->cxgb_intr = cxgb_async_intr;
555 			device_printf(dev,
556 				      "using MSI-X interrupts (%u vectors)\n",
557 				      sc->msi_count);
558 		}
559 	}
560 
561 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
562 		sc->msi_count = 1;
563 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
564 			device_printf(dev, "alloc msi failed - "
565 				      "err=%d; will try INTx\n", error);
566 			sc->msi_count = 0;
567 			port_qsets = 1;
568 			pci_release_msi(dev);
569 		} else {
570 			sc->flags |= USING_MSI;
571 			sc->cxgb_intr = t3_intr_msi;
572 			device_printf(dev, "using MSI interrupts\n");
573 		}
574 	}
575 	if (sc->msi_count == 0) {
576 		device_printf(dev, "using line interrupts\n");
577 		sc->cxgb_intr = t3b_intr;
578 	}
579 
580 	/* Create a private taskqueue thread for handling driver events */
581 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
582 	    taskqueue_thread_enqueue, &sc->tq);
583 	if (sc->tq == NULL) {
584 		device_printf(dev, "failed to allocate controller task queue\n");
585 		goto out;
586 	}
587 
588 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
589 	    device_get_nameunit(dev));
590 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
591 
592 
593 	/* Create a periodic callout for checking adapter status */
594 	callout_init(&sc->cxgb_tick_ch, TRUE);
595 
596 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
597 		/*
598 		 * Warn user that a firmware update will be attempted in init.
599 		 */
600 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
601 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
602 		sc->flags &= ~FW_UPTODATE;
603 	} else {
604 		sc->flags |= FW_UPTODATE;
605 	}
606 
607 	if (t3_check_tpsram_version(sc) < 0) {
608 		/*
609 		 * Warn user that a firmware update will be attempted in init.
610 		 */
611 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
612 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
613 		sc->flags &= ~TPS_UPTODATE;
614 	} else {
615 		sc->flags |= TPS_UPTODATE;
616 	}
617 
618 	/*
619 	 * Create a child device for each MAC.  The ethernet attachment
620 	 * will be done in these children.
621 	 */
622 	for (i = 0; i < (sc)->params.nports; i++) {
623 		struct port_info *pi;
624 
625 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
626 			device_printf(dev, "failed to add child port\n");
627 			error = EINVAL;
628 			goto out;
629 		}
630 		pi = &sc->port[i];
631 		pi->adapter = sc;
632 		pi->nqsets = port_qsets;
633 		pi->first_qset = i*port_qsets;
634 		pi->port_id = i;
635 		pi->tx_chan = i >= ai->nports0;
636 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
637 		sc->rxpkt_map[pi->txpkt_intf] = i;
638 		sc->port[i].tx_chan = i >= ai->nports0;
639 		sc->portdev[i] = child;
640 		device_set_softc(child, pi);
641 	}
642 	if ((error = bus_generic_attach(dev)) != 0)
643 		goto out;
644 
645 	/* initialize sge private state */
646 	t3_sge_init_adapter(sc);
647 
648 	t3_led_ready(sc);
649 
650 	cxgb_offload_init();
651 	if (is_offload(sc)) {
652 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
653 		cxgb_adapter_ofld(sc);
654         }
655 	error = t3_get_fw_version(sc, &vers);
656 	if (error)
657 		goto out;
658 
659 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
660 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
661 	    G_FW_VERSION_MICRO(vers));
662 
663 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
664 		 ai->desc, is_offload(sc) ? "R" : "",
665 		 sc->params.vpd.ec, sc->params.vpd.sn);
666 	device_set_desc_copy(dev, buf);
667 
668 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
669 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
670 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
671 
672 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
673 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
674 	t3_add_attach_sysctls(sc);
675 out:
676 	if (error)
677 		cxgb_free(sc);
678 
679 	return (error);
680 }
681 
682 /*
683  * The cxgb_controller_detach routine is called with the device is
684  * unloaded from the system.
685  */
686 
687 static int
688 cxgb_controller_detach(device_t dev)
689 {
690 	struct adapter *sc;
691 
692 	sc = device_get_softc(dev);
693 
694 	cxgb_free(sc);
695 
696 	return (0);
697 }
698 
699 /*
700  * The cxgb_free() is called by the cxgb_controller_detach() routine
701  * to tear down the structures that were built up in
702  * cxgb_controller_attach(), and should be the final piece of work
703  * done when fully unloading the driver.
704  *
705  *
706  *  1. Shutting down the threads started by the cxgb_controller_attach()
707  *     routine.
708  *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
709  *  3. Detaching all of the port devices created during the
710  *     cxgb_controller_attach() routine.
711  *  4. Removing the device children created via cxgb_controller_attach().
712  *  5. Releasing PCI resources associated with the device.
713  *  6. Turning off the offload support, iff it was turned on.
714  *  7. Destroying the mutexes created in cxgb_controller_attach().
715  *
716  */
717 static void
718 cxgb_free(struct adapter *sc)
719 {
720 	int i;
721 
722 	ADAPTER_LOCK(sc);
723 	sc->flags |= CXGB_SHUTDOWN;
724 	ADAPTER_UNLOCK(sc);
725 
726 	/*
727 	 * Make sure all child devices are gone.
728 	 */
729 	bus_generic_detach(sc->dev);
730 	for (i = 0; i < (sc)->params.nports; i++) {
731 		if (sc->portdev[i] &&
732 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
733 			device_printf(sc->dev, "failed to delete child port\n");
734 	}
735 
736 	/*
737 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
738 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
739 	 * all open devices have been closed.
740 	 */
741 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
742 					   __func__, sc->open_device_map));
743 	for (i = 0; i < sc->params.nports; i++) {
744 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
745 						  __func__, i));
746 	}
747 
748 	/*
749 	 * Finish off the adapter's callouts.
750 	 */
751 	callout_drain(&sc->cxgb_tick_ch);
752 	callout_drain(&sc->sge_timer_ch);
753 
754 	/*
755 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
756 	 * sysctls are cleaned up by the kernel linker.
757 	 */
758 	if (sc->flags & FULL_INIT_DONE) {
759  		t3_free_sge_resources(sc);
760  		sc->flags &= ~FULL_INIT_DONE;
761  	}
762 
763 	/*
764 	 * Release all interrupt resources.
765 	 */
766 	cxgb_teardown_interrupts(sc);
767 	if (sc->flags & (USING_MSI | USING_MSIX)) {
768 		device_printf(sc->dev, "releasing msi message(s)\n");
769 		pci_release_msi(sc->dev);
770 	} else {
771 		device_printf(sc->dev, "no msi message to release\n");
772 	}
773 
774 	if (sc->msix_regs_res != NULL) {
775 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
776 		    sc->msix_regs_res);
777 	}
778 
779 	/*
780 	 * Free the adapter's taskqueue.
781 	 */
782 	if (sc->tq != NULL) {
783 		taskqueue_free(sc->tq);
784 		sc->tq = NULL;
785 	}
786 
787 	if (is_offload(sc)) {
788 		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
789 		cxgb_adapter_unofld(sc);
790 	}
791 
792 #ifdef notyet
793 	if (sc->flags & CXGB_OFLD_INIT)
794 		cxgb_offload_deactivate(sc);
795 #endif
796 	free(sc->filters, M_DEVBUF);
797 	t3_sge_free(sc);
798 
799 	cxgb_offload_exit();
800 
801 	if (sc->udbs_res != NULL)
802 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
803 		    sc->udbs_res);
804 
805 	if (sc->regs_res != NULL)
806 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
807 		    sc->regs_res);
808 
809 	MTX_DESTROY(&sc->mdio_lock);
810 	MTX_DESTROY(&sc->sge.reg_lock);
811 	MTX_DESTROY(&sc->elmer_lock);
812 	ADAPTER_LOCK_DEINIT(sc);
813 }
814 
815 /**
816  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
817  *	@sc: the controller softc
818  *
819  *	Determines how many sets of SGE queues to use and initializes them.
820  *	We support multiple queue sets per port if we have MSI-X, otherwise
821  *	just one queue set per port.
822  */
823 static int
824 setup_sge_qsets(adapter_t *sc)
825 {
826 	int i, j, err, irq_idx = 0, qset_idx = 0;
827 	u_int ntxq = SGE_TXQ_PER_SET;
828 
829 	if ((err = t3_sge_alloc(sc)) != 0) {
830 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
831 		return (err);
832 	}
833 
834 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
835 		irq_idx = -1;
836 
837 	for (i = 0; i < (sc)->params.nports; i++) {
838 		struct port_info *pi = &sc->port[i];
839 
840 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
841 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
842 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
843 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
844 			if (err) {
845 				t3_free_sge_resources(sc);
846 				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
847 				    err);
848 				return (err);
849 			}
850 		}
851 	}
852 
853 	return (0);
854 }
855 
856 static void
857 cxgb_teardown_interrupts(adapter_t *sc)
858 {
859 	int i;
860 
861 	for (i = 0; i < SGE_QSETS; i++) {
862 		if (sc->msix_intr_tag[i] == NULL) {
863 
864 			/* Should have been setup fully or not at all */
865 			KASSERT(sc->msix_irq_res[i] == NULL &&
866 				sc->msix_irq_rid[i] == 0,
867 				("%s: half-done interrupt (%d).", __func__, i));
868 
869 			continue;
870 		}
871 
872 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
873 				  sc->msix_intr_tag[i]);
874 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
875 				     sc->msix_irq_res[i]);
876 
877 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
878 		sc->msix_irq_rid[i] = 0;
879 	}
880 
881 	if (sc->intr_tag) {
882 		KASSERT(sc->irq_res != NULL,
883 			("%s: half-done interrupt.", __func__));
884 
885 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
886 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
887 				     sc->irq_res);
888 
889 		sc->irq_res = sc->intr_tag = NULL;
890 		sc->irq_rid = 0;
891 	}
892 }
893 
894 static int
895 cxgb_setup_interrupts(adapter_t *sc)
896 {
897 	struct resource *res;
898 	void *tag;
899 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
900 
901 	sc->irq_rid = intr_flag ? 1 : 0;
902 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
903 					     RF_SHAREABLE | RF_ACTIVE);
904 	if (sc->irq_res == NULL) {
905 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
906 			      intr_flag, sc->irq_rid);
907 		err = EINVAL;
908 		sc->irq_rid = 0;
909 	} else {
910 		err = bus_setup_intr(sc->dev, sc->irq_res,
911 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
912 		    sc->cxgb_intr, sc, &sc->intr_tag);
913 
914 		if (err) {
915 			device_printf(sc->dev,
916 				      "Cannot set up interrupt (%x, %u, %d)\n",
917 				      intr_flag, sc->irq_rid, err);
918 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
919 					     sc->irq_res);
920 			sc->irq_res = sc->intr_tag = NULL;
921 			sc->irq_rid = 0;
922 		}
923 	}
924 
925 	/* That's all for INTx or MSI */
926 	if (!(intr_flag & USING_MSIX) || err)
927 		return (err);
928 
929 	for (i = 0; i < sc->msi_count - 1; i++) {
930 		rid = i + 2;
931 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
932 					     RF_SHAREABLE | RF_ACTIVE);
933 		if (res == NULL) {
934 			device_printf(sc->dev, "Cannot allocate interrupt "
935 				      "for message %d\n", rid);
936 			err = EINVAL;
937 			break;
938 		}
939 
940 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
941 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
942 		if (err) {
943 			device_printf(sc->dev, "Cannot set up interrupt "
944 				      "for message %d (%d)\n", rid, err);
945 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
946 			break;
947 		}
948 
949 		sc->msix_irq_rid[i] = rid;
950 		sc->msix_irq_res[i] = res;
951 		sc->msix_intr_tag[i] = tag;
952 	}
953 
954 	if (err)
955 		cxgb_teardown_interrupts(sc);
956 
957 	return (err);
958 }
959 
960 
961 static int
962 cxgb_port_probe(device_t dev)
963 {
964 	struct port_info *p;
965 	char buf[80];
966 	const char *desc;
967 
968 	p = device_get_softc(dev);
969 	desc = p->phy.desc;
970 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
971 	device_set_desc_copy(dev, buf);
972 	return (0);
973 }
974 
975 
976 static int
977 cxgb_makedev(struct port_info *pi)
978 {
979 
980 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
981 	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
982 
983 	if (pi->port_cdev == NULL)
984 		return (ENOMEM);
985 
986 	pi->port_cdev->si_drv1 = (void *)pi;
987 
988 	return (0);
989 }
990 
/* Interface capabilities advertised by every port (if_capabilities). */
#define CXGB_CAP \
    (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)

/* Capabilities enabled by default (if_capenable): all of them but TSO6. */
#define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
995 
996 static int
997 cxgb_port_attach(device_t dev)
998 {
999 	struct port_info *p;
1000 	struct ifnet *ifp;
1001 	int err;
1002 	struct adapter *sc;
1003 
1004 	p = device_get_softc(dev);
1005 	sc = p->adapter;
1006 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1007 	    device_get_unit(device_get_parent(dev)), p->port_id);
1008 	PORT_LOCK_INIT(p, p->lockbuf);
1009 
1010 	callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1011 	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1012 
1013 	/* Allocate an ifnet object and set it up */
1014 	ifp = p->ifp = if_alloc(IFT_ETHER);
1015 	if (ifp == NULL) {
1016 		device_printf(dev, "Cannot allocate ifnet\n");
1017 		return (ENOMEM);
1018 	}
1019 
1020 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1021 	ifp->if_init = cxgb_init;
1022 	ifp->if_softc = p;
1023 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1024 	ifp->if_ioctl = cxgb_ioctl;
1025 	ifp->if_start = cxgb_start;
1026 
1027 	ifp->if_snd.ifq_drv_maxlen = max(cxgb_snd_queue_len, ifqmaxlen);
1028 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
1029 	IFQ_SET_READY(&ifp->if_snd);
1030 
1031 	ifp->if_capabilities = CXGB_CAP;
1032 	ifp->if_capenable = CXGB_CAP_ENABLE;
1033 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1034 
1035 	/*
1036 	 * Disable TSO on 4-port - it isn't supported by the firmware.
1037 	 */
1038 	if (sc->params.nports > 2) {
1039 		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1040 		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1041 		ifp->if_hwassist &= ~CSUM_TSO;
1042 	}
1043 
1044 	ether_ifattach(ifp, p->hw_addr);
1045 	ifp->if_transmit = cxgb_transmit;
1046 	ifp->if_qflush = cxgb_qflush;
1047 
1048 #ifdef DEFAULT_JUMBO
1049 	if (sc->params.nports <= 2)
1050 		ifp->if_mtu = ETHERMTU_JUMBO;
1051 #endif
1052 	if ((err = cxgb_makedev(p)) != 0) {
1053 		printf("makedev failed %d\n", err);
1054 		return (err);
1055 	}
1056 
1057 	/* Create a list of media supported by this port */
1058 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1059 	    cxgb_media_status);
1060 	cxgb_build_medialist(p);
1061 
1062 	t3_sge_init_port(p);
1063 
1064 	return (err);
1065 }
1066 
1067 /*
1068  * cxgb_port_detach() is called via the device_detach methods when
1069  * cxgb_free() calls the bus_generic_detach.  It is responsible for
1070  * removing the device from the view of the kernel, i.e. from all
1071  * interfaces lists etc.  This routine is only called when the driver is
1072  * being unloaded, not when the link goes down.
1073  */
1074 static int
1075 cxgb_port_detach(device_t dev)
1076 {
1077 	struct port_info *p;
1078 	struct adapter *sc;
1079 	int i;
1080 
1081 	p = device_get_softc(dev);
1082 	sc = p->adapter;
1083 
1084 	/* Tell cxgb_ioctl and if_init that the port is going away */
1085 	ADAPTER_LOCK(sc);
1086 	SET_DOOMED(p);
1087 	wakeup(&sc->flags);
1088 	while (IS_BUSY(sc))
1089 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1090 	SET_BUSY(sc);
1091 	ADAPTER_UNLOCK(sc);
1092 
1093 	if (p->port_cdev != NULL)
1094 		destroy_dev(p->port_cdev);
1095 
1096 	cxgb_uninit_synchronized(p);
1097 	ether_ifdetach(p->ifp);
1098 
1099 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1100 		struct sge_qset *qs = &sc->sge.qs[i];
1101 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1102 
1103 		callout_drain(&txq->txq_watchdog);
1104 		callout_drain(&txq->txq_timer);
1105 	}
1106 
1107 	PORT_LOCK_DEINIT(p);
1108 	if_free(p->ifp);
1109 	p->ifp = NULL;
1110 
1111 	ADAPTER_LOCK(sc);
1112 	CLR_BUSY(sc);
1113 	wakeup_one(&sc->flags);
1114 	ADAPTER_UNLOCK(sc);
1115 	return (0);
1116 }
1117 
1118 void
1119 t3_fatal_err(struct adapter *sc)
1120 {
1121 	u_int fw_status[4];
1122 
1123 	if (sc->flags & FULL_INIT_DONE) {
1124 		t3_sge_stop(sc);
1125 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1126 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1127 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1128 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1129 		t3_intr_disable(sc);
1130 	}
1131 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1132 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1133 		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1134 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1135 }
1136 
1137 int
1138 t3_os_find_pci_capability(adapter_t *sc, int cap)
1139 {
1140 	device_t dev;
1141 	struct pci_devinfo *dinfo;
1142 	pcicfgregs *cfg;
1143 	uint32_t status;
1144 	uint8_t ptr;
1145 
1146 	dev = sc->dev;
1147 	dinfo = device_get_ivars(dev);
1148 	cfg = &dinfo->cfg;
1149 
1150 	status = pci_read_config(dev, PCIR_STATUS, 2);
1151 	if (!(status & PCIM_STATUS_CAPPRESENT))
1152 		return (0);
1153 
1154 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1155 	case 0:
1156 	case 1:
1157 		ptr = PCIR_CAP_PTR;
1158 		break;
1159 	case 2:
1160 		ptr = PCIR_CAP_PTR_2;
1161 		break;
1162 	default:
1163 		return (0);
1164 		break;
1165 	}
1166 	ptr = pci_read_config(dev, ptr, 1);
1167 
1168 	while (ptr != 0) {
1169 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1170 			return (ptr);
1171 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1172 	}
1173 
1174 	return (0);
1175 }
1176 
1177 int
1178 t3_os_pci_save_state(struct adapter *sc)
1179 {
1180 	device_t dev;
1181 	struct pci_devinfo *dinfo;
1182 
1183 	dev = sc->dev;
1184 	dinfo = device_get_ivars(dev);
1185 
1186 	pci_cfg_save(dev, dinfo, 0);
1187 	return (0);
1188 }
1189 
1190 int
1191 t3_os_pci_restore_state(struct adapter *sc)
1192 {
1193 	device_t dev;
1194 	struct pci_devinfo *dinfo;
1195 
1196 	dev = sc->dev;
1197 	dinfo = device_get_ivars(dev);
1198 
1199 	pci_cfg_restore(dev, dinfo);
1200 	return (0);
1201 }
1202 
1203 /**
1204  *	t3_os_link_changed - handle link status changes
1205  *	@sc: the adapter associated with the link change
1206  *	@port_id: the port index whose link status has changed
1207  *	@link_status: the new status of the link
1208  *	@speed: the new speed setting
1209  *	@duplex: the new duplex setting
1210  *	@fc: the new flow-control setting
1211  *
1212  *	This is the OS-dependent handler for link status changes.  The OS
1213  *	neutral handler takes care of most of the processing for these events,
1214  *	then calls this handler for any OS-specific processing.
1215  */
1216 void
1217 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1218      int duplex, int fc, int mac_was_reset)
1219 {
1220 	struct port_info *pi = &adapter->port[port_id];
1221 	struct ifnet *ifp = pi->ifp;
1222 
1223 	/* no race with detach, so ifp should always be good */
1224 	KASSERT(ifp, ("%s: if detached.", __func__));
1225 
1226 	/* Reapply mac settings if they were lost due to a reset */
1227 	if (mac_was_reset) {
1228 		PORT_LOCK(pi);
1229 		cxgb_update_mac_settings(pi);
1230 		PORT_UNLOCK(pi);
1231 	}
1232 
1233 	if (link_status) {
1234 		ifp->if_baudrate = IF_Mbps(speed);
1235 		if_link_state_change(ifp, LINK_STATE_UP);
1236 	} else
1237 		if_link_state_change(ifp, LINK_STATE_DOWN);
1238 }
1239 
1240 /**
1241  *	t3_os_phymod_changed - handle PHY module changes
1242  *	@phy: the PHY reporting the module change
1243  *	@mod_type: new module type
1244  *
1245  *	This is the OS-dependent handler for PHY module changes.  It is
1246  *	invoked when a PHY module is removed or inserted for any OS-specific
1247  *	processing.
1248  */
1249 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1250 {
1251 	static const char *mod_str[] = {
1252 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1253 	};
1254 	struct port_info *pi = &adap->port[port_id];
1255 	int mod = pi->phy.modtype;
1256 
1257 	if (mod != pi->media.ifm_cur->ifm_data)
1258 		cxgb_build_medialist(pi);
1259 
1260 	if (mod == phy_modtype_none)
1261 		if_printf(pi->ifp, "PHY module unplugged\n");
1262 	else {
1263 		KASSERT(mod < ARRAY_SIZE(mod_str),
1264 			("invalid PHY module type %d", mod));
1265 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1266 	}
1267 }
1268 
1269 void
1270 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1271 {
1272 
1273 	/*
1274 	 * The ifnet might not be allocated before this gets called,
1275 	 * as this is called early on in attach by t3_prep_adapter
1276 	 * save the address off in the port structure
1277 	 */
1278 	if (cxgb_debug)
1279 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1280 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1281 }
1282 
1283 /*
1284  * Programs the XGMAC based on the settings in the ifnet.  These settings
1285  * include MTU, MAC address, mcast addresses, etc.
1286  */
1287 static void
1288 cxgb_update_mac_settings(struct port_info *p)
1289 {
1290 	struct ifnet *ifp = p->ifp;
1291 	struct t3_rx_mode rm;
1292 	struct cmac *mac = &p->mac;
1293 	int mtu, hwtagging;
1294 
1295 	PORT_LOCK_ASSERT_OWNED(p);
1296 
1297 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1298 
1299 	mtu = ifp->if_mtu;
1300 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1301 		mtu += ETHER_VLAN_ENCAP_LEN;
1302 
1303 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1304 
1305 	t3_mac_set_mtu(mac, mtu);
1306 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1307 	t3_mac_set_address(mac, 0, p->hw_addr);
1308 	t3_init_rx_mode(&rm, p);
1309 	t3_mac_set_rx_mode(mac, &rm);
1310 }
1311 
1312 
1313 static int
1314 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1315 			      unsigned long n)
1316 {
1317 	int attempts = 5;
1318 
1319 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1320 		if (!--attempts)
1321 			return (ETIMEDOUT);
1322 		t3_os_sleep(10);
1323 	}
1324 	return 0;
1325 }
1326 
1327 static int
1328 init_tp_parity(struct adapter *adap)
1329 {
1330 	int i;
1331 	struct mbuf *m;
1332 	struct cpl_set_tcb_field *greq;
1333 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1334 
1335 	t3_tp_set_offload_mode(adap, 1);
1336 
1337 	for (i = 0; i < 16; i++) {
1338 		struct cpl_smt_write_req *req;
1339 
1340 		m = m_gethdr(M_WAITOK, MT_DATA);
1341 		req = mtod(m, struct cpl_smt_write_req *);
1342 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1343 		memset(req, 0, sizeof(*req));
1344 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1345 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1346 		req->iff = i;
1347 		t3_mgmt_tx(adap, m);
1348 	}
1349 
1350 	for (i = 0; i < 2048; i++) {
1351 		struct cpl_l2t_write_req *req;
1352 
1353 		m = m_gethdr(M_WAITOK, MT_DATA);
1354 		req = mtod(m, struct cpl_l2t_write_req *);
1355 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1356 		memset(req, 0, sizeof(*req));
1357 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1358 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1359 		req->params = htonl(V_L2T_W_IDX(i));
1360 		t3_mgmt_tx(adap, m);
1361 	}
1362 
1363 	for (i = 0; i < 2048; i++) {
1364 		struct cpl_rte_write_req *req;
1365 
1366 		m = m_gethdr(M_WAITOK, MT_DATA);
1367 		req = mtod(m, struct cpl_rte_write_req *);
1368 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1369 		memset(req, 0, sizeof(*req));
1370 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1371 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1372 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1373 		t3_mgmt_tx(adap, m);
1374 	}
1375 
1376 	m = m_gethdr(M_WAITOK, MT_DATA);
1377 	greq = mtod(m, struct cpl_set_tcb_field *);
1378 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1379 	memset(greq, 0, sizeof(*greq));
1380 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1381 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1382 	greq->mask = htobe64(1);
1383 	t3_mgmt_tx(adap, m);
1384 
1385 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1386 	t3_tp_set_offload_mode(adap, 0);
1387 	return (i);
1388 }
1389 
1390 /**
1391  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1392  *	@adap: the adapter
1393  *
1394  *	Sets up RSS to distribute packets to multiple receive queues.  We
1395  *	configure the RSS CPU lookup table to distribute to the number of HW
1396  *	receive queues, and the response queue lookup table to narrow that
1397  *	down to the response queues actually configured for each port.
1398  *	We always configure the RSS mapping for two ports since the mapping
1399  *	table has plenty of entries.
1400  */
1401 static void
1402 setup_rss(adapter_t *adap)
1403 {
1404 	int i;
1405 	u_int nq[2];
1406 	uint8_t cpus[SGE_QSETS + 1];
1407 	uint16_t rspq_map[RSS_TABLE_SIZE];
1408 
1409 	for (i = 0; i < SGE_QSETS; ++i)
1410 		cpus[i] = i;
1411 	cpus[SGE_QSETS] = 0xff;
1412 
1413 	nq[0] = nq[1] = 0;
1414 	for_each_port(adap, i) {
1415 		const struct port_info *pi = adap2pinfo(adap, i);
1416 
1417 		nq[pi->tx_chan] += pi->nqsets;
1418 	}
1419 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1420 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1421 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1422 	}
1423 
1424 	/* Calculate the reverse RSS map table */
1425 	for (i = 0; i < SGE_QSETS; ++i)
1426 		adap->rrss_map[i] = 0xff;
1427 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1428 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1429 			adap->rrss_map[rspq_map[i]] = i;
1430 
1431 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1432 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1433 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1434 	              cpus, rspq_map);
1435 
1436 }
1437 
/*
 * Hands an mbuf to the offload device for transmission.  This is a thin
 * wrapper that forwards to t3_offload_tx() and returns its result.
 */
static inline int
offload_tx(struct t3cdev *tdev, struct mbuf *m)
{

	return (t3_offload_tx(tdev, m));
}
1450 
1451 static int
1452 write_smt_entry(struct adapter *adapter, int idx)
1453 {
1454 	struct port_info *pi = &adapter->port[idx];
1455 	struct cpl_smt_write_req *req;
1456 	struct mbuf *m;
1457 
1458 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1459 		return (ENOMEM);
1460 
1461 	req = mtod(m, struct cpl_smt_write_req *);
1462 	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1463 
1464 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1465 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1466 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1467 	req->iff = idx;
1468 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1469 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1470 
1471 	m_set_priority(m, 1);
1472 
1473 	offload_tx(&adapter->tdev, m);
1474 
1475 	return (0);
1476 }
1477 
1478 static int
1479 init_smt(struct adapter *adapter)
1480 {
1481 	int i;
1482 
1483 	for_each_port(adapter, i)
1484 		write_smt_entry(adapter, i);
1485 	return 0;
1486 }
1487 
1488 static void
1489 init_port_mtus(adapter_t *adapter)
1490 {
1491 	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1492 
1493 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1494 }
1495 
1496 static void
1497 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1498 			      int hi, int port)
1499 {
1500 	struct mbuf *m;
1501 	struct mngt_pktsched_wr *req;
1502 
1503 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1504 	if (m) {
1505 		req = mtod(m, struct mngt_pktsched_wr *);
1506 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1507 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1508 		req->sched = sched;
1509 		req->idx = qidx;
1510 		req->min = lo;
1511 		req->max = hi;
1512 		req->binding = port;
1513 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1514 		t3_mgmt_tx(adap, m);
1515 	}
1516 }
1517 
1518 static void
1519 bind_qsets(adapter_t *sc)
1520 {
1521 	int i, j;
1522 
1523 	for (i = 0; i < (sc)->params.nports; ++i) {
1524 		const struct port_info *pi = adap2pinfo(sc, i);
1525 
1526 		for (j = 0; j < pi->nqsets; ++j) {
1527 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1528 					  -1, pi->tx_chan);
1529 
1530 		}
1531 	}
1532 }
1533 
1534 static void
1535 update_tpeeprom(struct adapter *adap)
1536 {
1537 	const struct firmware *tpeeprom;
1538 
1539 	uint32_t version;
1540 	unsigned int major, minor;
1541 	int ret, len;
1542 	char rev, name[32];
1543 
1544 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1545 
1546 	major = G_TP_VERSION_MAJOR(version);
1547 	minor = G_TP_VERSION_MINOR(version);
1548 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1549 		return;
1550 
1551 	rev = t3rev2char(adap);
1552 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1553 
1554 	tpeeprom = firmware_get(name);
1555 	if (tpeeprom == NULL) {
1556 		device_printf(adap->dev,
1557 			      "could not load TP EEPROM: unable to load %s\n",
1558 			      name);
1559 		return;
1560 	}
1561 
1562 	len = tpeeprom->datasize - 4;
1563 
1564 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1565 	if (ret)
1566 		goto release_tpeeprom;
1567 
1568 	if (len != TP_SRAM_LEN) {
1569 		device_printf(adap->dev,
1570 			      "%s length is wrong len=%d expected=%d\n", name,
1571 			      len, TP_SRAM_LEN);
1572 		return;
1573 	}
1574 
1575 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1576 	    TP_SRAM_OFFSET);
1577 
1578 	if (!ret) {
1579 		device_printf(adap->dev,
1580 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1581 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1582 	} else
1583 		device_printf(adap->dev,
1584 			      "Protocol SRAM image update in EEPROM failed\n");
1585 
1586 release_tpeeprom:
1587 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1588 
1589 	return;
1590 }
1591 
1592 static int
1593 update_tpsram(struct adapter *adap)
1594 {
1595 	const struct firmware *tpsram;
1596 	int ret;
1597 	char rev, name[32];
1598 
1599 	rev = t3rev2char(adap);
1600 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1601 
1602 	update_tpeeprom(adap);
1603 
1604 	tpsram = firmware_get(name);
1605 	if (tpsram == NULL){
1606 		device_printf(adap->dev, "could not load TP SRAM\n");
1607 		return (EINVAL);
1608 	} else
1609 		device_printf(adap->dev, "updating TP SRAM\n");
1610 
1611 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1612 	if (ret)
1613 		goto release_tpsram;
1614 
1615 	ret = t3_set_proto_sram(adap, tpsram->data);
1616 	if (ret)
1617 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1618 
1619 release_tpsram:
1620 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1621 
1622 	return ret;
1623 }
1624 
1625 /**
1626  *	cxgb_up - enable the adapter
1627  *	@adap: adapter being enabled
1628  *
1629  *	Called when the first port is enabled, this function performs the
1630  *	actions necessary to make an adapter operational, such as completing
1631  *	the initialization of HW modules, and enabling interrupts.
1632  */
1633 static int
1634 cxgb_up(struct adapter *sc)
1635 {
1636 	int err = 0;
1637 	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1638 
1639 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1640 					   __func__, sc->open_device_map));
1641 
1642 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1643 
1644 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1645 
1646 		if ((sc->flags & FW_UPTODATE) == 0)
1647 			if ((err = upgrade_fw(sc)))
1648 				goto out;
1649 
1650 		if ((sc->flags & TPS_UPTODATE) == 0)
1651 			if ((err = update_tpsram(sc)))
1652 				goto out;
1653 
1654 		if (is_offload(sc) && nfilters != 0) {
1655 			sc->params.mc5.nservers = 0;
1656 
1657 			if (nfilters < 0)
1658 				sc->params.mc5.nfilters = mxf;
1659 			else
1660 				sc->params.mc5.nfilters = min(nfilters, mxf);
1661 		}
1662 
1663 		err = t3_init_hw(sc, 0);
1664 		if (err)
1665 			goto out;
1666 
1667 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1668 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1669 
1670 		err = setup_sge_qsets(sc);
1671 		if (err)
1672 			goto out;
1673 
1674 		alloc_filters(sc);
1675 		setup_rss(sc);
1676 
1677 		t3_intr_clear(sc);
1678 		err = cxgb_setup_interrupts(sc);
1679 		if (err)
1680 			goto out;
1681 
1682 		t3_add_configured_sysctls(sc);
1683 		sc->flags |= FULL_INIT_DONE;
1684 	}
1685 
1686 	t3_intr_clear(sc);
1687 	t3_sge_start(sc);
1688 	t3_intr_enable(sc);
1689 
1690 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1691 	    is_offload(sc) && init_tp_parity(sc) == 0)
1692 		sc->flags |= TP_PARITY_INIT;
1693 
1694 	if (sc->flags & TP_PARITY_INIT) {
1695 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1696 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1697 	}
1698 
1699 	if (!(sc->flags & QUEUES_BOUND)) {
1700 		bind_qsets(sc);
1701 		setup_hw_filters(sc);
1702 		sc->flags |= QUEUES_BOUND;
1703 	}
1704 
1705 	t3_sge_reset_adapter(sc);
1706 out:
1707 	return (err);
1708 }
1709 
/*
 * Counterpart of cxgb_up(), run when the last open device is closed.  It
 * only stops the SGE and disables interrupts; it does NOT undo the rest of
 * cxgb_up's work.  In particular the resources acquired under
 * FULL_INIT_DONE stay allocated until controller_detach.
 */
static void
cxgb_down(struct adapter *sc)
{

	t3_sge_stop(sc);
	t3_intr_disable(sc);
}
1721 
1722 static int
1723 offload_open(struct port_info *pi)
1724 {
1725 	struct adapter *sc = pi->adapter;
1726 	struct t3cdev *tdev = &sc->tdev;
1727 
1728 	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1729 
1730 	t3_tp_set_offload_mode(sc, 1);
1731 	tdev->lldev = pi->ifp;
1732 	init_port_mtus(sc);
1733 	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1734 		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1735 	init_smt(sc);
1736 	cxgb_add_clients(tdev);
1737 
1738 	return (0);
1739 }
1740 
1741 static int
1742 offload_close(struct t3cdev *tdev)
1743 {
1744 	struct adapter *adapter = tdev2adap(tdev);
1745 
1746 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1747 		return (0);
1748 
1749 	/* Call back all registered clients */
1750 	cxgb_remove_clients(tdev);
1751 
1752 	tdev->lldev = NULL;
1753 	cxgb_set_dummy_ops(tdev);
1754 	t3_tp_set_offload_mode(adapter, 0);
1755 
1756 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1757 
1758 	return (0);
1759 }
1760 
1761 /*
1762  * if_init for cxgb ports.
1763  */
1764 static void
1765 cxgb_init(void *arg)
1766 {
1767 	struct port_info *p = arg;
1768 	struct adapter *sc = p->adapter;
1769 
1770 	ADAPTER_LOCK(sc);
1771 	cxgb_init_locked(p); /* releases adapter lock */
1772 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1773 }
1774 
1775 static int
1776 cxgb_init_locked(struct port_info *p)
1777 {
1778 	struct adapter *sc = p->adapter;
1779 	struct ifnet *ifp = p->ifp;
1780 	struct cmac *mac = &p->mac;
1781 	int i, rc = 0, may_sleep = 0;
1782 
1783 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1784 
1785 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1786 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1787 			rc = EINTR;
1788 			goto done;
1789 		}
1790 	}
1791 	if (IS_DOOMED(p)) {
1792 		rc = ENXIO;
1793 		goto done;
1794 	}
1795 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1796 
1797 	/*
1798 	 * The code that runs during one-time adapter initialization can sleep
1799 	 * so it's important not to hold any locks across it.
1800 	 */
1801 	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1802 
1803 	if (may_sleep) {
1804 		SET_BUSY(sc);
1805 		ADAPTER_UNLOCK(sc);
1806 	}
1807 
1808 	if (sc->open_device_map == 0) {
1809 		if ((rc = cxgb_up(sc)) != 0)
1810 			goto done;
1811 
1812 		if (is_offload(sc) && !ofld_disable && offload_open(p))
1813 			log(LOG_WARNING,
1814 			    "Could not initialize offload capabilities\n");
1815 	}
1816 
1817 	PORT_LOCK(p);
1818 	if (isset(&sc->open_device_map, p->port_id) &&
1819 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1820 		PORT_UNLOCK(p);
1821 		goto done;
1822 	}
1823 	t3_port_intr_enable(sc, p->port_id);
1824 	if (!mac->multiport)
1825 		t3_mac_init(mac);
1826 	cxgb_update_mac_settings(p);
1827 	t3_link_start(&p->phy, mac, &p->link_config);
1828 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1829 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1830 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1831 	PORT_UNLOCK(p);
1832 
1833 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1834 		struct sge_qset *qs = &sc->sge.qs[i];
1835 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1836 
1837 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1838 				 txq->txq_watchdog.c_cpu);
1839 	}
1840 
1841 	/* all ok */
1842 	setbit(&sc->open_device_map, p->port_id);
1843 	callout_reset(&p->link_check_ch,
1844 	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1845 	    link_check_callout, p);
1846 
1847 done:
1848 	if (may_sleep) {
1849 		ADAPTER_LOCK(sc);
1850 		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1851 		CLR_BUSY(sc);
1852 		wakeup_one(&sc->flags);
1853 	}
1854 	ADAPTER_UNLOCK(sc);
1855 	return (rc);
1856 }
1857 
1858 static int
1859 cxgb_uninit_locked(struct port_info *p)
1860 {
1861 	struct adapter *sc = p->adapter;
1862 	int rc;
1863 
1864 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1865 
1866 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1867 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1868 			rc = EINTR;
1869 			goto done;
1870 		}
1871 	}
1872 	if (IS_DOOMED(p)) {
1873 		rc = ENXIO;
1874 		goto done;
1875 	}
1876 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1877 	SET_BUSY(sc);
1878 	ADAPTER_UNLOCK(sc);
1879 
1880 	rc = cxgb_uninit_synchronized(p);
1881 
1882 	ADAPTER_LOCK(sc);
1883 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1884 	CLR_BUSY(sc);
1885 	wakeup_one(&sc->flags);
1886 done:
1887 	ADAPTER_UNLOCK(sc);
1888 	return (rc);
1889 }
1890 
1891 /*
1892  * Called on "ifconfig down", and from port_detach
1893  */
1894 static int
1895 cxgb_uninit_synchronized(struct port_info *pi)
1896 {
1897 	struct adapter *sc = pi->adapter;
1898 	struct ifnet *ifp = pi->ifp;
1899 
1900 	/*
1901 	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1902 	 */
1903 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1904 
1905 	/*
1906 	 * Clear this port's bit from the open device map, and then drain all
1907 	 * the tasks that can access/manipulate this port's port_info or ifp.
1908 	 * We disable this port's interrupts here and so the the slow/ext
1909 	 * interrupt tasks won't be enqueued.  The tick task will continue to
1910 	 * be enqueued every second but the runs after this drain will not see
1911 	 * this port in the open device map.
1912 	 *
1913 	 * A well behaved task must take open_device_map into account and ignore
1914 	 * ports that are not open.
1915 	 */
1916 	clrbit(&sc->open_device_map, pi->port_id);
1917 	t3_port_intr_disable(sc, pi->port_id);
1918 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1919 	taskqueue_drain(sc->tq, &sc->tick_task);
1920 
1921 	callout_drain(&pi->link_check_ch);
1922 	taskqueue_drain(sc->tq, &pi->link_check_task);
1923 
1924 	PORT_LOCK(pi);
1925 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1926 
1927 	/* disable pause frames */
1928 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1929 
1930 	/* Reset RX FIFO HWM */
1931 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1932 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1933 
1934 	DELAY(100 * 1000);
1935 
1936 	/* Wait for TXFIFO empty */
1937 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1938 			F_TXFIFO_EMPTY, 1, 20, 5);
1939 
1940 	DELAY(100 * 1000);
1941 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1942 
1943 
1944 	pi->phy.ops->power_down(&pi->phy, 1);
1945 
1946 	PORT_UNLOCK(pi);
1947 
1948 	pi->link_config.link_ok = 0;
1949 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1950 
1951 	if ((sc->open_device_map & PORT_MASK) == 0)
1952 		offload_close(&sc->tdev);
1953 
1954 	if (sc->open_device_map == 0)
1955 		cxgb_down(pi->adapter);
1956 
1957 	return (0);
1958 }
1959 
1960 /*
1961  * Mark lro enabled or disabled in all qsets for this port
1962  */
1963 static int
1964 cxgb_set_lro(struct port_info *p, int enabled)
1965 {
1966 	int i;
1967 	struct adapter *adp = p->adapter;
1968 	struct sge_qset *q;
1969 
1970 	for (i = 0; i < p->nqsets; i++) {
1971 		q = &adp->sge.qs[p->first_qset + i];
1972 		q->lro.enabled = (enabled != 0);
1973 	}
1974 	return (0);
1975 }
1976 
1977 static int
1978 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1979 {
1980 	struct port_info *p = ifp->if_softc;
1981 	struct adapter *sc = p->adapter;
1982 	struct ifreq *ifr = (struct ifreq *)data;
1983 	int flags, error = 0, mtu;
1984 	uint32_t mask;
1985 
1986 	switch (command) {
1987 	case SIOCSIFMTU:
1988 		ADAPTER_LOCK(sc);
1989 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1990 		if (error) {
1991 fail:
1992 			ADAPTER_UNLOCK(sc);
1993 			return (error);
1994 		}
1995 
1996 		mtu = ifr->ifr_mtu;
1997 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1998 			error = EINVAL;
1999 		} else {
2000 			ifp->if_mtu = mtu;
2001 			PORT_LOCK(p);
2002 			cxgb_update_mac_settings(p);
2003 			PORT_UNLOCK(p);
2004 		}
2005 		ADAPTER_UNLOCK(sc);
2006 		break;
2007 	case SIOCSIFFLAGS:
2008 		ADAPTER_LOCK(sc);
2009 		if (IS_DOOMED(p)) {
2010 			error = ENXIO;
2011 			goto fail;
2012 		}
2013 		if (ifp->if_flags & IFF_UP) {
2014 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2015 				flags = p->if_flags;
2016 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2017 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2018 					if (IS_BUSY(sc)) {
2019 						error = EBUSY;
2020 						goto fail;
2021 					}
2022 					PORT_LOCK(p);
2023 					cxgb_update_mac_settings(p);
2024 					PORT_UNLOCK(p);
2025 				}
2026 				ADAPTER_UNLOCK(sc);
2027 			} else
2028 				error = cxgb_init_locked(p);
2029 			p->if_flags = ifp->if_flags;
2030 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2031 			error = cxgb_uninit_locked(p);
2032 		else
2033 			ADAPTER_UNLOCK(sc);
2034 
2035 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2036 		break;
2037 	case SIOCADDMULTI:
2038 	case SIOCDELMULTI:
2039 		ADAPTER_LOCK(sc);
2040 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2041 		if (error)
2042 			goto fail;
2043 
2044 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2045 			PORT_LOCK(p);
2046 			cxgb_update_mac_settings(p);
2047 			PORT_UNLOCK(p);
2048 		}
2049 		ADAPTER_UNLOCK(sc);
2050 
2051 		break;
2052 	case SIOCSIFCAP:
2053 		ADAPTER_LOCK(sc);
2054 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2055 		if (error)
2056 			goto fail;
2057 
2058 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2059 		if (mask & IFCAP_TXCSUM) {
2060 			ifp->if_capenable ^= IFCAP_TXCSUM;
2061 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2062 
2063 			if (IFCAP_TSO & ifp->if_capenable &&
2064 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2065 				ifp->if_capenable &= ~IFCAP_TSO;
2066 				ifp->if_hwassist &= ~CSUM_TSO;
2067 				if_printf(ifp,
2068 				    "tso disabled due to -txcsum.\n");
2069 			}
2070 		}
2071 		if (mask & IFCAP_RXCSUM)
2072 			ifp->if_capenable ^= IFCAP_RXCSUM;
2073 		if (mask & IFCAP_TSO4) {
2074 			ifp->if_capenable ^= IFCAP_TSO4;
2075 
2076 			if (IFCAP_TSO & ifp->if_capenable) {
2077 				if (IFCAP_TXCSUM & ifp->if_capenable)
2078 					ifp->if_hwassist |= CSUM_TSO;
2079 				else {
2080 					ifp->if_capenable &= ~IFCAP_TSO;
2081 					ifp->if_hwassist &= ~CSUM_TSO;
2082 					if_printf(ifp,
2083 					    "enable txcsum first.\n");
2084 					error = EAGAIN;
2085 				}
2086 			} else
2087 				ifp->if_hwassist &= ~CSUM_TSO;
2088 		}
2089 		if (mask & IFCAP_LRO) {
2090 			ifp->if_capenable ^= IFCAP_LRO;
2091 
2092 			/* Safe to do this even if cxgb_up not called yet */
2093 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2094 		}
2095 		if (mask & IFCAP_VLAN_HWTAGGING) {
2096 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2097 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2098 				PORT_LOCK(p);
2099 				cxgb_update_mac_settings(p);
2100 				PORT_UNLOCK(p);
2101 			}
2102 		}
2103 		if (mask & IFCAP_VLAN_MTU) {
2104 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2105 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2106 				PORT_LOCK(p);
2107 				cxgb_update_mac_settings(p);
2108 				PORT_UNLOCK(p);
2109 			}
2110 		}
2111 		if (mask & IFCAP_VLAN_HWTSO)
2112 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2113 		if (mask & IFCAP_VLAN_HWCSUM)
2114 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2115 
2116 #ifdef VLAN_CAPABILITIES
2117 		VLAN_CAPABILITIES(ifp);
2118 #endif
2119 		ADAPTER_UNLOCK(sc);
2120 		break;
2121 	case SIOCSIFMEDIA:
2122 	case SIOCGIFMEDIA:
2123 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2124 		break;
2125 	default:
2126 		error = ether_ioctl(ifp, command, data);
2127 	}
2128 
2129 	return (error);
2130 }
2131 
/*
 * Media change request handler.  The request is always refused with
 * EOPNOTSUPP; ifp is not examined.
 */
static int
cxgb_media_change(struct ifnet *ifp)
{
	const int rc = EOPNOTSUPP;

	return (rc);
}
2137 
2138 /*
2139  * Translates phy->modtype to the correct Ethernet media subtype.
2140  */
2141 static int
2142 cxgb_ifm_type(int mod)
2143 {
2144 	switch (mod) {
2145 	case phy_modtype_sr:
2146 		return (IFM_10G_SR);
2147 	case phy_modtype_lr:
2148 		return (IFM_10G_LR);
2149 	case phy_modtype_lrm:
2150 		return (IFM_10G_LRM);
2151 	case phy_modtype_twinax:
2152 		return (IFM_10G_TWINAX);
2153 	case phy_modtype_twinax_long:
2154 		return (IFM_10G_TWINAX_LONG);
2155 	case phy_modtype_none:
2156 		return (IFM_NONE);
2157 	case phy_modtype_unknown:
2158 		return (IFM_UNKNOWN);
2159 	}
2160 
2161 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2162 	return (IFM_UNKNOWN);
2163 }
2164 
2165 /*
2166  * Rebuilds the ifmedia list for this port, and sets the current media.
2167  */
2168 static void
2169 cxgb_build_medialist(struct port_info *p)
2170 {
2171 	struct cphy *phy = &p->phy;
2172 	struct ifmedia *media = &p->media;
2173 	int mod = phy->modtype;
2174 	int m = IFM_ETHER | IFM_FDX;
2175 
2176 	PORT_LOCK(p);
2177 
2178 	ifmedia_removeall(media);
2179 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2180 		/* Copper (RJ45) */
2181 
2182 		if (phy->caps & SUPPORTED_10000baseT_Full)
2183 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2184 
2185 		if (phy->caps & SUPPORTED_1000baseT_Full)
2186 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2187 
2188 		if (phy->caps & SUPPORTED_100baseT_Full)
2189 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2190 
2191 		if (phy->caps & SUPPORTED_10baseT_Full)
2192 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2193 
2194 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2195 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2196 
2197 	} else if (phy->caps & SUPPORTED_TP) {
2198 		/* Copper (CX4) */
2199 
2200 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2201 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2202 
2203 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2204 		ifmedia_set(media, m | IFM_10G_CX4);
2205 
2206 	} else if (phy->caps & SUPPORTED_FIBRE &&
2207 		   phy->caps & SUPPORTED_10000baseT_Full) {
2208 		/* 10G optical (but includes SFP+ twinax) */
2209 
2210 		m |= cxgb_ifm_type(mod);
2211 		if (IFM_SUBTYPE(m) == IFM_NONE)
2212 			m &= ~IFM_FDX;
2213 
2214 		ifmedia_add(media, m, mod, NULL);
2215 		ifmedia_set(media, m);
2216 
2217 	} else if (phy->caps & SUPPORTED_FIBRE &&
2218 		   phy->caps & SUPPORTED_1000baseT_Full) {
2219 		/* 1G optical */
2220 
2221 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2222 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2223 		ifmedia_set(media, m | IFM_1000_SX);
2224 
2225 	} else {
2226 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2227 			    phy->caps));
2228 	}
2229 
2230 	PORT_UNLOCK(p);
2231 }
2232 
2233 static void
2234 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2235 {
2236 	struct port_info *p = ifp->if_softc;
2237 	struct ifmedia_entry *cur = p->media.ifm_cur;
2238 	int speed = p->link_config.speed;
2239 
2240 	if (cur->ifm_data != p->phy.modtype) {
2241 		cxgb_build_medialist(p);
2242 		cur = p->media.ifm_cur;
2243 	}
2244 
2245 	ifmr->ifm_status = IFM_AVALID;
2246 	if (!p->link_config.link_ok)
2247 		return;
2248 
2249 	ifmr->ifm_status |= IFM_ACTIVE;
2250 
2251 	/*
2252 	 * active and current will differ iff current media is autoselect.  That
2253 	 * can happen only for copper RJ45.
2254 	 */
2255 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2256 		return;
2257 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2258 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2259 
2260 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2261 	if (speed == SPEED_10000)
2262 		ifmr->ifm_active |= IFM_10G_T;
2263 	else if (speed == SPEED_1000)
2264 		ifmr->ifm_active |= IFM_1000_T;
2265 	else if (speed == SPEED_100)
2266 		ifmr->ifm_active |= IFM_100_TX;
2267 	else if (speed == SPEED_10)
2268 		ifmr->ifm_active |= IFM_10_T;
2269 	else
2270 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2271 			    speed));
2272 }
2273 
2274 static void
2275 cxgb_async_intr(void *data)
2276 {
2277 	adapter_t *sc = data;
2278 
2279 	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2280 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2281 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2282 }
2283 
2284 static void
2285 link_check_callout(void *arg)
2286 {
2287 	struct port_info *pi = arg;
2288 	struct adapter *sc = pi->adapter;
2289 
2290 	if (!isset(&sc->open_device_map, pi->port_id))
2291 		return;
2292 
2293 	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2294 }
2295 
2296 static void
2297 check_link_status(void *arg, int pending)
2298 {
2299 	struct port_info *pi = arg;
2300 	struct adapter *sc = pi->adapter;
2301 
2302 	if (!isset(&sc->open_device_map, pi->port_id))
2303 		return;
2304 
2305 	t3_link_changed(sc, pi->port_id);
2306 
2307 	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2308 		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2309 }
2310 
2311 void
2312 t3_os_link_intr(struct port_info *pi)
2313 {
2314 	/*
2315 	 * Schedule a link check in the near future.  If the link is flapping
2316 	 * rapidly we'll keep resetting the callout and delaying the check until
2317 	 * things stabilize a bit.
2318 	 */
2319 	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2320 }
2321 
2322 static void
2323 check_t3b2_mac(struct adapter *sc)
2324 {
2325 	int i;
2326 
2327 	if (sc->flags & CXGB_SHUTDOWN)
2328 		return;
2329 
2330 	for_each_port(sc, i) {
2331 		struct port_info *p = &sc->port[i];
2332 		int status;
2333 #ifdef INVARIANTS
2334 		struct ifnet *ifp = p->ifp;
2335 #endif
2336 
2337 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2338 		    !p->link_config.link_ok)
2339 			continue;
2340 
2341 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2342 			("%s: state mismatch (drv_flags %x, device_map %x)",
2343 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2344 
2345 		PORT_LOCK(p);
2346 		status = t3b2_mac_watchdog_task(&p->mac);
2347 		if (status == 1)
2348 			p->mac.stats.num_toggled++;
2349 		else if (status == 2) {
2350 			struct cmac *mac = &p->mac;
2351 
2352 			cxgb_update_mac_settings(p);
2353 			t3_link_start(&p->phy, mac, &p->link_config);
2354 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2355 			t3_port_intr_enable(sc, p->port_id);
2356 			p->mac.stats.num_resets++;
2357 		}
2358 		PORT_UNLOCK(p);
2359 	}
2360 }
2361 
2362 static void
2363 cxgb_tick(void *arg)
2364 {
2365 	adapter_t *sc = (adapter_t *)arg;
2366 
2367 	if (sc->flags & CXGB_SHUTDOWN)
2368 		return;
2369 
2370 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2371 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2372 }
2373 
2374 static void
2375 cxgb_tick_handler(void *arg, int count)
2376 {
2377 	adapter_t *sc = (adapter_t *)arg;
2378 	const struct adapter_params *p = &sc->params;
2379 	int i;
2380 	uint32_t cause, reset;
2381 
2382 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2383 		return;
2384 
2385 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2386 		check_t3b2_mac(sc);
2387 
2388 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2389 	if (cause) {
2390 		struct sge_qset *qs = &sc->sge.qs[0];
2391 		uint32_t mask, v;
2392 
2393 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2394 
2395 		mask = 1;
2396 		for (i = 0; i < SGE_QSETS; i++) {
2397 			if (v & mask)
2398 				qs[i].rspq.starved++;
2399 			mask <<= 1;
2400 		}
2401 
2402 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2403 
2404 		for (i = 0; i < SGE_QSETS * 2; i++) {
2405 			if (v & mask) {
2406 				qs[i / 2].fl[i % 2].empty++;
2407 			}
2408 			mask <<= 1;
2409 		}
2410 
2411 		/* clear */
2412 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2413 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2414 	}
2415 
2416 	for (i = 0; i < sc->params.nports; i++) {
2417 		struct port_info *pi = &sc->port[i];
2418 		struct ifnet *ifp = pi->ifp;
2419 		struct cmac *mac = &pi->mac;
2420 		struct mac_stats *mstats = &mac->stats;
2421 		int drops, j;
2422 
2423 		if (!isset(&sc->open_device_map, pi->port_id))
2424 			continue;
2425 
2426 		PORT_LOCK(pi);
2427 		t3_mac_update_stats(mac);
2428 		PORT_UNLOCK(pi);
2429 
2430 		ifp->if_opackets = mstats->tx_frames;
2431 		ifp->if_ipackets = mstats->rx_frames;
2432 		ifp->if_obytes = mstats->tx_octets;
2433 		ifp->if_ibytes = mstats->rx_octets;
2434 		ifp->if_omcasts = mstats->tx_mcast_frames;
2435 		ifp->if_imcasts = mstats->rx_mcast_frames;
2436 		ifp->if_collisions = mstats->tx_total_collisions;
2437 		ifp->if_iqdrops = mstats->rx_cong_drops;
2438 
2439 		drops = 0;
2440 		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2441 			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2442 		ifp->if_snd.ifq_drops = drops;
2443 
2444 		ifp->if_oerrors =
2445 		    mstats->tx_excess_collisions +
2446 		    mstats->tx_underrun +
2447 		    mstats->tx_len_errs +
2448 		    mstats->tx_mac_internal_errs +
2449 		    mstats->tx_excess_deferral +
2450 		    mstats->tx_fcs_errs;
2451 		ifp->if_ierrors =
2452 		    mstats->rx_jabber +
2453 		    mstats->rx_data_errs +
2454 		    mstats->rx_sequence_errs +
2455 		    mstats->rx_runt +
2456 		    mstats->rx_too_long +
2457 		    mstats->rx_mac_internal_errs +
2458 		    mstats->rx_short +
2459 		    mstats->rx_fcs_errs;
2460 
2461 		if (mac->multiport)
2462 			continue;
2463 
2464 		/* Count rx fifo overflows, once per second */
2465 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2466 		reset = 0;
2467 		if (cause & F_RXFIFO_OVERFLOW) {
2468 			mac->stats.rx_fifo_ovfl++;
2469 			reset |= F_RXFIFO_OVERFLOW;
2470 		}
2471 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2472 	}
2473 }
2474 
2475 static void
2476 touch_bars(device_t dev)
2477 {
2478 	/*
2479 	 * Don't enable yet
2480 	 */
2481 #if !defined(__LP64__) && 0
2482 	u32 v;
2483 
2484 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2485 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2486 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2487 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2488 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2489 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2490 #endif
2491 }
2492 
2493 static int
2494 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2495 {
2496 	uint8_t *buf;
2497 	int err = 0;
2498 	u32 aligned_offset, aligned_len, *p;
2499 	struct adapter *adapter = pi->adapter;
2500 
2501 
2502 	aligned_offset = offset & ~3;
2503 	aligned_len = (len + (offset & 3) + 3) & ~3;
2504 
2505 	if (aligned_offset != offset || aligned_len != len) {
2506 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2507 		if (!buf)
2508 			return (ENOMEM);
2509 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2510 		if (!err && aligned_len > 4)
2511 			err = t3_seeprom_read(adapter,
2512 					      aligned_offset + aligned_len - 4,
2513 					      (u32 *)&buf[aligned_len - 4]);
2514 		if (err)
2515 			goto out;
2516 		memcpy(buf + (offset & 3), data, len);
2517 	} else
2518 		buf = (uint8_t *)(uintptr_t)data;
2519 
2520 	err = t3_seeprom_wp(adapter, 0);
2521 	if (err)
2522 		goto out;
2523 
2524 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2525 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2526 		aligned_offset += 4;
2527 	}
2528 
2529 	if (!err)
2530 		err = t3_seeprom_wp(adapter, 1);
2531 out:
2532 	if (buf != data)
2533 		free(buf, M_DEVBUF);
2534 	return err;
2535 }
2536 
2537 
/*
 * Returns 1 if val is negative or lies within [lo, hi], 0 otherwise.
 * Negative values are always accepted.
 */
static int
in_range(int val, int lo, int hi)
{
	if (val < 0)
		return (1);
	if (val < lo || val > hi)
		return (0);
	return (1);
}
2543 
/*
 * Open handler for the extension device.  There is nothing to set up, so
 * every open succeeds.
 */
static int
cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
{
	return (0);
}
2549 
/*
 * Close handler for the extension device.  There is nothing to tear down,
 * so every close succeeds.
 */
static int
cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
{
	return (0);
}
2555 
2556 static int
2557 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2558     int fflag, struct thread *td)
2559 {
2560 	int mmd, error = 0;
2561 	struct port_info *pi = dev->si_drv1;
2562 	adapter_t *sc = pi->adapter;
2563 
2564 #ifdef PRIV_SUPPORTED
2565 	if (priv_check(td, PRIV_DRIVER)) {
2566 		if (cxgb_debug)
2567 			printf("user does not have access to privileged ioctls\n");
2568 		return (EPERM);
2569 	}
2570 #else
2571 	if (suser(td)) {
2572 		if (cxgb_debug)
2573 			printf("user does not have access to privileged ioctls\n");
2574 		return (EPERM);
2575 	}
2576 #endif
2577 
2578 	switch (cmd) {
2579 	case CHELSIO_GET_MIIREG: {
2580 		uint32_t val;
2581 		struct cphy *phy = &pi->phy;
2582 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2583 
2584 		if (!phy->mdio_read)
2585 			return (EOPNOTSUPP);
2586 		if (is_10G(sc)) {
2587 			mmd = mid->phy_id >> 8;
2588 			if (!mmd)
2589 				mmd = MDIO_DEV_PCS;
2590 			else if (mmd > MDIO_DEV_VEND2)
2591 				return (EINVAL);
2592 
2593 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2594 					     mid->reg_num, &val);
2595 		} else
2596 		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2597 					     mid->reg_num & 0x1f, &val);
2598 		if (error == 0)
2599 			mid->val_out = val;
2600 		break;
2601 	}
2602 	case CHELSIO_SET_MIIREG: {
2603 		struct cphy *phy = &pi->phy;
2604 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2605 
2606 		if (!phy->mdio_write)
2607 			return (EOPNOTSUPP);
2608 		if (is_10G(sc)) {
2609 			mmd = mid->phy_id >> 8;
2610 			if (!mmd)
2611 				mmd = MDIO_DEV_PCS;
2612 			else if (mmd > MDIO_DEV_VEND2)
2613 				return (EINVAL);
2614 
2615 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2616 					      mmd, mid->reg_num, mid->val_in);
2617 		} else
2618 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2619 					      mid->reg_num & 0x1f,
2620 					      mid->val_in);
2621 		break;
2622 	}
2623 	case CHELSIO_SETREG: {
2624 		struct ch_reg *edata = (struct ch_reg *)data;
2625 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2626 			return (EFAULT);
2627 		t3_write_reg(sc, edata->addr, edata->val);
2628 		break;
2629 	}
2630 	case CHELSIO_GETREG: {
2631 		struct ch_reg *edata = (struct ch_reg *)data;
2632 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2633 			return (EFAULT);
2634 		edata->val = t3_read_reg(sc, edata->addr);
2635 		break;
2636 	}
2637 	case CHELSIO_GET_SGE_CONTEXT: {
2638 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2639 		mtx_lock_spin(&sc->sge.reg_lock);
2640 		switch (ecntxt->cntxt_type) {
2641 		case CNTXT_TYPE_EGRESS:
2642 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2643 			    ecntxt->data);
2644 			break;
2645 		case CNTXT_TYPE_FL:
2646 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2647 			    ecntxt->data);
2648 			break;
2649 		case CNTXT_TYPE_RSP:
2650 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2651 			    ecntxt->data);
2652 			break;
2653 		case CNTXT_TYPE_CQ:
2654 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2655 			    ecntxt->data);
2656 			break;
2657 		default:
2658 			error = EINVAL;
2659 			break;
2660 		}
2661 		mtx_unlock_spin(&sc->sge.reg_lock);
2662 		break;
2663 	}
2664 	case CHELSIO_GET_SGE_DESC: {
2665 		struct ch_desc *edesc = (struct ch_desc *)data;
2666 		int ret;
2667 		if (edesc->queue_num >= SGE_QSETS * 6)
2668 			return (EINVAL);
2669 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2670 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2671 		if (ret < 0)
2672 			return (EINVAL);
2673 		edesc->size = ret;
2674 		break;
2675 	}
2676 	case CHELSIO_GET_QSET_PARAMS: {
2677 		struct qset_params *q;
2678 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2679 		int q1 = pi->first_qset;
2680 		int nqsets = pi->nqsets;
2681 		int i;
2682 
2683 		if (t->qset_idx >= nqsets)
2684 			return EINVAL;
2685 
2686 		i = q1 + t->qset_idx;
2687 		q = &sc->params.sge.qset[i];
2688 		t->rspq_size   = q->rspq_size;
2689 		t->txq_size[0] = q->txq_size[0];
2690 		t->txq_size[1] = q->txq_size[1];
2691 		t->txq_size[2] = q->txq_size[2];
2692 		t->fl_size[0]  = q->fl_size;
2693 		t->fl_size[1]  = q->jumbo_size;
2694 		t->polling     = q->polling;
2695 		t->lro         = q->lro;
2696 		t->intr_lat    = q->coalesce_usecs;
2697 		t->cong_thres  = q->cong_thres;
2698 		t->qnum        = i;
2699 
2700 		if ((sc->flags & FULL_INIT_DONE) == 0)
2701 			t->vector = 0;
2702 		else if (sc->flags & USING_MSIX)
2703 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2704 		else
2705 			t->vector = rman_get_start(sc->irq_res);
2706 
2707 		break;
2708 	}
2709 	case CHELSIO_GET_QSET_NUM: {
2710 		struct ch_reg *edata = (struct ch_reg *)data;
2711 		edata->val = pi->nqsets;
2712 		break;
2713 	}
2714 	case CHELSIO_LOAD_FW: {
2715 		uint8_t *fw_data;
2716 		uint32_t vers;
2717 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2718 
2719 		/*
2720 		 * You're allowed to load a firmware only before FULL_INIT_DONE
2721 		 *
2722 		 * FW_UPTODATE is also set so the rest of the initialization
2723 		 * will not overwrite what was loaded here.  This gives you the
2724 		 * flexibility to load any firmware (and maybe shoot yourself in
2725 		 * the foot).
2726 		 */
2727 
2728 		ADAPTER_LOCK(sc);
2729 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2730 			ADAPTER_UNLOCK(sc);
2731 			return (EBUSY);
2732 		}
2733 
2734 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2735 		if (!fw_data)
2736 			error = ENOMEM;
2737 		else
2738 			error = copyin(t->buf, fw_data, t->len);
2739 
2740 		if (!error)
2741 			error = -t3_load_fw(sc, fw_data, t->len);
2742 
2743 		if (t3_get_fw_version(sc, &vers) == 0) {
2744 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2745 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2746 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2747 		}
2748 
2749 		if (!error)
2750 			sc->flags |= FW_UPTODATE;
2751 
2752 		free(fw_data, M_DEVBUF);
2753 		ADAPTER_UNLOCK(sc);
2754 		break;
2755 	}
2756 	case CHELSIO_LOAD_BOOT: {
2757 		uint8_t *boot_data;
2758 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2759 
2760 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2761 		if (!boot_data)
2762 			return ENOMEM;
2763 
2764 		error = copyin(t->buf, boot_data, t->len);
2765 		if (!error)
2766 			error = -t3_load_boot(sc, boot_data, t->len);
2767 
2768 		free(boot_data, M_DEVBUF);
2769 		break;
2770 	}
2771 	case CHELSIO_GET_PM: {
2772 		struct ch_pm *m = (struct ch_pm *)data;
2773 		struct tp_params *p = &sc->params.tp;
2774 
2775 		if (!is_offload(sc))
2776 			return (EOPNOTSUPP);
2777 
2778 		m->tx_pg_sz = p->tx_pg_size;
2779 		m->tx_num_pg = p->tx_num_pgs;
2780 		m->rx_pg_sz  = p->rx_pg_size;
2781 		m->rx_num_pg = p->rx_num_pgs;
2782 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2783 
2784 		break;
2785 	}
2786 	case CHELSIO_SET_PM: {
2787 		struct ch_pm *m = (struct ch_pm *)data;
2788 		struct tp_params *p = &sc->params.tp;
2789 
2790 		if (!is_offload(sc))
2791 			return (EOPNOTSUPP);
2792 		if (sc->flags & FULL_INIT_DONE)
2793 			return (EBUSY);
2794 
2795 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2796 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2797 			return (EINVAL);	/* not power of 2 */
2798 		if (!(m->rx_pg_sz & 0x14000))
2799 			return (EINVAL);	/* not 16KB or 64KB */
2800 		if (!(m->tx_pg_sz & 0x1554000))
2801 			return (EINVAL);
2802 		if (m->tx_num_pg == -1)
2803 			m->tx_num_pg = p->tx_num_pgs;
2804 		if (m->rx_num_pg == -1)
2805 			m->rx_num_pg = p->rx_num_pgs;
2806 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2807 			return (EINVAL);
2808 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2809 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2810 			return (EINVAL);
2811 
2812 		p->rx_pg_size = m->rx_pg_sz;
2813 		p->tx_pg_size = m->tx_pg_sz;
2814 		p->rx_num_pgs = m->rx_num_pg;
2815 		p->tx_num_pgs = m->tx_num_pg;
2816 		break;
2817 	}
2818 	case CHELSIO_SETMTUTAB: {
2819 		struct ch_mtus *m = (struct ch_mtus *)data;
2820 		int i;
2821 
2822 		if (!is_offload(sc))
2823 			return (EOPNOTSUPP);
2824 		if (offload_running(sc))
2825 			return (EBUSY);
2826 		if (m->nmtus != NMTUS)
2827 			return (EINVAL);
2828 		if (m->mtus[0] < 81)         /* accommodate SACK */
2829 			return (EINVAL);
2830 
2831 		/*
2832 		 * MTUs must be in ascending order
2833 		 */
2834 		for (i = 1; i < NMTUS; ++i)
2835 			if (m->mtus[i] < m->mtus[i - 1])
2836 				return (EINVAL);
2837 
2838 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2839 		break;
2840 	}
2841 	case CHELSIO_GETMTUTAB: {
2842 		struct ch_mtus *m = (struct ch_mtus *)data;
2843 
2844 		if (!is_offload(sc))
2845 			return (EOPNOTSUPP);
2846 
2847 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2848 		m->nmtus = NMTUS;
2849 		break;
2850 	}
2851 	case CHELSIO_GET_MEM: {
2852 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2853 		struct mc7 *mem;
2854 		uint8_t *useraddr;
2855 		u64 buf[32];
2856 
2857 		/*
2858 		 * Use these to avoid modifying len/addr in the the return
2859 		 * struct
2860 		 */
2861 		uint32_t len = t->len, addr = t->addr;
2862 
2863 		if (!is_offload(sc))
2864 			return (EOPNOTSUPP);
2865 		if (!(sc->flags & FULL_INIT_DONE))
2866 			return (EIO);         /* need the memory controllers */
2867 		if ((addr & 0x7) || (len & 0x7))
2868 			return (EINVAL);
2869 		if (t->mem_id == MEM_CM)
2870 			mem = &sc->cm;
2871 		else if (t->mem_id == MEM_PMRX)
2872 			mem = &sc->pmrx;
2873 		else if (t->mem_id == MEM_PMTX)
2874 			mem = &sc->pmtx;
2875 		else
2876 			return (EINVAL);
2877 
2878 		/*
2879 		 * Version scheme:
2880 		 * bits 0..9: chip version
2881 		 * bits 10..15: chip revision
2882 		 */
2883 		t->version = 3 | (sc->params.rev << 10);
2884 
2885 		/*
2886 		 * Read 256 bytes at a time as len can be large and we don't
2887 		 * want to use huge intermediate buffers.
2888 		 */
2889 		useraddr = (uint8_t *)t->buf;
2890 		while (len) {
2891 			unsigned int chunk = min(len, sizeof(buf));
2892 
2893 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2894 			if (error)
2895 				return (-error);
2896 			if (copyout(buf, useraddr, chunk))
2897 				return (EFAULT);
2898 			useraddr += chunk;
2899 			addr += chunk;
2900 			len -= chunk;
2901 		}
2902 		break;
2903 	}
2904 	case CHELSIO_READ_TCAM_WORD: {
2905 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2906 
2907 		if (!is_offload(sc))
2908 			return (EOPNOTSUPP);
2909 		if (!(sc->flags & FULL_INIT_DONE))
2910 			return (EIO);         /* need MC5 */
2911 		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2912 		break;
2913 	}
2914 	case CHELSIO_SET_TRACE_FILTER: {
2915 		struct ch_trace *t = (struct ch_trace *)data;
2916 		const struct trace_params *tp;
2917 
2918 		tp = (const struct trace_params *)&t->sip;
2919 		if (t->config_tx)
2920 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2921 					       t->trace_tx);
2922 		if (t->config_rx)
2923 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2924 					       t->trace_rx);
2925 		break;
2926 	}
2927 	case CHELSIO_SET_PKTSCHED: {
2928 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2929 		if (sc->open_device_map == 0)
2930 			return (EAGAIN);
2931 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2932 		    p->binding);
2933 		break;
2934 	}
2935 	case CHELSIO_IFCONF_GETREGS: {
2936 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2937 		int reglen = cxgb_get_regs_len();
2938 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2939 		if (buf == NULL) {
2940 			return (ENOMEM);
2941 		}
2942 		if (regs->len > reglen)
2943 			regs->len = reglen;
2944 		else if (regs->len < reglen)
2945 			error = ENOBUFS;
2946 
2947 		if (!error) {
2948 			cxgb_get_regs(sc, regs, buf);
2949 			error = copyout(buf, regs->data, reglen);
2950 		}
2951 		free(buf, M_DEVBUF);
2952 
2953 		break;
2954 	}
2955 	case CHELSIO_SET_HW_SCHED: {
2956 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2957 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2958 
2959 		if ((sc->flags & FULL_INIT_DONE) == 0)
2960 			return (EAGAIN);       /* need TP to be initialized */
2961 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2962 		    !in_range(t->channel, 0, 1) ||
2963 		    !in_range(t->kbps, 0, 10000000) ||
2964 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2965 		    !in_range(t->flow_ipg, 0,
2966 			      dack_ticks_to_usec(sc, 0x7ff)))
2967 			return (EINVAL);
2968 
2969 		if (t->kbps >= 0) {
2970 			error = t3_config_sched(sc, t->kbps, t->sched);
2971 			if (error < 0)
2972 				return (-error);
2973 		}
2974 		if (t->class_ipg >= 0)
2975 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2976 		if (t->flow_ipg >= 0) {
2977 			t->flow_ipg *= 1000;     /* us -> ns */
2978 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2979 		}
2980 		if (t->mode >= 0) {
2981 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2982 
2983 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2984 					 bit, t->mode ? bit : 0);
2985 		}
2986 		if (t->channel >= 0)
2987 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2988 					 1 << t->sched, t->channel << t->sched);
2989 		break;
2990 	}
2991 	case CHELSIO_GET_EEPROM: {
2992 		int i;
2993 		struct ch_eeprom *e = (struct ch_eeprom *)data;
2994 		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2995 
2996 		if (buf == NULL) {
2997 			return (ENOMEM);
2998 		}
2999 		e->magic = EEPROM_MAGIC;
3000 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3001 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3002 
3003 		if (!error)
3004 			error = copyout(buf + e->offset, e->data, e->len);
3005 
3006 		free(buf, M_DEVBUF);
3007 		break;
3008 	}
3009 	case CHELSIO_CLEAR_STATS: {
3010 		if (!(sc->flags & FULL_INIT_DONE))
3011 			return EAGAIN;
3012 
3013 		PORT_LOCK(pi);
3014 		t3_mac_update_stats(&pi->mac);
3015 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3016 		PORT_UNLOCK(pi);
3017 		break;
3018 	}
3019 	case CHELSIO_GET_UP_LA: {
3020 		struct ch_up_la *la = (struct ch_up_la *)data;
3021 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3022 		if (buf == NULL) {
3023 			return (ENOMEM);
3024 		}
3025 		if (la->bufsize < LA_BUFSIZE)
3026 			error = ENOBUFS;
3027 
3028 		if (!error)
3029 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3030 					      &la->bufsize, buf);
3031 		if (!error)
3032 			error = copyout(buf, la->data, la->bufsize);
3033 
3034 		free(buf, M_DEVBUF);
3035 		break;
3036 	}
3037 	case CHELSIO_GET_UP_IOQS: {
3038 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3039 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3040 		uint32_t *v;
3041 
3042 		if (buf == NULL) {
3043 			return (ENOMEM);
3044 		}
3045 		if (ioqs->bufsize < IOQS_BUFSIZE)
3046 			error = ENOBUFS;
3047 
3048 		if (!error)
3049 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3050 
3051 		if (!error) {
3052 			v = (uint32_t *)buf;
3053 
3054 			ioqs->ioq_rx_enable = *v++;
3055 			ioqs->ioq_tx_enable = *v++;
3056 			ioqs->ioq_rx_status = *v++;
3057 			ioqs->ioq_tx_status = *v++;
3058 
3059 			error = copyout(v, ioqs->data, ioqs->bufsize);
3060 		}
3061 
3062 		free(buf, M_DEVBUF);
3063 		break;
3064 	}
3065 	case CHELSIO_SET_FILTER: {
3066 		struct ch_filter *f = (struct ch_filter *)data;;
3067 		struct filter_info *p;
3068 		unsigned int nfilters = sc->params.mc5.nfilters;
3069 
3070 		if (!is_offload(sc))
3071 			return (EOPNOTSUPP);	/* No TCAM */
3072 		if (!(sc->flags & FULL_INIT_DONE))
3073 			return (EAGAIN);	/* mc5 not setup yet */
3074 		if (nfilters == 0)
3075 			return (EBUSY);		/* TOE will use TCAM */
3076 
3077 		/* sanity checks */
3078 		if (f->filter_id >= nfilters ||
3079 		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3080 		    (f->val.sport && f->mask.sport != 0xffff) ||
3081 		    (f->val.dport && f->mask.dport != 0xffff) ||
3082 		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3083 		    (f->val.vlan_prio &&
3084 			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3085 		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3086 		    f->qset >= SGE_QSETS ||
3087 		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3088 			return (EINVAL);
3089 
3090 		/* Was allocated with M_WAITOK */
3091 		KASSERT(sc->filters, ("filter table NULL\n"));
3092 
3093 		p = &sc->filters[f->filter_id];
3094 		if (p->locked)
3095 			return (EPERM);
3096 
3097 		bzero(p, sizeof(*p));
3098 		p->sip = f->val.sip;
3099 		p->sip_mask = f->mask.sip;
3100 		p->dip = f->val.dip;
3101 		p->sport = f->val.sport;
3102 		p->dport = f->val.dport;
3103 		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3104 		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3105 		    FILTER_NO_VLAN_PRI;
3106 		p->mac_hit = f->mac_hit;
3107 		p->mac_vld = f->mac_addr_idx != 0xffff;
3108 		p->mac_idx = f->mac_addr_idx;
3109 		p->pkt_type = f->proto;
3110 		p->report_filter_id = f->want_filter_id;
3111 		p->pass = f->pass;
3112 		p->rss = f->rss;
3113 		p->qset = f->qset;
3114 
3115 		error = set_filter(sc, f->filter_id, p);
3116 		if (error == 0)
3117 			p->valid = 1;
3118 		break;
3119 	}
3120 	case CHELSIO_DEL_FILTER: {
3121 		struct ch_filter *f = (struct ch_filter *)data;
3122 		struct filter_info *p;
3123 		unsigned int nfilters = sc->params.mc5.nfilters;
3124 
3125 		if (!is_offload(sc))
3126 			return (EOPNOTSUPP);
3127 		if (!(sc->flags & FULL_INIT_DONE))
3128 			return (EAGAIN);
3129 		if (nfilters == 0 || sc->filters == NULL)
3130 			return (EINVAL);
3131 		if (f->filter_id >= nfilters)
3132 		       return (EINVAL);
3133 
3134 		p = &sc->filters[f->filter_id];
3135 		if (p->locked)
3136 			return (EPERM);
3137 		if (!p->valid)
3138 			return (EFAULT); /* Read "Bad address" as "Bad index" */
3139 
3140 		bzero(p, sizeof(*p));
3141 		p->sip = p->sip_mask = 0xffffffff;
3142 		p->vlan = 0xfff;
3143 		p->vlan_prio = FILTER_NO_VLAN_PRI;
3144 		p->pkt_type = 1;
3145 		error = set_filter(sc, f->filter_id, p);
3146 		break;
3147 	}
3148 	case CHELSIO_GET_FILTER: {
3149 		struct ch_filter *f = (struct ch_filter *)data;
3150 		struct filter_info *p;
3151 		unsigned int i, nfilters = sc->params.mc5.nfilters;
3152 
3153 		if (!is_offload(sc))
3154 			return (EOPNOTSUPP);
3155 		if (!(sc->flags & FULL_INIT_DONE))
3156 			return (EAGAIN);
3157 		if (nfilters == 0 || sc->filters == NULL)
3158 			return (EINVAL);
3159 
3160 		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3161 		for (; i < nfilters; i++) {
3162 			p = &sc->filters[i];
3163 			if (!p->valid)
3164 				continue;
3165 
3166 			bzero(f, sizeof(*f));
3167 
3168 			f->filter_id = i;
3169 			f->val.sip = p->sip;
3170 			f->mask.sip = p->sip_mask;
3171 			f->val.dip = p->dip;
3172 			f->mask.dip = p->dip ? 0xffffffff : 0;
3173 			f->val.sport = p->sport;
3174 			f->mask.sport = p->sport ? 0xffff : 0;
3175 			f->val.dport = p->dport;
3176 			f->mask.dport = p->dport ? 0xffff : 0;
3177 			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3178 			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3179 			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3180 			    0 : p->vlan_prio;
3181 			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3182 			    0 : FILTER_NO_VLAN_PRI;
3183 			f->mac_hit = p->mac_hit;
3184 			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3185 			f->proto = p->pkt_type;
3186 			f->want_filter_id = p->report_filter_id;
3187 			f->pass = p->pass;
3188 			f->rss = p->rss;
3189 			f->qset = p->qset;
3190 
3191 			break;
3192 		}
3193 
3194 		if (i == nfilters)
3195 			f->filter_id = 0xffffffff;
3196 		break;
3197 	}
3198 	default:
3199 		return (EOPNOTSUPP);
3200 		break;
3201 	}
3202 
3203 	return (error);
3204 }
3205 
/*
 * Snapshot the registers in the inclusive address range [start, end].
 *
 * Registers are read one 32-bit word at a time with t3_read_reg() and each
 * value is stored in buf at the byte offset equal to its register address,
 * so buf has to extend at least to end + sizeof(uint32_t) bytes.
 */
static __inline void
reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
    unsigned int end)
{
	uint32_t *dst;
	unsigned int addr;

	dst = (uint32_t *)(buf + start);
	for (addr = start; addr <= end; addr += sizeof(uint32_t)) {
		*dst = t3_read_reg(ap, addr);
		dst++;
	}
}
3215 
/* Size, in bytes, of the register dump image (3KB). */
#define T3_REGMAP_SIZE (3 * 1024)

/*
 * Return the number of bytes a register dump occupies, i.e. T3_REGMAP_SIZE.
 */
static int
cxgb_get_regs_len(void)
{

	return (T3_REGMAP_SIZE);
}
3222 
3223 static void
3224 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3225 {
3226 
3227 	/*
3228 	 * Version scheme:
3229 	 * bits 0..9: chip version
3230 	 * bits 10..15: chip revision
3231 	 * bit 31: set for PCIe cards
3232 	 */
3233 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3234 
3235 	/*
3236 	 * We skip the MAC statistics registers because they are clear-on-read.
3237 	 * Also reading multi-register stats would need to synchronize with the
3238 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3239 	 */
3240 	memset(buf, 0, cxgb_get_regs_len());
3241 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3242 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3243 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3244 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3245 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3246 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3247 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3248 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3249 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3250 }
3251 
3252 static int
3253 alloc_filters(struct adapter *sc)
3254 {
3255 	struct filter_info *p;
3256 	unsigned int nfilters = sc->params.mc5.nfilters;
3257 
3258 	if (nfilters == 0)
3259 		return (0);
3260 
3261 	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3262 	sc->filters = p;
3263 
3264 	p = &sc->filters[nfilters - 1];
3265 	p->vlan = 0xfff;
3266 	p->vlan_prio = FILTER_NO_VLAN_PRI;
3267 	p->pass = p->rss = p->valid = p->locked = 1;
3268 
3269 	return (0);
3270 }
3271 
3272 static int
3273 setup_hw_filters(struct adapter *sc)
3274 {
3275 	int i, rc;
3276 	unsigned int nfilters = sc->params.mc5.nfilters;
3277 
3278 	if (!sc->filters)
3279 		return (0);
3280 
3281 	t3_enable_filters(sc);
3282 
3283 	for (i = rc = 0; i < nfilters && !rc; i++) {
3284 		if (sc->filters[i].locked)
3285 			rc = set_filter(sc, i, &sc->filters[i]);
3286 	}
3287 
3288 	return (rc);
3289 }
3290 
3291 static int
3292 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3293 {
3294 	int len;
3295 	struct mbuf *m;
3296 	struct ulp_txpkt *txpkt;
3297 	struct work_request_hdr *wr;
3298 	struct cpl_pass_open_req *oreq;
3299 	struct cpl_set_tcb_field *sreq;
3300 
3301 	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3302 	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3303 
3304 	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3305 	      sc->params.mc5.nfilters;
3306 
3307 	m = m_gethdr(M_WAITOK, MT_DATA);
3308 	m->m_len = m->m_pkthdr.len = len;
3309 	bzero(mtod(m, char *), len);
3310 
3311 	wr = mtod(m, struct work_request_hdr *);
3312 	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3313 
3314 	oreq = (struct cpl_pass_open_req *)(wr + 1);
3315 	txpkt = (struct ulp_txpkt *)oreq;
3316 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3317 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3318 	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3319 	oreq->local_port = htons(f->dport);
3320 	oreq->peer_port = htons(f->sport);
3321 	oreq->local_ip = htonl(f->dip);
3322 	oreq->peer_ip = htonl(f->sip);
3323 	oreq->peer_netmask = htonl(f->sip_mask);
3324 	oreq->opt0h = 0;
3325 	oreq->opt0l = htonl(F_NO_OFFLOAD);
3326 	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3327 			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3328 			 V_VLAN_PRI(f->vlan_prio >> 1) |
3329 			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3330 			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3331 			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3332 
3333 	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3334 	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3335 			  (f->report_filter_id << 15) | (1 << 23) |
3336 			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3337 	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3338 	t3_mgmt_tx(sc, m);
3339 
3340 	if (f->pass && !f->rss) {
3341 		len = sizeof(*sreq);
3342 		m = m_gethdr(M_WAITOK, MT_DATA);
3343 		m->m_len = m->m_pkthdr.len = len;
3344 		bzero(mtod(m, char *), len);
3345 		sreq = mtod(m, struct cpl_set_tcb_field *);
3346 		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3347 		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3348 				 (u64)sc->rrss_map[f->qset] << 19);
3349 		t3_mgmt_tx(sc, m);
3350 	}
3351 	return 0;
3352 }
3353 
3354 static inline void
3355 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3356     unsigned int word, u64 mask, u64 val)
3357 {
3358 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3359 	req->reply = V_NO_REPLY(1);
3360 	req->cpu_idx = 0;
3361 	req->word = htons(word);
3362 	req->mask = htobe64(mask);
3363 	req->val = htobe64(val);
3364 }
3365 
3366 static inline void
3367 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3368     unsigned int word, u64 mask, u64 val)
3369 {
3370 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3371 
3372 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3373 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3374 	mk_set_tcb_field(req, tid, word, mask, val);
3375 }
3376