xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision a3cf0ef5a295c885c895fabfd56470c0d1db322d)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57 
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66 
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78 
79 #include <cxgb_include.h>
80 
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84 
85 static int cxgb_setup_interrupts(adapter_t *);
86 static void cxgb_teardown_interrupts(adapter_t *);
87 static void cxgb_init(void *);
88 static int cxgb_init_locked(struct port_info *);
89 static int cxgb_uninit_locked(struct port_info *);
90 static int cxgb_uninit_synchronized(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static int cxgb_media_change(struct ifnet *);
93 static int cxgb_ifm_type(int);
94 static void cxgb_build_medialist(struct port_info *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_tick_handler(void *, int);
99 static void cxgb_tick(void *);
100 static void link_check_callout(void *);
101 static void check_link_status(void *, int);
102 static void setup_rss(adapter_t *sc);
103 static int alloc_filters(struct adapter *);
104 static int setup_hw_filters(struct adapter *);
105 static int set_filter(struct adapter *, int, const struct filter_info *);
106 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
107     unsigned int, u64, u64);
108 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
109     unsigned int, u64, u64);
110 
111 /* Attachment glue for the PCI controller end of the device.  Each port of
112  * the device is attached separately, as defined later.
113  */
114 static int cxgb_controller_probe(device_t);
115 static int cxgb_controller_attach(device_t);
116 static int cxgb_controller_detach(device_t);
117 static void cxgb_free(struct adapter *);
118 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
119     unsigned int end);
120 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
121 static int cxgb_get_regs_len(void);
122 static int offload_open(struct port_info *pi);
123 static void touch_bars(device_t dev);
124 static int offload_close(struct t3cdev *tdev);
125 static void cxgb_update_mac_settings(struct port_info *p);
126 
127 static device_method_t cxgb_controller_methods[] = {
128 	DEVMETHOD(device_probe,		cxgb_controller_probe),
129 	DEVMETHOD(device_attach,	cxgb_controller_attach),
130 	DEVMETHOD(device_detach,	cxgb_controller_detach),
131 
132 	/* bus interface */
133 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
134 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
135 
136 	{ 0, 0 }
137 };
138 
139 static driver_t cxgb_controller_driver = {
140 	"cxgbc",
141 	cxgb_controller_methods,
142 	sizeof(struct adapter)
143 };
144 
145 static devclass_t	cxgb_controller_devclass;
146 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
147 
148 /*
149  * Attachment glue for the ports.  Attachment is done directly to the
150  * controller device.
151  */
152 static int cxgb_port_probe(device_t);
153 static int cxgb_port_attach(device_t);
154 static int cxgb_port_detach(device_t);
155 
156 static device_method_t cxgb_port_methods[] = {
157 	DEVMETHOD(device_probe,		cxgb_port_probe),
158 	DEVMETHOD(device_attach,	cxgb_port_attach),
159 	DEVMETHOD(device_detach,	cxgb_port_detach),
160 	{ 0, 0 }
161 };
162 
163 static driver_t cxgb_port_driver = {
164 	"cxgb",
165 	cxgb_port_methods,
166 	0
167 };
168 
169 static d_ioctl_t cxgb_extension_ioctl;
170 static d_open_t cxgb_extension_open;
171 static d_close_t cxgb_extension_close;
172 
173 static struct cdevsw cxgb_cdevsw = {
174        .d_version =    D_VERSION,
175        .d_flags =      0,
176        .d_open =       cxgb_extension_open,
177        .d_close =      cxgb_extension_close,
178        .d_ioctl =      cxgb_extension_ioctl,
179        .d_name =       "cxgb",
180 };
181 
182 static devclass_t	cxgb_port_devclass;
183 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
184 
185 /*
186  * The driver uses the best interrupt scheme available on a platform in the
187  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
188  * of these schemes the driver may consider as follows:
189  *
190  * msi = 2: choose from among all three options
191  * msi = 1: only consider MSI and pin interrupts
192  * msi = 0: force pin interrupts
193  */
194 static int msi_allowed = 2;
195 
196 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
197 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
198 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
199     "MSI-X, MSI, INTx selector");
200 
201 /*
202  * The driver enables offload by default.
203  * To disable it, set ofld_disable = 1.
204  */
205 static int ofld_disable = 0;
206 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
207 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
208     "disable ULP offload");
209 
210 /*
211  * The driver uses an auto-queue algorithm by default.
212  * To disable it and force a single queue-set per port, use multiq = 0
213  */
214 static int multiq = 1;
215 TUNABLE_INT("hw.cxgb.multiq", &multiq);
216 SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
217     "use min(ncpus/ports, 8) queue-sets per port");
218 
219 /*
220  * By default the driver will not update the firmware unless
221  * it was compiled against a newer version
222  *
223  */
224 static int force_fw_update = 0;
225 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
226 SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
227     "update firmware even if up to date");
228 
229 int cxgb_use_16k_clusters = -1;
230 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
231 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
232     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue ");
233 
234 /*
235  * Tune the size of the output queue.
236  */
237 int cxgb_snd_queue_len = IFQ_MAXLEN;
238 TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
239 SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
240     &cxgb_snd_queue_len, 0, "send queue size ");
241 
242 static int nfilters = -1;
243 TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
244 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
245     &nfilters, 0, "max number of entries in the filter table");
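
/*
 * All of the knobs above are boot-time tunables (TUNABLE_INT with
 * CTLFLAG_RDTUN), so they are read from the kernel environment rather than
 * being settable at runtime.  A minimal sketch of how they might be set
 * from /boot/loader.conf (the values below are purely illustrative):
 *
 *	hw.cxgb.msi_allowed="1"		# consider MSI and INTx only
 *	hw.cxgb.multiq="0"		# one queue-set per port
 *	hw.cxgb.ofld_disable="1"	# disable ULP offload
 *
 * The values in effect can be inspected later with sysctl(8), e.g.
 * "sysctl hw.cxgb".
 */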
246 
247 enum {
248 	MAX_TXQ_ENTRIES      = 16384,
249 	MAX_CTRL_TXQ_ENTRIES = 1024,
250 	MAX_RSPQ_ENTRIES     = 16384,
251 	MAX_RX_BUFFERS       = 16384,
252 	MAX_RX_JUMBO_BUFFERS = 16384,
253 	MIN_TXQ_ENTRIES      = 4,
254 	MIN_CTRL_TXQ_ENTRIES = 4,
255 	MIN_RSPQ_ENTRIES     = 32,
256 	MIN_FL_ENTRIES       = 32,
257 	MIN_FL_JUMBO_ENTRIES = 32
258 };
259 
260 struct filter_info {
261 	u32 sip;
262 	u32 sip_mask;
263 	u32 dip;
264 	u16 sport;
265 	u16 dport;
266 	u32 vlan:12;
267 	u32 vlan_prio:3;
268 	u32 mac_hit:1;
269 	u32 mac_idx:4;
270 	u32 mac_vld:1;
271 	u32 pkt_type:2;
272 	u32 report_filter_id:1;
273 	u32 pass:1;
274 	u32 rss:1;
275 	u32 qset:3;
276 	u32 locked:1;
277 	u32 valid:1;
278 };
279 
280 enum { FILTER_NO_VLAN_PRI = 7 };
281 
282 #define EEPROM_MAGIC 0x38E2F10C
283 
284 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
285 
286 /* Table for probing the cards.  The desc field isn't actually used */
287 struct cxgb_ident {
288 	uint16_t	vendor;
289 	uint16_t	device;
290 	int		index;
291 	char		*desc;
292 } cxgb_identifiers[] = {
293 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
294 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
295 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
296 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
297 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
298 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
299 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
300 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
301 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
302 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
303 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
304 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
305 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
306 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
307 	{0, 0, 0, NULL}
308 };
309 
310 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
311 
312 
313 static __inline char
314 t3rev2char(struct adapter *adapter)
315 {
316 	char rev = 'z';
317 
318 	switch(adapter->params.rev) {
319 	case T3_REV_A:
320 		rev = 'a';
321 		break;
322 	case T3_REV_B:
323 	case T3_REV_B2:
324 		rev = 'b';
325 		break;
326 	case T3_REV_C:
327 		rev = 'c';
328 		break;
329 	}
330 	return rev;
331 }
332 
333 static struct cxgb_ident *
334 cxgb_get_ident(device_t dev)
335 {
336 	struct cxgb_ident *id;
337 
338 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
339 		if ((id->vendor == pci_get_vendor(dev)) &&
340 		    (id->device == pci_get_device(dev))) {
341 			return (id);
342 		}
343 	}
344 	return (NULL);
345 }
346 
347 static const struct adapter_info *
348 cxgb_get_adapter_info(device_t dev)
349 {
350 	struct cxgb_ident *id;
351 	const struct adapter_info *ai;
352 
353 	id = cxgb_get_ident(dev);
354 	if (id == NULL)
355 		return (NULL);
356 
357 	ai = t3_get_adapter_info(id->index);
358 
359 	return (ai);
360 }
361 
362 static int
363 cxgb_controller_probe(device_t dev)
364 {
365 	const struct adapter_info *ai;
366 	char *ports, buf[80];
367 	int nports;
368 
369 	ai = cxgb_get_adapter_info(dev);
370 	if (ai == NULL)
371 		return (ENXIO);
372 
373 	nports = ai->nports0 + ai->nports1;
374 	if (nports == 1)
375 		ports = "port";
376 	else
377 		ports = "ports";
378 
379 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
380 	device_set_desc_copy(dev, buf);
381 	return (BUS_PROBE_DEFAULT);
382 }
383 
384 #define FW_FNAME "cxgb_t3fw"
385 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
386 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
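
/*
 * These templates, combined with t3rev2char() above, form the firmware(9)
 * image names requested during initialization.  A worked example, assuming
 * a T3 rev-B adapter: t3rev2char() returns 'b', so
 *
 *	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
 *
 * yields "cxgb_t3b_protocol_sram", and firmware_get(name) then looks for an
 * image registered under exactly that name with the firmware(9) subsystem.
 */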
387 
388 static int
389 upgrade_fw(adapter_t *sc)
390 {
391 	const struct firmware *fw;
392 	int status;
393 	u32 vers;
394 
395 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
396 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
397 		return (ENOENT);
398 	} else
399 		device_printf(sc->dev, "installing firmware on card\n");
400 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
401 
402 	if (status != 0) {
403 		device_printf(sc->dev, "failed to install firmware: %d\n",
404 		    status);
405 	} else {
406 		t3_get_fw_version(sc, &vers);
407 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
408 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
409 		    G_FW_VERSION_MICRO(vers));
410 	}
411 
412 	firmware_put(fw, FIRMWARE_UNLOAD);
413 
414 	return (status);
415 }
416 
417 /*
418  * The cxgb_controller_attach function is responsible for the initial
419  * bringup of the device.  Its responsibilities include:
420  *
421  *  1. Determine if the device supports MSI or MSI-X.
422  *  2. Allocate bus resources so that we can access the Base Address Register
423  *  3. Create and initialize mutexes for the controller and its control
424  *     logic such as SGE and MDIO.
425  *  4. Call hardware specific setup routine for the adapter as a whole.
426  *  5. Allocate the BAR for doing MSI-X.
427  *  6. Set up the line interrupt iff MSI-X is not supported.
428  *  7. Create the driver's taskq.
429  *  8. Start one task queue service thread.
430  *  9. Check if the firmware and SRAM are up-to-date.  They will be
431  *     auto-updated later (before FULL_INIT_DONE), if required.
432  * 10. Create a child device for each MAC (port)
433  * 11. Initialize T3 private state.
434  * 12. Trigger the LED
435  * 13. Set up offload iff supported.
436  * 14. Reset/restart the tick callout.
437  * 15. Attach sysctls
438  *
439  * NOTE: Any modification or deviation from this list MUST be reflected in
440  * the above comment.  Failure to do so will result in problems on various
441  * error conditions including link flapping.
442  */
443 static int
444 cxgb_controller_attach(device_t dev)
445 {
446 	device_t child;
447 	const struct adapter_info *ai;
448 	struct adapter *sc;
449 	int i, error = 0;
450 	uint32_t vers;
451 	int port_qsets = 1;
452 	int msi_needed, reg;
453 	char buf[80];
454 
455 	sc = device_get_softc(dev);
456 	sc->dev = dev;
457 	sc->msi_count = 0;
458 	ai = cxgb_get_adapter_info(dev);
459 
460 	/* find the PCIe link width and set max read request to 4KB */
461 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
462 		uint16_t lnk;
463 
464 		lnk = pci_read_config(dev, reg + PCIR_EXPRESS_LINK_STA, 2);
465 		sc->link_width = (lnk & PCIM_LINK_STA_WIDTH) >> 4;
466 		if (sc->link_width < 8 &&
467 		    (ai->caps & SUPPORTED_10000baseT_Full)) {
468 			device_printf(sc->dev,
469 			    "PCIe x%d Link, expect reduced performance\n",
470 			    sc->link_width);
471 		}
472 
473 		pci_set_max_read_req(dev, 4096);
474 	}
475 
476 	touch_bars(dev);
477 	pci_enable_busmaster(dev);
478 	/*
479 	 * Allocate the registers and make them available to the driver.
480 	 * The registers that we care about for NIC mode are in BAR 0
481 	 */
482 	sc->regs_rid = PCIR_BAR(0);
483 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
484 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
485 		device_printf(dev, "Cannot allocate BAR region 0\n");
486 		return (ENXIO);
487 	}
488 	sc->udbs_rid = PCIR_BAR(2);
489 	sc->udbs_res = NULL;
490 	if (is_offload(sc) &&
491 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
492 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
493 		device_printf(dev, "Cannot allocate BAR region 1\n");
494 		error = ENXIO;
495 		goto out;
496 	}
497 
498 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
499 	    device_get_unit(dev));
500 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
501 
502 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
503 	    device_get_unit(dev));
504 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
505 	    device_get_unit(dev));
506 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
507 	    device_get_unit(dev));
508 
509 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
510 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
511 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
512 
513 	sc->bt = rman_get_bustag(sc->regs_res);
514 	sc->bh = rman_get_bushandle(sc->regs_res);
515 	sc->mmio_len = rman_get_size(sc->regs_res);
516 
517 	for (i = 0; i < MAX_NPORTS; i++)
518 		sc->port[i].adapter = sc;
519 
520 	if (t3_prep_adapter(sc, ai, 1) < 0) {
521 		printf("prep adapter failed\n");
522 		error = ENODEV;
523 		goto out;
524 	}
525 	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
526 	 * enough messages for the queue sets.  If that fails, try falling
527 	 * back to MSI.  If that fails, then try falling back to the legacy
528 	 * interrupt pin model.
529 	 */
530 	sc->msix_regs_rid = 0x20;
531 	if ((msi_allowed >= 2) &&
532 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
533 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
534 
535 		if (multiq)
536 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
537 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
538 
539 		if (pci_msix_count(dev) == 0 ||
540 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
541 		    sc->msi_count != msi_needed) {
542 			device_printf(dev, "alloc msix failed - "
543 				      "msi_count=%d, msi_needed=%d, err=%d; "
544 				      "will try MSI\n", sc->msi_count,
545 				      msi_needed, error);
546 			sc->msi_count = 0;
547 			port_qsets = 1;
548 			pci_release_msi(dev);
549 			bus_release_resource(dev, SYS_RES_MEMORY,
550 			    sc->msix_regs_rid, sc->msix_regs_res);
551 			sc->msix_regs_res = NULL;
552 		} else {
553 			sc->flags |= USING_MSIX;
554 			sc->cxgb_intr = cxgb_async_intr;
555 			device_printf(dev,
556 				      "using MSI-X interrupts (%u vectors)\n",
557 				      sc->msi_count);
558 		}
559 	}
560 
561 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
562 		sc->msi_count = 1;
563 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
564 			device_printf(dev, "alloc msi failed - "
565 				      "err=%d; will try INTx\n", error);
566 			sc->msi_count = 0;
567 			port_qsets = 1;
568 			pci_release_msi(dev);
569 		} else {
570 			sc->flags |= USING_MSI;
571 			sc->cxgb_intr = t3_intr_msi;
572 			device_printf(dev, "using MSI interrupts\n");
573 		}
574 	}
575 	if (sc->msi_count == 0) {
576 		device_printf(dev, "using line interrupts\n");
577 		sc->cxgb_intr = t3b_intr;
578 	}
579 
580 	/* Create a private taskqueue thread for handling driver events */
581 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
582 	    taskqueue_thread_enqueue, &sc->tq);
583 	if (sc->tq == NULL) {
584 		device_printf(dev, "failed to allocate controller task queue\n");
585 		goto out;
586 	}
587 
588 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
589 	    device_get_nameunit(dev));
590 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
591 
592 
593 	/* Create a periodic callout for checking adapter status */
594 	callout_init(&sc->cxgb_tick_ch, TRUE);
595 
596 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
597 		/*
598 		 * Warn user that a firmware update will be attempted in init.
599 		 */
600 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
601 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
602 		sc->flags &= ~FW_UPTODATE;
603 	} else {
604 		sc->flags |= FW_UPTODATE;
605 	}
606 
607 	if (t3_check_tpsram_version(sc) < 0) {
608 		/*
609 		 * Warn user that a firmware update will be attempted in init.
610 		 */
611 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
612 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
613 		sc->flags &= ~TPS_UPTODATE;
614 	} else {
615 		sc->flags |= TPS_UPTODATE;
616 	}
617 
618 	/*
619 	 * Create a child device for each MAC.  The ethernet attachment
620 	 * will be done in these children.
621 	 */
622 	for (i = 0; i < (sc)->params.nports; i++) {
623 		struct port_info *pi;
624 
625 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
626 			device_printf(dev, "failed to add child port\n");
627 			error = EINVAL;
628 			goto out;
629 		}
630 		pi = &sc->port[i];
631 		pi->adapter = sc;
632 		pi->nqsets = port_qsets;
633 		pi->first_qset = i*port_qsets;
634 		pi->port_id = i;
635 		pi->tx_chan = i >= ai->nports0;
636 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
637 		sc->rxpkt_map[pi->txpkt_intf] = i;
638 		sc->port[i].tx_chan = i >= ai->nports0;
639 		sc->portdev[i] = child;
640 		device_set_softc(child, pi);
641 	}
642 	if ((error = bus_generic_attach(dev)) != 0)
643 		goto out;
644 
645 	/* initialize sge private state */
646 	t3_sge_init_adapter(sc);
647 
648 	t3_led_ready(sc);
649 
650 	cxgb_offload_init();
651 	if (is_offload(sc)) {
652 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
653 		cxgb_adapter_ofld(sc);
654         }
655 	error = t3_get_fw_version(sc, &vers);
656 	if (error)
657 		goto out;
658 
659 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
660 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
661 	    G_FW_VERSION_MICRO(vers));
662 
663 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
664 		 ai->desc, is_offload(sc) ? "R" : "",
665 		 sc->params.vpd.ec, sc->params.vpd.sn);
666 	device_set_desc_copy(dev, buf);
667 
668 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
669 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
670 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
671 
672 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
673 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
674 	t3_add_attach_sysctls(sc);
675 out:
676 	if (error)
677 		cxgb_free(sc);
678 
679 	return (error);
680 }
681 
682 /*
683  * The cxgb_controller_detach routine is called when the device is
684  * unloaded from the system.
685  */
686 
687 static int
688 cxgb_controller_detach(device_t dev)
689 {
690 	struct adapter *sc;
691 
692 	sc = device_get_softc(dev);
693 
694 	cxgb_free(sc);
695 
696 	return (0);
697 }
698 
699 /*
700  * cxgb_free() is called by the cxgb_controller_detach() routine
701  * to tear down the structures that were built up in
702  * cxgb_controller_attach(), and should be the final piece of work
703  * done when fully unloading the driver.
704  *
705  * Its responsibilities include:
706  *  1. Shutting down the threads started by the cxgb_controller_attach()
707  *     routine.
708  *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
709  *  3. Detaching all of the port devices created during the
710  *     cxgb_controller_attach() routine.
711  *  4. Removing the device children created via cxgb_controller_attach().
712  *  5. Releasing PCI resources associated with the device.
713  *  6. Turning off the offload support, iff it was turned on.
714  *  7. Destroying the mutexes created in cxgb_controller_attach().
715  *
716  */
717 static void
718 cxgb_free(struct adapter *sc)
719 {
720 	int i;
721 
722 	ADAPTER_LOCK(sc);
723 	sc->flags |= CXGB_SHUTDOWN;
724 	ADAPTER_UNLOCK(sc);
725 
726 	/*
727 	 * Make sure all child devices are gone.
728 	 */
729 	bus_generic_detach(sc->dev);
730 	for (i = 0; i < (sc)->params.nports; i++) {
731 		if (sc->portdev[i] &&
732 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
733 			device_printf(sc->dev, "failed to delete child port\n");
734 	}
735 
736 	/*
737 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
738 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
739 	 * all open devices have been closed.
740 	 */
741 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
742 					   __func__, sc->open_device_map));
743 	for (i = 0; i < sc->params.nports; i++) {
744 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
745 						  __func__, i));
746 	}
747 
748 	/*
749 	 * Finish off the adapter's callouts.
750 	 */
751 	callout_drain(&sc->cxgb_tick_ch);
752 	callout_drain(&sc->sge_timer_ch);
753 
754 	/*
755 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
756 	 * sysctls are cleaned up by the kernel linker.
757 	 */
758 	if (sc->flags & FULL_INIT_DONE) {
759  		t3_free_sge_resources(sc);
760  		sc->flags &= ~FULL_INIT_DONE;
761  	}
762 
763 	/*
764 	 * Release all interrupt resources.
765 	 */
766 	cxgb_teardown_interrupts(sc);
767 	if (sc->flags & (USING_MSI | USING_MSIX)) {
768 		device_printf(sc->dev, "releasing msi message(s)\n");
769 		pci_release_msi(sc->dev);
770 	} else {
771 		device_printf(sc->dev, "no msi message to release\n");
772 	}
773 
774 	if (sc->msix_regs_res != NULL) {
775 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
776 		    sc->msix_regs_res);
777 	}
778 
779 	/*
780 	 * Free the adapter's taskqueue.
781 	 */
782 	if (sc->tq != NULL) {
783 		taskqueue_free(sc->tq);
784 		sc->tq = NULL;
785 	}
786 
787 	if (is_offload(sc)) {
788 		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
789 		cxgb_adapter_unofld(sc);
790 	}
791 
792 #ifdef notyet
793 	if (sc->flags & CXGB_OFLD_INIT)
794 		cxgb_offload_deactivate(sc);
795 #endif
796 	free(sc->filters, M_DEVBUF);
797 	t3_sge_free(sc);
798 
799 	cxgb_offload_exit();
800 
801 	if (sc->udbs_res != NULL)
802 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
803 		    sc->udbs_res);
804 
805 	if (sc->regs_res != NULL)
806 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
807 		    sc->regs_res);
808 
809 	MTX_DESTROY(&sc->mdio_lock);
810 	MTX_DESTROY(&sc->sge.reg_lock);
811 	MTX_DESTROY(&sc->elmer_lock);
812 	ADAPTER_LOCK_DEINIT(sc);
813 }
814 
815 /**
816  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
817  *	@sc: the controller softc
818  *
819  *	Determines how many sets of SGE queues to use and initializes them.
820  *	We support multiple queue sets per port if we have MSI-X, otherwise
821  *	just one queue set per port.
822  */
823 static int
824 setup_sge_qsets(adapter_t *sc)
825 {
826 	int i, j, err, irq_idx = 0, qset_idx = 0;
827 	u_int ntxq = SGE_TXQ_PER_SET;
828 
829 	if ((err = t3_sge_alloc(sc)) != 0) {
830 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
831 		return (err);
832 	}
833 
834 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
835 		irq_idx = -1;
836 
837 	for (i = 0; i < (sc)->params.nports; i++) {
838 		struct port_info *pi = &sc->port[i];
839 
840 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
841 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
842 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
843 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
844 			if (err) {
845 				t3_free_sge_resources(sc);
846 				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
847 				    err);
848 				return (err);
849 			}
850 		}
851 	}
852 
853 	return (0);
854 }
855 
856 static void
857 cxgb_teardown_interrupts(adapter_t *sc)
858 {
859 	int i;
860 
861 	for (i = 0; i < SGE_QSETS; i++) {
862 		if (sc->msix_intr_tag[i] == NULL) {
863 
864 			/* Should have been set up fully or not at all */
865 			KASSERT(sc->msix_irq_res[i] == NULL &&
866 				sc->msix_irq_rid[i] == 0,
867 				("%s: half-done interrupt (%d).", __func__, i));
868 
869 			continue;
870 		}
871 
872 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
873 				  sc->msix_intr_tag[i]);
874 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
875 				     sc->msix_irq_res[i]);
876 
877 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
878 		sc->msix_irq_rid[i] = 0;
879 	}
880 
881 	if (sc->intr_tag) {
882 		KASSERT(sc->irq_res != NULL,
883 			("%s: half-done interrupt.", __func__));
884 
885 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
886 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
887 				     sc->irq_res);
888 
889 		sc->irq_res = sc->intr_tag = NULL;
890 		sc->irq_rid = 0;
891 	}
892 }
893 
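/*
 * Set up handlers for the interrupt scheme selected at attach time.  With
 * INTx or MSI a single vector (rid 0 or 1 respectively) runs sc->cxgb_intr
 * for the whole adapter; with MSI-X, rid 1 carries the adapter-wide async
 * interrupt and rids 2..msi_count are wired one-to-one to queue sets via
 * t3_intr_msix().
 */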
894 static int
895 cxgb_setup_interrupts(adapter_t *sc)
896 {
897 	struct resource *res;
898 	void *tag;
899 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
900 
901 	sc->irq_rid = intr_flag ? 1 : 0;
902 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
903 					     RF_SHAREABLE | RF_ACTIVE);
904 	if (sc->irq_res == NULL) {
905 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
906 			      intr_flag, sc->irq_rid);
907 		err = EINVAL;
908 		sc->irq_rid = 0;
909 	} else {
910 		err = bus_setup_intr(sc->dev, sc->irq_res,
911 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
912 		    sc->cxgb_intr, sc, &sc->intr_tag);
913 
914 		if (err) {
915 			device_printf(sc->dev,
916 				      "Cannot set up interrupt (%x, %u, %d)\n",
917 				      intr_flag, sc->irq_rid, err);
918 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
919 					     sc->irq_res);
920 			sc->irq_res = sc->intr_tag = NULL;
921 			sc->irq_rid = 0;
922 		}
923 	}
924 
925 	/* That's all for INTx or MSI */
926 	if (!(intr_flag & USING_MSIX) || err)
927 		return (err);
928 
929 	for (i = 0; i < sc->msi_count - 1; i++) {
930 		rid = i + 2;
931 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
932 					     RF_SHAREABLE | RF_ACTIVE);
933 		if (res == NULL) {
934 			device_printf(sc->dev, "Cannot allocate interrupt "
935 				      "for message %d\n", rid);
936 			err = EINVAL;
937 			break;
938 		}
939 
940 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
941 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
942 		if (err) {
943 			device_printf(sc->dev, "Cannot set up interrupt "
944 				      "for message %d (%d)\n", rid, err);
945 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
946 			break;
947 		}
948 
949 		sc->msix_irq_rid[i] = rid;
950 		sc->msix_irq_res[i] = res;
951 		sc->msix_intr_tag[i] = tag;
952 	}
953 
954 	if (err)
955 		cxgb_teardown_interrupts(sc);
956 
957 	return (err);
958 }
959 
960 
961 static int
962 cxgb_port_probe(device_t dev)
963 {
964 	struct port_info *p;
965 	char buf[80];
966 	const char *desc;
967 
968 	p = device_get_softc(dev);
969 	desc = p->phy.desc;
970 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
971 	device_set_desc_copy(dev, buf);
972 	return (0);
973 }
974 
975 
976 static int
977 cxgb_makedev(struct port_info *pi)
978 {
979 
980 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
981 	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
982 
983 	if (pi->port_cdev == NULL)
984 		return (ENOMEM);
985 
986 	pi->port_cdev->si_drv1 = (void *)pi;
987 
988 	return (0);
989 }
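
/*
 * The node created above is the per-port control device, named after the
 * interface (e.g. /dev/cxgb0); requests on it are handled by
 * cxgb_extension_ioctl() below.  A minimal userland sketch, where both the
 * request macro CHELSIO_EXAMPLE_REQ and struct ch_example_arg are only
 * stand-ins for a real request/argument pair from cxgb_ioctl.h:
 *
 *	int fd = open("/dev/cxgb0", O_RDWR);
 *	if (fd >= 0) {
 *		struct ch_example_arg arg = { 0 };
 *		(void) ioctl(fd, CHELSIO_EXAMPLE_REQ, &arg);
 *		close(fd);
 *	}
 */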
990 
991 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
992     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
993     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
994 #define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
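
/*
 * CXGB_CAP is the full set of capabilities advertised in if_capabilities;
 * CXGB_CAP_ENABLE is the subset turned on by default in if_capenable.
 * Individual bits can be toggled from userland with ifconfig(8), which
 * issues SIOCSIFCAP requests serviced by cxgb_ioctl().  Illustrative
 * commands:
 *
 *	ifconfig cxgb0 -tso		# clear IFCAP_TSO
 *	ifconfig cxgb0 lro		# set IFCAP_LRO
 */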
995 
996 static int
997 cxgb_port_attach(device_t dev)
998 {
999 	struct port_info *p;
1000 	struct ifnet *ifp;
1001 	int err;
1002 	struct adapter *sc;
1003 
1004 	p = device_get_softc(dev);
1005 	sc = p->adapter;
1006 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1007 	    device_get_unit(device_get_parent(dev)), p->port_id);
1008 	PORT_LOCK_INIT(p, p->lockbuf);
1009 
1010 	callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1011 	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1012 
1013 	/* Allocate an ifnet object and set it up */
1014 	ifp = p->ifp = if_alloc(IFT_ETHER);
1015 	if (ifp == NULL) {
1016 		device_printf(dev, "Cannot allocate ifnet\n");
1017 		return (ENOMEM);
1018 	}
1019 
1020 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1021 	ifp->if_init = cxgb_init;
1022 	ifp->if_softc = p;
1023 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1024 	ifp->if_ioctl = cxgb_ioctl;
1025 	ifp->if_start = cxgb_start;
1026 
1027 	ifp->if_snd.ifq_drv_maxlen = max(cxgb_snd_queue_len, ifqmaxlen);
1028 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
1029 	IFQ_SET_READY(&ifp->if_snd);
1030 
1031 	ifp->if_capabilities = CXGB_CAP;
1032 	ifp->if_capenable = CXGB_CAP_ENABLE;
1033 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1034 
1035 	/*
1036 	 * Disable TSO on 4-port - it isn't supported by the firmware.
1037 	 */
1038 	if (sc->params.nports > 2) {
1039 		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1040 		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1041 		ifp->if_hwassist &= ~CSUM_TSO;
1042 	}
1043 
1044 	ether_ifattach(ifp, p->hw_addr);
1045 	ifp->if_transmit = cxgb_transmit;
1046 	ifp->if_qflush = cxgb_qflush;
1047 
1048 #ifdef DEFAULT_JUMBO
1049 	if (sc->params.nports <= 2)
1050 		ifp->if_mtu = ETHERMTU_JUMBO;
1051 #endif
1052 	if ((err = cxgb_makedev(p)) != 0) {
1053 		printf("makedev failed %d\n", err);
1054 		return (err);
1055 	}
1056 
1057 	/* Create a list of media supported by this port */
1058 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1059 	    cxgb_media_status);
1060 	cxgb_build_medialist(p);
1061 
1062 	t3_sge_init_port(p);
1063 
1064 	return (err);
1065 }
1066 
1067 /*
1068  * cxgb_port_detach() is called via the device_detach methods when
1069  * cxgb_port_detach() is called via the device_detach method when
1070  * cxgb_free() calls bus_generic_detach().  It is responsible for
1071  * removing the device from the view of the kernel, i.e. from all
1072  * interface lists, etc.  This routine is only called when the driver is
1073  */
1074 static int
1075 cxgb_port_detach(device_t dev)
1076 {
1077 	struct port_info *p;
1078 	struct adapter *sc;
1079 	int i;
1080 
1081 	p = device_get_softc(dev);
1082 	sc = p->adapter;
1083 
1084 	/* Tell cxgb_ioctl and if_init that the port is going away */
1085 	ADAPTER_LOCK(sc);
1086 	SET_DOOMED(p);
1087 	wakeup(&sc->flags);
1088 	while (IS_BUSY(sc))
1089 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1090 	SET_BUSY(sc);
1091 	ADAPTER_UNLOCK(sc);
1092 
1093 	if (p->port_cdev != NULL)
1094 		destroy_dev(p->port_cdev);
1095 
1096 	cxgb_uninit_synchronized(p);
1097 	ether_ifdetach(p->ifp);
1098 
1099 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1100 		struct sge_qset *qs = &sc->sge.qs[i];
1101 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1102 
1103 		callout_drain(&txq->txq_watchdog);
1104 		callout_drain(&txq->txq_timer);
1105 	}
1106 
1107 	PORT_LOCK_DEINIT(p);
1108 	if_free(p->ifp);
1109 	p->ifp = NULL;
1110 
1111 	ADAPTER_LOCK(sc);
1112 	CLR_BUSY(sc);
1113 	wakeup_one(&sc->flags);
1114 	ADAPTER_UNLOCK(sc);
1115 	return (0);
1116 }
1117 
1118 void
1119 t3_fatal_err(struct adapter *sc)
1120 {
1121 	u_int fw_status[4];
1122 
1123 	if (sc->flags & FULL_INIT_DONE) {
1124 		t3_sge_stop(sc);
1125 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1126 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1127 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1128 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1129 		t3_intr_disable(sc);
1130 	}
1131 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1132 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1133 		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1134 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1135 }
1136 
1137 int
1138 t3_os_find_pci_capability(adapter_t *sc, int cap)
1139 {
1140 	device_t dev;
1141 	struct pci_devinfo *dinfo;
1142 	pcicfgregs *cfg;
1143 	uint32_t status;
1144 	uint8_t ptr;
1145 
1146 	dev = sc->dev;
1147 	dinfo = device_get_ivars(dev);
1148 	cfg = &dinfo->cfg;
1149 
1150 	status = pci_read_config(dev, PCIR_STATUS, 2);
1151 	if (!(status & PCIM_STATUS_CAPPRESENT))
1152 		return (0);
1153 
1154 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1155 	case 0:
1156 	case 1:
1157 		ptr = PCIR_CAP_PTR;
1158 		break;
1159 	case 2:
1160 		ptr = PCIR_CAP_PTR_2;
1161 		break;
1162 	default:
1163 		return (0);
1164 		break;
1165 	}
1166 	ptr = pci_read_config(dev, ptr, 1);
1167 
1168 	while (ptr != 0) {
1169 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1170 			return (ptr);
1171 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1172 	}
1173 
1174 	return (0);
1175 }
1176 
1177 int
1178 t3_os_pci_save_state(struct adapter *sc)
1179 {
1180 	device_t dev;
1181 	struct pci_devinfo *dinfo;
1182 
1183 	dev = sc->dev;
1184 	dinfo = device_get_ivars(dev);
1185 
1186 	pci_cfg_save(dev, dinfo, 0);
1187 	return (0);
1188 }
1189 
1190 int
1191 t3_os_pci_restore_state(struct adapter *sc)
1192 {
1193 	device_t dev;
1194 	struct pci_devinfo *dinfo;
1195 
1196 	dev = sc->dev;
1197 	dinfo = device_get_ivars(dev);
1198 
1199 	pci_cfg_restore(dev, dinfo);
1200 	return (0);
1201 }
1202 
1203 /**
1204  *	t3_os_link_changed - handle link status changes
1205  *	@adapter: the adapter associated with the link change
1206  *	@port_id: the port index whose link status has changed
1207  *	@link_status: the new status of the link
1208  *	@speed: the new speed setting
1209  *	@duplex: the new duplex setting
1210  *	@fc: the new flow-control setting
1211  *
1212  *	This is the OS-dependent handler for link status changes.  The OS
1213  *	neutral handler takes care of most of the processing for these events,
1214  *	then calls this handler for any OS-specific processing.
1215  */
1216 void
1217 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1218      int duplex, int fc, int mac_was_reset)
1219 {
1220 	struct port_info *pi = &adapter->port[port_id];
1221 	struct ifnet *ifp = pi->ifp;
1222 
1223 	/* no race with detach, so ifp should always be good */
1224 	KASSERT(ifp, ("%s: if detached.", __func__));
1225 
1226 	/* Reapply mac settings if they were lost due to a reset */
1227 	if (mac_was_reset) {
1228 		PORT_LOCK(pi);
1229 		cxgb_update_mac_settings(pi);
1230 		PORT_UNLOCK(pi);
1231 	}
1232 
1233 	if (link_status) {
1234 		ifp->if_baudrate = IF_Mbps(speed);
1235 		if_link_state_change(ifp, LINK_STATE_UP);
1236 	} else
1237 		if_link_state_change(ifp, LINK_STATE_DOWN);
1238 }
1239 
1240 /**
1241  *	t3_os_phymod_changed - handle PHY module changes
1242  *	@adap: the adapter
1243  *	@port_id: the index of the port whose PHY module changed
1244  *
1245  *	This is the OS-dependent handler for PHY module changes.  It is
1246  *	invoked when a PHY module is removed or inserted for any OS-specific
1247  *	processing.
1248  */
1249 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1250 {
1251 	static const char *mod_str[] = {
1252 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1253 	};
1254 	struct port_info *pi = &adap->port[port_id];
1255 	int mod = pi->phy.modtype;
1256 
1257 	if (mod != pi->media.ifm_cur->ifm_data)
1258 		cxgb_build_medialist(pi);
1259 
1260 	if (mod == phy_modtype_none)
1261 		if_printf(pi->ifp, "PHY module unplugged\n");
1262 	else {
1263 		KASSERT(mod < ARRAY_SIZE(mod_str),
1264 			("invalid PHY module type %d", mod));
1265 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1266 	}
1267 }
1268 
1269 void
1270 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1271 {
1272 
1273 	/*
1274 	 * The ifnet might not be allocated before this gets called,
1275 	 * as this is called early on in attach by t3_prep_adapter;
1276 	 * save the address in the port structure for later use.
1277 	 */
1278 	if (cxgb_debug)
1279 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1280 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1281 }
1282 
1283 /*
1284  * Programs the XGMAC based on the settings in the ifnet.  These settings
1285  * include MTU, MAC address, mcast addresses, etc.
1286  */
1287 static void
1288 cxgb_update_mac_settings(struct port_info *p)
1289 {
1290 	struct ifnet *ifp = p->ifp;
1291 	struct t3_rx_mode rm;
1292 	struct cmac *mac = &p->mac;
1293 	int mtu, hwtagging;
1294 
1295 	PORT_LOCK_ASSERT_OWNED(p);
1296 
1297 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1298 
1299 	mtu = ifp->if_mtu;
1300 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1301 		mtu += ETHER_VLAN_ENCAP_LEN;
1302 
1303 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1304 
1305 	t3_mac_set_mtu(mac, mtu);
1306 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1307 	t3_mac_set_address(mac, 0, p->hw_addr);
1308 	t3_init_rx_mode(&rm, p);
1309 	t3_mac_set_rx_mode(mac, &rm);
1310 }
1311 
1312 
1313 static int
1314 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1315 			      unsigned long n)
1316 {
1317 	int attempts = 5;
1318 
1319 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1320 		if (!--attempts)
1321 			return (ETIMEDOUT);
1322 		t3_os_sleep(10);
1323 	}
1324 	return 0;
1325 }
1326 
1327 static int
1328 init_tp_parity(struct adapter *adap)
1329 {
1330 	int i;
1331 	struct mbuf *m;
1332 	struct cpl_set_tcb_field *greq;
1333 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1334 
1335 	t3_tp_set_offload_mode(adap, 1);
1336 
1337 	for (i = 0; i < 16; i++) {
1338 		struct cpl_smt_write_req *req;
1339 
1340 		m = m_gethdr(M_WAITOK, MT_DATA);
1341 		req = mtod(m, struct cpl_smt_write_req *);
1342 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1343 		memset(req, 0, sizeof(*req));
1344 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1345 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1346 		req->iff = i;
1347 		t3_mgmt_tx(adap, m);
1348 	}
1349 
1350 	for (i = 0; i < 2048; i++) {
1351 		struct cpl_l2t_write_req *req;
1352 
1353 		m = m_gethdr(M_WAITOK, MT_DATA);
1354 		req = mtod(m, struct cpl_l2t_write_req *);
1355 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1356 		memset(req, 0, sizeof(*req));
1357 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1358 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1359 		req->params = htonl(V_L2T_W_IDX(i));
1360 		t3_mgmt_tx(adap, m);
1361 	}
1362 
1363 	for (i = 0; i < 2048; i++) {
1364 		struct cpl_rte_write_req *req;
1365 
1366 		m = m_gethdr(M_WAITOK, MT_DATA);
1367 		req = mtod(m, struct cpl_rte_write_req *);
1368 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1369 		memset(req, 0, sizeof(*req));
1370 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1371 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1372 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1373 		t3_mgmt_tx(adap, m);
1374 	}
1375 
1376 	m = m_gethdr(M_WAITOK, MT_DATA);
1377 	greq = mtod(m, struct cpl_set_tcb_field *);
1378 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1379 	memset(greq, 0, sizeof(*greq));
1380 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1381 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1382 	greq->mask = htobe64(1);
1383 	t3_mgmt_tx(adap, m);
1384 
1385 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1386 	t3_tp_set_offload_mode(adap, 0);
1387 	return (i);
1388 }
1389 
1390 /**
1391  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1392  *	@adap: the adapter
1393  *
1394  *	Sets up RSS to distribute packets to multiple receive queues.  We
1395  *	configure the RSS CPU lookup table to distribute to the number of HW
1396  *	receive queues, and the response queue lookup table to narrow that
1397  *	down to the response queues actually configured for each port.
1398  *	We always configure the RSS mapping for two ports since the mapping
1399  *	table has plenty of entries.
1400  */
1401 static void
1402 setup_rss(adapter_t *adap)
1403 {
1404 	int i;
1405 	u_int nq[2];
1406 	uint8_t cpus[SGE_QSETS + 1];
1407 	uint16_t rspq_map[RSS_TABLE_SIZE];
1408 
1409 	for (i = 0; i < SGE_QSETS; ++i)
1410 		cpus[i] = i;
1411 	cpus[SGE_QSETS] = 0xff;
1412 
1413 	nq[0] = nq[1] = 0;
1414 	for_each_port(adap, i) {
1415 		const struct port_info *pi = adap2pinfo(adap, i);
1416 
1417 		nq[pi->tx_chan] += pi->nqsets;
1418 	}
1419 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1420 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1421 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1422 	}
1423 
1424 	/* Calculate the reverse RSS map table */
1425 	for (i = 0; i < SGE_QSETS; ++i)
1426 		adap->rrss_map[i] = 0xff;
1427 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1428 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1429 			adap->rrss_map[rspq_map[i]] = i;
1430 
1431 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1432 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1433 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1434 	              cpus, rspq_map);
1435 
1436 }
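
/*
 * A worked example of the mapping built above, assuming a two-port adapter
 * with four queue sets per port (nq[0] = nq[1] = 4): the first half of
 * rspq_map repeats 0,1,2,3 (port 0's queue sets) and the second half
 * repeats 4,5,6,7 (port 1's queue sets, offset by nq[0]).  rrss_map then
 * records, for each queue set, the first RSS table index that maps to it,
 * which is used to reverse the lookup later.
 */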
1437 
1438 /*
1439  * Sends an mbuf to an offload queue driver
1440  * after dealing with any active network taps.
1441  */
1442 static inline int
1443 offload_tx(struct t3cdev *tdev, struct mbuf *m)
1444 {
1445 	int ret;
1446 
1447 	ret = t3_offload_tx(tdev, m);
1448 	return (ret);
1449 }
1450 
1451 static int
1452 write_smt_entry(struct adapter *adapter, int idx)
1453 {
1454 	struct port_info *pi = &adapter->port[idx];
1455 	struct cpl_smt_write_req *req;
1456 	struct mbuf *m;
1457 
1458 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1459 		return (ENOMEM);
1460 
1461 	req = mtod(m, struct cpl_smt_write_req *);
1462 	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1463 
1464 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1465 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1466 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1467 	req->iff = idx;
1468 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1469 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1470 
1471 	m_set_priority(m, 1);
1472 
1473 	offload_tx(&adapter->tdev, m);
1474 
1475 	return (0);
1476 }
1477 
1478 static int
1479 init_smt(struct adapter *adapter)
1480 {
1481 	int i;
1482 
1483 	for_each_port(adapter, i)
1484 		write_smt_entry(adapter, i);
1485 	return 0;
1486 }
1487 
1488 static void
1489 init_port_mtus(adapter_t *adapter)
1490 {
1491 	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1492 
1493 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1494 }
1495 
1496 static void
1497 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1498 			      int hi, int port)
1499 {
1500 	struct mbuf *m;
1501 	struct mngt_pktsched_wr *req;
1502 
1503 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1504 	if (m) {
1505 		req = mtod(m, struct mngt_pktsched_wr *);
1506 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1507 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1508 		req->sched = sched;
1509 		req->idx = qidx;
1510 		req->min = lo;
1511 		req->max = hi;
1512 		req->binding = port;
1513 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1514 		t3_mgmt_tx(adap, m);
1515 	}
1516 }
1517 
1518 static void
1519 bind_qsets(adapter_t *sc)
1520 {
1521 	int i, j;
1522 
1523 	for (i = 0; i < (sc)->params.nports; ++i) {
1524 		const struct port_info *pi = adap2pinfo(sc, i);
1525 
1526 		for (j = 0; j < pi->nqsets; ++j) {
1527 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1528 					  -1, pi->tx_chan);
1529 
1530 		}
1531 	}
1532 }
1533 
1534 static void
1535 update_tpeeprom(struct adapter *adap)
1536 {
1537 	const struct firmware *tpeeprom;
1538 
1539 	uint32_t version;
1540 	unsigned int major, minor;
1541 	int ret, len;
1542 	char rev, name[32];
1543 
1544 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1545 
1546 	major = G_TP_VERSION_MAJOR(version);
1547 	minor = G_TP_VERSION_MINOR(version);
1548 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1549 		return;
1550 
1551 	rev = t3rev2char(adap);
1552 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1553 
1554 	tpeeprom = firmware_get(name);
1555 	if (tpeeprom == NULL) {
1556 		device_printf(adap->dev,
1557 			      "could not load TP EEPROM: unable to load %s\n",
1558 			      name);
1559 		return;
1560 	}
1561 
1562 	len = tpeeprom->datasize - 4;
1563 
1564 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1565 	if (ret)
1566 		goto release_tpeeprom;
1567 
1568 	if (len != TP_SRAM_LEN) {
1569 		device_printf(adap->dev,
1570 			      "%s length is wrong len=%d expected=%d\n", name,
1571 			      len, TP_SRAM_LEN);
1572 		return;
1573 	}
1574 
1575 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1576 	    TP_SRAM_OFFSET);
1577 
1578 	if (!ret) {
1579 		device_printf(adap->dev,
1580 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1581 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1582 	} else
1583 		device_printf(adap->dev,
1584 			      "Protocol SRAM image update in EEPROM failed\n");
1585 
1586 release_tpeeprom:
1587 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1588 
1589 	return;
1590 }
1591 
1592 static int
1593 update_tpsram(struct adapter *adap)
1594 {
1595 	const struct firmware *tpsram;
1596 	int ret;
1597 	char rev, name[32];
1598 
1599 	rev = t3rev2char(adap);
1600 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1601 
1602 	update_tpeeprom(adap);
1603 
1604 	tpsram = firmware_get(name);
1605 	if (tpsram == NULL){
1606 		device_printf(adap->dev, "could not load TP SRAM\n");
1607 		return (EINVAL);
1608 	} else
1609 		device_printf(adap->dev, "updating TP SRAM\n");
1610 
1611 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1612 	if (ret)
1613 		goto release_tpsram;
1614 
1615 	ret = t3_set_proto_sram(adap, tpsram->data);
1616 	if (ret)
1617 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1618 
1619 release_tpsram:
1620 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1621 
1622 	return ret;
1623 }
1624 
1625 /**
1626  *	cxgb_up - enable the adapter
1627  *	@adap: adapter being enabled
1628  *
1629  *	Called when the first port is enabled, this function performs the
1630  *	actions necessary to make an adapter operational, such as completing
1631  *	the initialization of HW modules, and enabling interrupts.
1632  */
1633 static int
1634 cxgb_up(struct adapter *sc)
1635 {
1636 	int err = 0;
1637 	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1638 
1639 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1640 					   __func__, sc->open_device_map));
1641 
1642 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1643 
1644 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1645 
1646 		if ((sc->flags & FW_UPTODATE) == 0)
1647 			if ((err = upgrade_fw(sc)))
1648 				goto out;
1649 
1650 		if ((sc->flags & TPS_UPTODATE) == 0)
1651 			if ((err = update_tpsram(sc)))
1652 				goto out;
1653 
1654 		if (is_offload(sc) && nfilters != 0) {
1655 			sc->params.mc5.nservers = 0;
1656 
1657 			if (nfilters < 0)
1658 				sc->params.mc5.nfilters = mxf;
1659 			else
1660 				sc->params.mc5.nfilters = min(nfilters, mxf);
1661 		}
1662 
1663 		err = t3_init_hw(sc, 0);
1664 		if (err)
1665 			goto out;
1666 
1667 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1668 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1669 
1670 		err = setup_sge_qsets(sc);
1671 		if (err)
1672 			goto out;
1673 
1674 		alloc_filters(sc);
1675 		setup_rss(sc);
1676 
1677 		t3_intr_clear(sc);
1678 		err = cxgb_setup_interrupts(sc);
1679 		if (err)
1680 			goto out;
1681 
1682 		t3_add_configured_sysctls(sc);
1683 		sc->flags |= FULL_INIT_DONE;
1684 	}
1685 
1686 	t3_intr_clear(sc);
1687 	t3_sge_start(sc);
1688 	t3_intr_enable(sc);
1689 
1690 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1691 	    is_offload(sc) && init_tp_parity(sc) == 0)
1692 		sc->flags |= TP_PARITY_INIT;
1693 
1694 	if (sc->flags & TP_PARITY_INIT) {
1695 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1696 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1697 	}
1698 
1699 	if (!(sc->flags & QUEUES_BOUND)) {
1700 		bind_qsets(sc);
1701 		setup_hw_filters(sc);
1702 		sc->flags |= QUEUES_BOUND;
1703 	}
1704 
1705 	t3_sge_reset_adapter(sc);
1706 out:
1707 	return (err);
1708 }
1709 
1710 /*
1711  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1712  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1713  * during controller_detach, not here.
1714  */
1715 static void
1716 cxgb_down(struct adapter *sc)
1717 {
1718 	t3_sge_stop(sc);
1719 	t3_intr_disable(sc);
1720 }
1721 
1722 static int
1723 offload_open(struct port_info *pi)
1724 {
1725 	struct adapter *sc = pi->adapter;
1726 	struct t3cdev *tdev = &sc->tdev;
1727 
1728 	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1729 
1730 	t3_tp_set_offload_mode(sc, 1);
1731 	tdev->lldev = pi->ifp;
1732 	init_port_mtus(sc);
1733 	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1734 		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1735 	init_smt(sc);
1736 	cxgb_add_clients(tdev);
1737 
1738 	return (0);
1739 }
1740 
1741 static int
1742 offload_close(struct t3cdev *tdev)
1743 {
1744 	struct adapter *adapter = tdev2adap(tdev);
1745 
1746 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1747 		return (0);
1748 
1749 	/* Call back all registered clients */
1750 	cxgb_remove_clients(tdev);
1751 
1752 	tdev->lldev = NULL;
1753 	cxgb_set_dummy_ops(tdev);
1754 	t3_tp_set_offload_mode(adapter, 0);
1755 
1756 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1757 
1758 	return (0);
1759 }
1760 
1761 /*
1762  * if_init for cxgb ports.
1763  */
1764 static void
1765 cxgb_init(void *arg)
1766 {
1767 	struct port_info *p = arg;
1768 	struct adapter *sc = p->adapter;
1769 
1770 	ADAPTER_LOCK(sc);
1771 	cxgb_init_locked(p); /* releases adapter lock */
1772 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1773 }
1774 
1775 static int
1776 cxgb_init_locked(struct port_info *p)
1777 {
1778 	struct adapter *sc = p->adapter;
1779 	struct ifnet *ifp = p->ifp;
1780 	struct cmac *mac = &p->mac;
1781 	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1782 
1783 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1784 
1785 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1786 		gave_up_lock = 1;
1787 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1788 			rc = EINTR;
1789 			goto done;
1790 		}
1791 	}
1792 	if (IS_DOOMED(p)) {
1793 		rc = ENXIO;
1794 		goto done;
1795 	}
1796 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1797 
1798 	/*
1799 	 * The code that runs during one-time adapter initialization can sleep
1800 	 * so it's important not to hold any locks across it.
1801 	 */
1802 	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1803 
1804 	if (may_sleep) {
1805 		SET_BUSY(sc);
1806 		gave_up_lock = 1;
1807 		ADAPTER_UNLOCK(sc);
1808 	}
1809 
1810 	if (sc->open_device_map == 0) {
1811 		if ((rc = cxgb_up(sc)) != 0)
1812 			goto done;
1813 
1814 		if (is_offload(sc) && !ofld_disable && offload_open(p))
1815 			log(LOG_WARNING,
1816 			    "Could not initialize offload capabilities\n");
1817 	}
1818 
1819 	PORT_LOCK(p);
1820 	if (isset(&sc->open_device_map, p->port_id) &&
1821 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1822 		PORT_UNLOCK(p);
1823 		goto done;
1824 	}
1825 	t3_port_intr_enable(sc, p->port_id);
1826 	if (!mac->multiport)
1827 		t3_mac_init(mac);
1828 	cxgb_update_mac_settings(p);
1829 	t3_link_start(&p->phy, mac, &p->link_config);
1830 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1831 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1832 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1833 	PORT_UNLOCK(p);
1834 
1835 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1836 		struct sge_qset *qs = &sc->sge.qs[i];
1837 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1838 
1839 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1840 				 txq->txq_watchdog.c_cpu);
1841 	}
1842 
1843 	/* all ok */
1844 	setbit(&sc->open_device_map, p->port_id);
1845 	callout_reset(&p->link_check_ch,
1846 	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1847 	    link_check_callout, p);
1848 
1849 done:
1850 	if (may_sleep) {
1851 		ADAPTER_LOCK(sc);
1852 		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1853 		CLR_BUSY(sc);
1854 	}
1855 	if (gave_up_lock)
1856 		wakeup_one(&sc->flags);
1857 	ADAPTER_UNLOCK(sc);
1858 	return (rc);
1859 }
1860 
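/*
 * Bring down one port.  Called with the adapter lock held; waits for the
 * adapter to become idle, marks it busy, and drops the lock around
 * cxgb_uninit_synchronized().  Returns with the lock released.
 */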
1861 static int
1862 cxgb_uninit_locked(struct port_info *p)
1863 {
1864 	struct adapter *sc = p->adapter;
1865 	int rc;
1866 
1867 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1868 
1869 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1870 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1871 			rc = EINTR;
1872 			goto done;
1873 		}
1874 	}
1875 	if (IS_DOOMED(p)) {
1876 		rc = ENXIO;
1877 		goto done;
1878 	}
1879 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1880 	SET_BUSY(sc);
1881 	ADAPTER_UNLOCK(sc);
1882 
1883 	rc = cxgb_uninit_synchronized(p);
1884 
1885 	ADAPTER_LOCK(sc);
1886 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1887 	CLR_BUSY(sc);
1888 	wakeup_one(&sc->flags);
1889 done:
1890 	ADAPTER_UNLOCK(sc);
1891 	return (rc);
1892 }
1893 
1894 /*
1895  * Called on "ifconfig down", and from port_detach
1896  */
1897 static int
1898 cxgb_uninit_synchronized(struct port_info *pi)
1899 {
1900 	struct adapter *sc = pi->adapter;
1901 	struct ifnet *ifp = pi->ifp;
1902 
1903 	/*
1904 	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1905 	 */
1906 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1907 
1908 	/*
1909 	 * Clear this port's bit from the open device map, and then drain all
1910 	 * the tasks that can access/manipulate this port's port_info or ifp.
1911 	 * We disable this port's interrupts here and so the slow/ext
1912 	 * interrupt tasks won't be enqueued.  The tick task will continue to
1913 	 * be enqueued every second but the runs after this drain will not see
1914 	 * this port in the open device map.
1915 	 *
1916 	 * A well-behaved task must take open_device_map into account and ignore
1917 	 * ports that are not open.
1918 	 */
1919 	clrbit(&sc->open_device_map, pi->port_id);
1920 	t3_port_intr_disable(sc, pi->port_id);
1921 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1922 	taskqueue_drain(sc->tq, &sc->tick_task);
1923 
1924 	callout_drain(&pi->link_check_ch);
1925 	taskqueue_drain(sc->tq, &pi->link_check_task);
1926 
1927 	PORT_LOCK(pi);
1928 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1929 
1930 	/* disable pause frames */
1931 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1932 
1933 	/* Reset RX FIFO HWM */
1934 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1935 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1936 
1937 	DELAY(100 * 1000);
1938 
1939 	/* Wait for TXFIFO empty */
1940 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1941 			F_TXFIFO_EMPTY, 1, 20, 5);
1942 
1943 	DELAY(100 * 1000);
1944 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1945 
1946 
1947 	pi->phy.ops->power_down(&pi->phy, 1);
1948 
1949 	PORT_UNLOCK(pi);
1950 
1951 	pi->link_config.link_ok = 0;
1952 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1953 
1954 	if ((sc->open_device_map & PORT_MASK) == 0)
1955 		offload_close(&sc->tdev);
1956 
1957 	if (sc->open_device_map == 0)
1958 		cxgb_down(pi->adapter);
1959 
1960 	return (0);
1961 }
1962 
1963 /*
1964  * Mark lro enabled or disabled in all qsets for this port
1965  */
1966 static int
1967 cxgb_set_lro(struct port_info *p, int enabled)
1968 {
1969 	int i;
1970 	struct adapter *adp = p->adapter;
1971 	struct sge_qset *q;
1972 
1973 	for (i = 0; i < p->nqsets; i++) {
1974 		q = &adp->sge.qs[p->first_qset + i];
1975 		q->lro.enabled = (enabled != 0);
1976 	}
1977 	return (0);
1978 }
1979 
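/*
 * if_ioctl handler for cxgb ports.
 */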
1980 static int
1981 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1982 {
1983 	struct port_info *p = ifp->if_softc;
1984 	struct adapter *sc = p->adapter;
1985 	struct ifreq *ifr = (struct ifreq *)data;
1986 	int flags, error = 0, mtu;
1987 	uint32_t mask;
1988 
1989 	switch (command) {
1990 	case SIOCSIFMTU:
1991 		ADAPTER_LOCK(sc);
1992 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1993 		if (error) {
1994 fail:
1995 			ADAPTER_UNLOCK(sc);
1996 			return (error);
1997 		}
1998 
1999 		mtu = ifr->ifr_mtu;
2000 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
2001 			error = EINVAL;
2002 		} else {
2003 			ifp->if_mtu = mtu;
2004 			PORT_LOCK(p);
2005 			cxgb_update_mac_settings(p);
2006 			PORT_UNLOCK(p);
2007 		}
2008 		ADAPTER_UNLOCK(sc);
2009 		break;
2010 	case SIOCSIFFLAGS:
2011 		ADAPTER_LOCK(sc);
2012 		if (IS_DOOMED(p)) {
2013 			error = ENXIO;
2014 			goto fail;
2015 		}
2016 		if (ifp->if_flags & IFF_UP) {
2017 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2018 				flags = p->if_flags;
2019 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2020 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2021 					if (IS_BUSY(sc)) {
2022 						error = EBUSY;
2023 						goto fail;
2024 					}
2025 					PORT_LOCK(p);
2026 					cxgb_update_mac_settings(p);
2027 					PORT_UNLOCK(p);
2028 				}
2029 				ADAPTER_UNLOCK(sc);
2030 			} else
2031 				error = cxgb_init_locked(p);
2032 			p->if_flags = ifp->if_flags;
2033 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2034 			error = cxgb_uninit_locked(p);
2035 		else
2036 			ADAPTER_UNLOCK(sc);
2037 
2038 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2039 		break;
2040 	case SIOCADDMULTI:
2041 	case SIOCDELMULTI:
2042 		ADAPTER_LOCK(sc);
2043 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2044 		if (error)
2045 			goto fail;
2046 
2047 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2048 			PORT_LOCK(p);
2049 			cxgb_update_mac_settings(p);
2050 			PORT_UNLOCK(p);
2051 		}
2052 		ADAPTER_UNLOCK(sc);
2053 
2054 		break;
2055 	case SIOCSIFCAP:
2056 		ADAPTER_LOCK(sc);
2057 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2058 		if (error)
2059 			goto fail;
2060 
2061 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2062 		if (mask & IFCAP_TXCSUM) {
2063 			ifp->if_capenable ^= IFCAP_TXCSUM;
2064 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2065 
2066 			if (IFCAP_TSO & ifp->if_capenable &&
2067 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2068 				ifp->if_capenable &= ~IFCAP_TSO;
2069 				ifp->if_hwassist &= ~CSUM_TSO;
2070 				if_printf(ifp,
2071 				    "tso disabled due to -txcsum.\n");
2072 			}
2073 		}
2074 		if (mask & IFCAP_RXCSUM)
2075 			ifp->if_capenable ^= IFCAP_RXCSUM;
2076 		if (mask & IFCAP_TSO4) {
2077 			ifp->if_capenable ^= IFCAP_TSO4;
2078 
2079 			if (IFCAP_TSO & ifp->if_capenable) {
2080 				if (IFCAP_TXCSUM & ifp->if_capenable)
2081 					ifp->if_hwassist |= CSUM_TSO;
2082 				else {
2083 					ifp->if_capenable &= ~IFCAP_TSO;
2084 					ifp->if_hwassist &= ~CSUM_TSO;
2085 					if_printf(ifp,
2086 					    "enable txcsum first.\n");
2087 					error = EAGAIN;
2088 				}
2089 			} else
2090 				ifp->if_hwassist &= ~CSUM_TSO;
2091 		}
2092 		if (mask & IFCAP_LRO) {
2093 			ifp->if_capenable ^= IFCAP_LRO;
2094 
2095 			/* Safe to do this even if cxgb_up not called yet */
2096 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2097 		}
2098 		if (mask & IFCAP_VLAN_HWTAGGING) {
2099 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2100 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2101 				PORT_LOCK(p);
2102 				cxgb_update_mac_settings(p);
2103 				PORT_UNLOCK(p);
2104 			}
2105 		}
2106 		if (mask & IFCAP_VLAN_MTU) {
2107 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2108 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2109 				PORT_LOCK(p);
2110 				cxgb_update_mac_settings(p);
2111 				PORT_UNLOCK(p);
2112 			}
2113 		}
2114 		if (mask & IFCAP_VLAN_HWTSO)
2115 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2116 		if (mask & IFCAP_VLAN_HWCSUM)
2117 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2118 
2119 #ifdef VLAN_CAPABILITIES
2120 		VLAN_CAPABILITIES(ifp);
2121 #endif
2122 		ADAPTER_UNLOCK(sc);
2123 		break;
2124 	case SIOCSIFMEDIA:
2125 	case SIOCGIFMEDIA:
2126 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2127 		break;
2128 	default:
2129 		error = ether_ioctl(ifp, command, data);
2130 	}
2131 
2132 	return (error);
2133 }
2134 
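/*
 * if_media change callback.  Manual media selection is not supported.
 */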
2135 static int
2136 cxgb_media_change(struct ifnet *ifp)
2137 {
2138 	return (EOPNOTSUPP);
2139 }
2140 
2141 /*
2142  * Translates phy->modtype to the correct Ethernet media subtype.
2143  */
2144 static int
2145 cxgb_ifm_type(int mod)
2146 {
2147 	switch (mod) {
2148 	case phy_modtype_sr:
2149 		return (IFM_10G_SR);
2150 	case phy_modtype_lr:
2151 		return (IFM_10G_LR);
2152 	case phy_modtype_lrm:
2153 		return (IFM_10G_LRM);
2154 	case phy_modtype_twinax:
2155 		return (IFM_10G_TWINAX);
2156 	case phy_modtype_twinax_long:
2157 		return (IFM_10G_TWINAX_LONG);
2158 	case phy_modtype_none:
2159 		return (IFM_NONE);
2160 	case phy_modtype_unknown:
2161 		return (IFM_UNKNOWN);
2162 	}
2163 
2164 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2165 	return (IFM_UNKNOWN);
2166 }
2167 
2168 /*
2169  * Rebuilds the ifmedia list for this port, and sets the current media.
2170  */
2171 static void
2172 cxgb_build_medialist(struct port_info *p)
2173 {
2174 	struct cphy *phy = &p->phy;
2175 	struct ifmedia *media = &p->media;
2176 	int mod = phy->modtype;
2177 	int m = IFM_ETHER | IFM_FDX;
2178 
2179 	PORT_LOCK(p);
2180 
2181 	ifmedia_removeall(media);
2182 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2183 		/* Copper (RJ45) */
2184 
2185 		if (phy->caps & SUPPORTED_10000baseT_Full)
2186 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2187 
2188 		if (phy->caps & SUPPORTED_1000baseT_Full)
2189 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2190 
2191 		if (phy->caps & SUPPORTED_100baseT_Full)
2192 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2193 
2194 		if (phy->caps & SUPPORTED_10baseT_Full)
2195 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2196 
2197 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2198 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2199 
2200 	} else if (phy->caps & SUPPORTED_TP) {
2201 		/* Copper (CX4) */
2202 
2203 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2204 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2205 
2206 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2207 		ifmedia_set(media, m | IFM_10G_CX4);
2208 
2209 	} else if (phy->caps & SUPPORTED_FIBRE &&
2210 		   phy->caps & SUPPORTED_10000baseT_Full) {
2211 		/* 10G optical (but includes SFP+ twinax) */
2212 
2213 		m |= cxgb_ifm_type(mod);
2214 		if (IFM_SUBTYPE(m) == IFM_NONE)
2215 			m &= ~IFM_FDX;
2216 
2217 		ifmedia_add(media, m, mod, NULL);
2218 		ifmedia_set(media, m);
2219 
2220 	} else if (phy->caps & SUPPORTED_FIBRE &&
2221 		   phy->caps & SUPPORTED_1000baseT_Full) {
2222 		/* 1G optical */
2223 
2224 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2225 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2226 		ifmedia_set(media, m | IFM_1000_SX);
2227 
2228 	} else {
2229 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2230 			    phy->caps));
2231 	}
2232 
2233 	PORT_UNLOCK(p);
2234 }
2235 
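/*
 * Report the current media and link status.  The media list is rebuilt
 * first if the PHY's module type has changed since it was last built.
 */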
2236 static void
2237 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2238 {
2239 	struct port_info *p = ifp->if_softc;
2240 	struct ifmedia_entry *cur = p->media.ifm_cur;
2241 	int speed = p->link_config.speed;
2242 
2243 	if (cur->ifm_data != p->phy.modtype) {
2244 		cxgb_build_medialist(p);
2245 		cur = p->media.ifm_cur;
2246 	}
2247 
2248 	ifmr->ifm_status = IFM_AVALID;
2249 	if (!p->link_config.link_ok)
2250 		return;
2251 
2252 	ifmr->ifm_status |= IFM_ACTIVE;
2253 
2254 	/*
2255 	 * active and current will differ iff current media is autoselect.  That
2256 	 * can happen only for copper RJ45.
2257 	 */
2258 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2259 		return;
2260 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2261 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2262 
2263 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2264 	if (speed == SPEED_10000)
2265 		ifmr->ifm_active |= IFM_10G_T;
2266 	else if (speed == SPEED_1000)
2267 		ifmr->ifm_active |= IFM_1000_T;
2268 	else if (speed == SPEED_100)
2269 		ifmr->ifm_active |= IFM_100_TX;
2270 	else if (speed == SPEED_10)
2271 		ifmr->ifm_active |= IFM_10_T;
2272 	else
2273 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2274 			    speed));
2275 }
2276 
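/*
 * Interrupt handler for slow-path events: mask further PL interrupts and
 * defer the work to the slow interrupt task.
 */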
2277 static void
2278 cxgb_async_intr(void *data)
2279 {
2280 	adapter_t *sc = data;
2281 
2282 	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2283 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2284 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2285 }
2286 
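/*
 * Callout handler that schedules a link check (on the adapter taskqueue)
 * for a port that is still open.
 */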
2287 static void
2288 link_check_callout(void *arg)
2289 {
2290 	struct port_info *pi = arg;
2291 	struct adapter *sc = pi->adapter;
2292 
2293 	if (!isset(&sc->open_device_map, pi->port_id))
2294 		return;
2295 
2296 	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2297 }
2298 
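/*
 * Task that processes a possible link state change.  It reschedules itself
 * (via the callout) when polling is needed: on a link fault, or when the
 * PHY cannot interrupt on link changes.
 */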
2299 static void
2300 check_link_status(void *arg, int pending)
2301 {
2302 	struct port_info *pi = arg;
2303 	struct adapter *sc = pi->adapter;
2304 
2305 	if (!isset(&sc->open_device_map, pi->port_id))
2306 		return;
2307 
2308 	t3_link_changed(sc, pi->port_id);
2309 
2310 	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2311 		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2312 }
2313 
2314 void
2315 t3_os_link_intr(struct port_info *pi)
2316 {
2317 	/*
2318 	 * Schedule a link check in the near future.  If the link is flapping
2319 	 * rapidly we'll keep resetting the callout and delaying the check until
2320 	 * things stabilize a bit.
2321 	 */
2322 	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2323 }
2324 
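/*
 * Periodic MAC watchdog for T3 rev B2 adapters.  Runs the watchdog on every
 * open port with a good link and restarts the MAC when the watchdog asks
 * for it.
 */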
2325 static void
2326 check_t3b2_mac(struct adapter *sc)
2327 {
2328 	int i;
2329 
2330 	if (sc->flags & CXGB_SHUTDOWN)
2331 		return;
2332 
2333 	for_each_port(sc, i) {
2334 		struct port_info *p = &sc->port[i];
2335 		int status;
2336 #ifdef INVARIANTS
2337 		struct ifnet *ifp = p->ifp;
2338 #endif
2339 
2340 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2341 		    !p->link_config.link_ok)
2342 			continue;
2343 
2344 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2345 			("%s: state mismatch (drv_flags %x, device_map %x)",
2346 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2347 
2348 		PORT_LOCK(p);
2349 		status = t3b2_mac_watchdog_task(&p->mac);
2350 		if (status == 1)
2351 			p->mac.stats.num_toggled++;
2352 		else if (status == 2) {
2353 			struct cmac *mac = &p->mac;
2354 
2355 			cxgb_update_mac_settings(p);
2356 			t3_link_start(&p->phy, mac, &p->link_config);
2357 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2358 			t3_port_intr_enable(sc, p->port_id);
2359 			p->mac.stats.num_resets++;
2360 		}
2361 		PORT_UNLOCK(p);
2362 	}
2363 }
2364 
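/*
 * Per-second callout: queue the tick task and rearm.
 */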
2365 static void
2366 cxgb_tick(void *arg)
2367 {
2368 	adapter_t *sc = (adapter_t *)arg;
2369 
2370 	if (sc->flags & CXGB_SHUTDOWN)
2371 		return;
2372 
2373 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2374 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2375 }
2376 
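/*
 * The tick task.  Runs the B2 MAC watchdog if needed, accounts for starved
 * response queues and empty free lists, and refreshes each open port's
 * ifnet statistics from the MAC counters.
 */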
2377 static void
2378 cxgb_tick_handler(void *arg, int count)
2379 {
2380 	adapter_t *sc = (adapter_t *)arg;
2381 	const struct adapter_params *p = &sc->params;
2382 	int i;
2383 	uint32_t cause, reset;
2384 
2385 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2386 		return;
2387 
2388 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2389 		check_t3b2_mac(sc);
2390 
2391 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2392 	if (cause) {
2393 		struct sge_qset *qs = &sc->sge.qs[0];
2394 		uint32_t mask, v;
2395 
2396 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2397 
2398 		mask = 1;
2399 		for (i = 0; i < SGE_QSETS; i++) {
2400 			if (v & mask)
2401 				qs[i].rspq.starved++;
2402 			mask <<= 1;
2403 		}
2404 
2405 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2406 
2407 		for (i = 0; i < SGE_QSETS * 2; i++) {
2408 			if (v & mask) {
2409 				qs[i / 2].fl[i % 2].empty++;
2410 			}
2411 			mask <<= 1;
2412 		}
2413 
2414 		/* clear */
2415 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2416 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2417 	}
2418 
2419 	for (i = 0; i < sc->params.nports; i++) {
2420 		struct port_info *pi = &sc->port[i];
2421 		struct ifnet *ifp = pi->ifp;
2422 		struct cmac *mac = &pi->mac;
2423 		struct mac_stats *mstats = &mac->stats;
2424 		int drops, j;
2425 
2426 		if (!isset(&sc->open_device_map, pi->port_id))
2427 			continue;
2428 
2429 		PORT_LOCK(pi);
2430 		t3_mac_update_stats(mac);
2431 		PORT_UNLOCK(pi);
2432 
2433 		ifp->if_opackets = mstats->tx_frames;
2434 		ifp->if_ipackets = mstats->rx_frames;
2435 		ifp->if_obytes = mstats->tx_octets;
2436 		ifp->if_ibytes = mstats->rx_octets;
2437 		ifp->if_omcasts = mstats->tx_mcast_frames;
2438 		ifp->if_imcasts = mstats->rx_mcast_frames;
2439 		ifp->if_collisions = mstats->tx_total_collisions;
2440 		ifp->if_iqdrops = mstats->rx_cong_drops;
2441 
2442 		drops = 0;
2443 		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2444 			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2445 		ifp->if_snd.ifq_drops = drops;
2446 
2447 		ifp->if_oerrors =
2448 		    mstats->tx_excess_collisions +
2449 		    mstats->tx_underrun +
2450 		    mstats->tx_len_errs +
2451 		    mstats->tx_mac_internal_errs +
2452 		    mstats->tx_excess_deferral +
2453 		    mstats->tx_fcs_errs;
2454 		ifp->if_ierrors =
2455 		    mstats->rx_jabber +
2456 		    mstats->rx_data_errs +
2457 		    mstats->rx_sequence_errs +
2458 		    mstats->rx_runt +
2459 		    mstats->rx_too_long +
2460 		    mstats->rx_mac_internal_errs +
2461 		    mstats->rx_short +
2462 		    mstats->rx_fcs_errs;
2463 
2464 		if (mac->multiport)
2465 			continue;
2466 
2467 		/* Count rx fifo overflows, once per second */
2468 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2469 		reset = 0;
2470 		if (cause & F_RXFIFO_OVERFLOW) {
2471 			mac->stats.rx_fifo_ovfl++;
2472 			reset |= F_RXFIFO_OVERFLOW;
2473 		}
2474 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2475 	}
2476 }
2477 
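/*
 * Read back and rewrite BARs 1, 3, and 5.  The code is compiled out for now
 * (see the comment inside).
 */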
2478 static void
2479 touch_bars(device_t dev)
2480 {
2481 	/*
2482 	 * Don't enable yet
2483 	 */
2484 #if !defined(__LP64__) && 0
2485 	u32 v;
2486 
2487 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2488 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2489 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2490 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2491 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2492 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2493 #endif
2494 }
2495 
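/*
 * Write len bytes at offset into the serial EEPROM.  Unaligned writes are
 * handled by reading the bordering 32-bit words and merging the new data
 * into them before writing everything back out.
 */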
2496 static int
2497 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2498 {
2499 	uint8_t *buf;
2500 	int err = 0;
2501 	u32 aligned_offset, aligned_len, *p;
2502 	struct adapter *adapter = pi->adapter;
2503 
2504 
2505 	aligned_offset = offset & ~3;
2506 	aligned_len = (len + (offset & 3) + 3) & ~3;
2507 
2508 	if (aligned_offset != offset || aligned_len != len) {
2509 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2510 		if (!buf)
2511 			return (ENOMEM);
2512 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2513 		if (!err && aligned_len > 4)
2514 			err = t3_seeprom_read(adapter,
2515 					      aligned_offset + aligned_len - 4,
2516 					      (u32 *)&buf[aligned_len - 4]);
2517 		if (err)
2518 			goto out;
2519 		memcpy(buf + (offset & 3), data, len);
2520 	} else
2521 		buf = (uint8_t *)(uintptr_t)data;
2522 
2523 	err = t3_seeprom_wp(adapter, 0);
2524 	if (err)
2525 		goto out;
2526 
2527 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2528 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2529 		aligned_offset += 4;
2530 	}
2531 
2532 	if (!err)
2533 		err = t3_seeprom_wp(adapter, 1);
2534 out:
2535 	if (buf != data)
2536 		free(buf, M_DEVBUF);
2537 	return (err);
2538 }
2539 
2540 
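/*
 * Range check helper for the ioctls below: negative values (which mean
 * "leave unchanged") always pass, everything else must be within [lo, hi].
 */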
2541 static int
2542 in_range(int val, int lo, int hi)
2543 {
2544 	return (val < 0 || (val <= hi && val >= lo));
2545 }
2546 
2547 static int
2548 cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2549 {
2550 	return (0);
2551 }
2552 
2553 static int
2554 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2555 {
2556 	return (0);
2557 }
2558 
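/*
 * ioctl handler for the driver's control character device (used by the
 * Chelsio management tools).  All commands require driver privilege.
 */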
2559 static int
2560 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2561     int fflag, struct thread *td)
2562 {
2563 	int mmd, error = 0;
2564 	struct port_info *pi = dev->si_drv1;
2565 	adapter_t *sc = pi->adapter;
2566 
2567 #ifdef PRIV_SUPPORTED
2568 	if (priv_check(td, PRIV_DRIVER)) {
2569 		if (cxgb_debug)
2570 			printf("user does not have access to privileged ioctls\n");
2571 		return (EPERM);
2572 	}
2573 #else
2574 	if (suser(td)) {
2575 		if (cxgb_debug)
2576 			printf("user does not have access to privileged ioctls\n");
2577 		return (EPERM);
2578 	}
2579 #endif
2580 
2581 	switch (cmd) {
2582 	case CHELSIO_GET_MIIREG: {
2583 		uint32_t val;
2584 		struct cphy *phy = &pi->phy;
2585 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2586 
2587 		if (!phy->mdio_read)
2588 			return (EOPNOTSUPP);
2589 		if (is_10G(sc)) {
2590 			mmd = mid->phy_id >> 8;
2591 			if (!mmd)
2592 				mmd = MDIO_DEV_PCS;
2593 			else if (mmd > MDIO_DEV_VEND2)
2594 				return (EINVAL);
2595 
2596 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2597 					     mid->reg_num, &val);
2598 		} else
2599 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2600 					     mid->reg_num & 0x1f, &val);
2601 		if (error == 0)
2602 			mid->val_out = val;
2603 		break;
2604 	}
2605 	case CHELSIO_SET_MIIREG: {
2606 		struct cphy *phy = &pi->phy;
2607 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2608 
2609 		if (!phy->mdio_write)
2610 			return (EOPNOTSUPP);
2611 		if (is_10G(sc)) {
2612 			mmd = mid->phy_id >> 8;
2613 			if (!mmd)
2614 				mmd = MDIO_DEV_PCS;
2615 			else if (mmd > MDIO_DEV_VEND2)
2616 				return (EINVAL);
2617 
2618 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2619 					      mmd, mid->reg_num, mid->val_in);
2620 		} else
2621 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2622 					      mid->reg_num & 0x1f,
2623 					      mid->val_in);
2624 		break;
2625 	}
2626 	case CHELSIO_SETREG: {
2627 		struct ch_reg *edata = (struct ch_reg *)data;
2628 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2629 			return (EFAULT);
2630 		t3_write_reg(sc, edata->addr, edata->val);
2631 		break;
2632 	}
2633 	case CHELSIO_GETREG: {
2634 		struct ch_reg *edata = (struct ch_reg *)data;
2635 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2636 			return (EFAULT);
2637 		edata->val = t3_read_reg(sc, edata->addr);
2638 		break;
2639 	}
2640 	case CHELSIO_GET_SGE_CONTEXT: {
2641 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2642 		mtx_lock_spin(&sc->sge.reg_lock);
2643 		switch (ecntxt->cntxt_type) {
2644 		case CNTXT_TYPE_EGRESS:
2645 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2646 			    ecntxt->data);
2647 			break;
2648 		case CNTXT_TYPE_FL:
2649 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2650 			    ecntxt->data);
2651 			break;
2652 		case CNTXT_TYPE_RSP:
2653 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2654 			    ecntxt->data);
2655 			break;
2656 		case CNTXT_TYPE_CQ:
2657 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2658 			    ecntxt->data);
2659 			break;
2660 		default:
2661 			error = EINVAL;
2662 			break;
2663 		}
2664 		mtx_unlock_spin(&sc->sge.reg_lock);
2665 		break;
2666 	}
2667 	case CHELSIO_GET_SGE_DESC: {
2668 		struct ch_desc *edesc = (struct ch_desc *)data;
2669 		int ret;
2670 		if (edesc->queue_num >= SGE_QSETS * 6)
2671 			return (EINVAL);
2672 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2673 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2674 		if (ret < 0)
2675 			return (EINVAL);
2676 		edesc->size = ret;
2677 		break;
2678 	}
2679 	case CHELSIO_GET_QSET_PARAMS: {
2680 		struct qset_params *q;
2681 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2682 		int q1 = pi->first_qset;
2683 		int nqsets = pi->nqsets;
2684 		int i;
2685 
2686 		if (t->qset_idx >= nqsets)
2687 			return (EINVAL);
2688 
2689 		i = q1 + t->qset_idx;
2690 		q = &sc->params.sge.qset[i];
2691 		t->rspq_size   = q->rspq_size;
2692 		t->txq_size[0] = q->txq_size[0];
2693 		t->txq_size[1] = q->txq_size[1];
2694 		t->txq_size[2] = q->txq_size[2];
2695 		t->fl_size[0]  = q->fl_size;
2696 		t->fl_size[1]  = q->jumbo_size;
2697 		t->polling     = q->polling;
2698 		t->lro         = q->lro;
2699 		t->intr_lat    = q->coalesce_usecs;
2700 		t->cong_thres  = q->cong_thres;
2701 		t->qnum        = i;
2702 
2703 		if ((sc->flags & FULL_INIT_DONE) == 0)
2704 			t->vector = 0;
2705 		else if (sc->flags & USING_MSIX)
2706 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2707 		else
2708 			t->vector = rman_get_start(sc->irq_res);
2709 
2710 		break;
2711 	}
2712 	case CHELSIO_GET_QSET_NUM: {
2713 		struct ch_reg *edata = (struct ch_reg *)data;
2714 		edata->val = pi->nqsets;
2715 		break;
2716 	}
2717 	case CHELSIO_LOAD_FW: {
2718 		uint8_t *fw_data;
2719 		uint32_t vers;
2720 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2721 
2722 		/*
2723 		 * You're allowed to load firmware only before FULL_INIT_DONE
2724 		 *
2725 		 * FW_UPTODATE is also set so the rest of the initialization
2726 		 * will not overwrite what was loaded here.  This gives you the
2727 		 * flexibility to load any firmware (and maybe shoot yourself in
2728 		 * the foot).
2729 		 */
2730 
2731 		ADAPTER_LOCK(sc);
2732 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2733 			ADAPTER_UNLOCK(sc);
2734 			return (EBUSY);
2735 		}
2736 
2737 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2738 		if (!fw_data)
2739 			error = ENOMEM;
2740 		else
2741 			error = copyin(t->buf, fw_data, t->len);
2742 
2743 		if (!error)
2744 			error = -t3_load_fw(sc, fw_data, t->len);
2745 
2746 		if (t3_get_fw_version(sc, &vers) == 0) {
2747 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2748 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2749 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2750 		}
2751 
2752 		if (!error)
2753 			sc->flags |= FW_UPTODATE;
2754 
2755 		free(fw_data, M_DEVBUF);
2756 		ADAPTER_UNLOCK(sc);
2757 		break;
2758 	}
2759 	case CHELSIO_LOAD_BOOT: {
2760 		uint8_t *boot_data;
2761 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2762 
2763 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2764 		if (!boot_data)
2765 			return (ENOMEM);
2766 
2767 		error = copyin(t->buf, boot_data, t->len);
2768 		if (!error)
2769 			error = -t3_load_boot(sc, boot_data, t->len);
2770 
2771 		free(boot_data, M_DEVBUF);
2772 		break;
2773 	}
2774 	case CHELSIO_GET_PM: {
2775 		struct ch_pm *m = (struct ch_pm *)data;
2776 		struct tp_params *p = &sc->params.tp;
2777 
2778 		if (!is_offload(sc))
2779 			return (EOPNOTSUPP);
2780 
2781 		m->tx_pg_sz = p->tx_pg_size;
2782 		m->tx_num_pg = p->tx_num_pgs;
2783 		m->rx_pg_sz  = p->rx_pg_size;
2784 		m->rx_num_pg = p->rx_num_pgs;
2785 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2786 
2787 		break;
2788 	}
2789 	case CHELSIO_SET_PM: {
2790 		struct ch_pm *m = (struct ch_pm *)data;
2791 		struct tp_params *p = &sc->params.tp;
2792 
2793 		if (!is_offload(sc))
2794 			return (EOPNOTSUPP);
2795 		if (sc->flags & FULL_INIT_DONE)
2796 			return (EBUSY);
2797 
2798 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2799 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2800 			return (EINVAL);	/* not power of 2 */
2801 		if (!(m->rx_pg_sz & 0x14000))
2802 			return (EINVAL);	/* not 16KB or 64KB */
2803 		if (!(m->tx_pg_sz & 0x1554000))
2804 			return (EINVAL);	/* not a power of 4 between 16KB and 16MB */
2805 		if (m->tx_num_pg == -1)
2806 			m->tx_num_pg = p->tx_num_pgs;
2807 		if (m->rx_num_pg == -1)
2808 			m->rx_num_pg = p->rx_num_pgs;
2809 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2810 			return (EINVAL);
2811 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2812 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2813 			return (EINVAL);
2814 
2815 		p->rx_pg_size = m->rx_pg_sz;
2816 		p->tx_pg_size = m->tx_pg_sz;
2817 		p->rx_num_pgs = m->rx_num_pg;
2818 		p->tx_num_pgs = m->tx_num_pg;
2819 		break;
2820 	}
2821 	case CHELSIO_SETMTUTAB: {
2822 		struct ch_mtus *m = (struct ch_mtus *)data;
2823 		int i;
2824 
2825 		if (!is_offload(sc))
2826 			return (EOPNOTSUPP);
2827 		if (offload_running(sc))
2828 			return (EBUSY);
2829 		if (m->nmtus != NMTUS)
2830 			return (EINVAL);
2831 		if (m->mtus[0] < 81)         /* accommodate SACK */
2832 			return (EINVAL);
2833 
2834 		/*
2835 		 * MTUs must be in ascending order
2836 		 */
2837 		for (i = 1; i < NMTUS; ++i)
2838 			if (m->mtus[i] < m->mtus[i - 1])
2839 				return (EINVAL);
2840 
2841 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2842 		break;
2843 	}
2844 	case CHELSIO_GETMTUTAB: {
2845 		struct ch_mtus *m = (struct ch_mtus *)data;
2846 
2847 		if (!is_offload(sc))
2848 			return (EOPNOTSUPP);
2849 
2850 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2851 		m->nmtus = NMTUS;
2852 		break;
2853 	}
2854 	case CHELSIO_GET_MEM: {
2855 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2856 		struct mc7 *mem;
2857 		uint8_t *useraddr;
2858 		u64 buf[32];
2859 
2860 		/*
2861 		 * Use these to avoid modifying len/addr in the return
2862 		 * struct
2863 		 */
2864 		uint32_t len = t->len, addr = t->addr;
2865 
2866 		if (!is_offload(sc))
2867 			return (EOPNOTSUPP);
2868 		if (!(sc->flags & FULL_INIT_DONE))
2869 			return (EIO);         /* need the memory controllers */
2870 		if ((addr & 0x7) || (len & 0x7))
2871 			return (EINVAL);
2872 		if (t->mem_id == MEM_CM)
2873 			mem = &sc->cm;
2874 		else if (t->mem_id == MEM_PMRX)
2875 			mem = &sc->pmrx;
2876 		else if (t->mem_id == MEM_PMTX)
2877 			mem = &sc->pmtx;
2878 		else
2879 			return (EINVAL);
2880 
2881 		/*
2882 		 * Version scheme:
2883 		 * bits 0..9: chip version
2884 		 * bits 10..15: chip revision
2885 		 */
2886 		t->version = 3 | (sc->params.rev << 10);
2887 
2888 		/*
2889 		 * Read 256 bytes at a time as len can be large and we don't
2890 		 * want to use huge intermediate buffers.
2891 		 */
2892 		useraddr = (uint8_t *)t->buf;
2893 		while (len) {
2894 			unsigned int chunk = min(len, sizeof(buf));
2895 
2896 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2897 			if (error)
2898 				return (-error);
2899 			if (copyout(buf, useraddr, chunk))
2900 				return (EFAULT);
2901 			useraddr += chunk;
2902 			addr += chunk;
2903 			len -= chunk;
2904 		}
2905 		break;
2906 	}
2907 	case CHELSIO_READ_TCAM_WORD: {
2908 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2909 
2910 		if (!is_offload(sc))
2911 			return (EOPNOTSUPP);
2912 		if (!(sc->flags & FULL_INIT_DONE))
2913 			return (EIO);         /* need MC5 */
2914 		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2915 		break;
2916 	}
2917 	case CHELSIO_SET_TRACE_FILTER: {
2918 		struct ch_trace *t = (struct ch_trace *)data;
2919 		const struct trace_params *tp;
2920 
2921 		tp = (const struct trace_params *)&t->sip;
2922 		if (t->config_tx)
2923 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2924 					       t->trace_tx);
2925 		if (t->config_rx)
2926 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2927 					       t->trace_rx);
2928 		break;
2929 	}
2930 	case CHELSIO_SET_PKTSCHED: {
2931 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2932 		if (sc->open_device_map == 0)
2933 			return (EAGAIN);
2934 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2935 		    p->binding);
2936 		break;
2937 	}
2938 	case CHELSIO_IFCONF_GETREGS: {
2939 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2940 		int reglen = cxgb_get_regs_len();
2941 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2942 		if (buf == NULL) {
2943 			return (ENOMEM);
2944 		}
2945 		if (regs->len > reglen)
2946 			regs->len = reglen;
2947 		else if (regs->len < reglen)
2948 			error = ENOBUFS;
2949 
2950 		if (!error) {
2951 			cxgb_get_regs(sc, regs, buf);
2952 			error = copyout(buf, regs->data, reglen);
2953 		}
2954 		free(buf, M_DEVBUF);
2955 
2956 		break;
2957 	}
2958 	case CHELSIO_SET_HW_SCHED: {
2959 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2960 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2961 
2962 		if ((sc->flags & FULL_INIT_DONE) == 0)
2963 			return (EAGAIN);       /* need TP to be initialized */
2964 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2965 		    !in_range(t->channel, 0, 1) ||
2966 		    !in_range(t->kbps, 0, 10000000) ||
2967 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2968 		    !in_range(t->flow_ipg, 0,
2969 			      dack_ticks_to_usec(sc, 0x7ff)))
2970 			return (EINVAL);
2971 
2972 		if (t->kbps >= 0) {
2973 			error = t3_config_sched(sc, t->kbps, t->sched);
2974 			if (error < 0)
2975 				return (-error);
2976 		}
2977 		if (t->class_ipg >= 0)
2978 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2979 		if (t->flow_ipg >= 0) {
2980 			t->flow_ipg *= 1000;     /* us -> ns */
2981 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2982 		}
2983 		if (t->mode >= 0) {
2984 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2985 
2986 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2987 					 bit, t->mode ? bit : 0);
2988 		}
2989 		if (t->channel >= 0)
2990 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2991 					 1 << t->sched, t->channel << t->sched);
2992 		break;
2993 	}
2994 	case CHELSIO_GET_EEPROM: {
2995 		int i;
2996 		struct ch_eeprom *e = (struct ch_eeprom *)data;
2997 		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2998 
2999 		if (buf == NULL) {
3000 			return (ENOMEM);
3001 		}
3002 		e->magic = EEPROM_MAGIC;
3003 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3004 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3005 
3006 		if (!error)
3007 			error = copyout(buf + e->offset, e->data, e->len);
3008 
3009 		free(buf, M_DEVBUF);
3010 		break;
3011 	}
3012 	case CHELSIO_CLEAR_STATS: {
3013 		if (!(sc->flags & FULL_INIT_DONE))
3014 			return (EAGAIN);
3015 
3016 		PORT_LOCK(pi);
3017 		t3_mac_update_stats(&pi->mac);
3018 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3019 		PORT_UNLOCK(pi);
3020 		break;
3021 	}
3022 	case CHELSIO_GET_UP_LA: {
3023 		struct ch_up_la *la = (struct ch_up_la *)data;
3024 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3025 		if (buf == NULL) {
3026 			return (ENOMEM);
3027 		}
3028 		if (la->bufsize < LA_BUFSIZE)
3029 			error = ENOBUFS;
3030 
3031 		if (!error)
3032 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3033 					      &la->bufsize, buf);
3034 		if (!error)
3035 			error = copyout(buf, la->data, la->bufsize);
3036 
3037 		free(buf, M_DEVBUF);
3038 		break;
3039 	}
3040 	case CHELSIO_GET_UP_IOQS: {
3041 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3042 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3043 		uint32_t *v;
3044 
3045 		if (buf == NULL) {
3046 			return (ENOMEM);
3047 		}
3048 		if (ioqs->bufsize < IOQS_BUFSIZE)
3049 			error = ENOBUFS;
3050 
3051 		if (!error)
3052 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3053 
3054 		if (!error) {
3055 			v = (uint32_t *)buf;
3056 
3057 			ioqs->ioq_rx_enable = *v++;
3058 			ioqs->ioq_tx_enable = *v++;
3059 			ioqs->ioq_rx_status = *v++;
3060 			ioqs->ioq_tx_status = *v++;
3061 
3062 			error = copyout(v, ioqs->data, ioqs->bufsize);
3063 		}
3064 
3065 		free(buf, M_DEVBUF);
3066 		break;
3067 	}
3068 	case CHELSIO_SET_FILTER: {
3069 		struct ch_filter *f = (struct ch_filter *)data;
3070 		struct filter_info *p;
3071 		unsigned int nfilters = sc->params.mc5.nfilters;
3072 
3073 		if (!is_offload(sc))
3074 			return (EOPNOTSUPP);	/* No TCAM */
3075 		if (!(sc->flags & FULL_INIT_DONE))
3076 			return (EAGAIN);	/* mc5 not setup yet */
3077 		if (nfilters == 0)
3078 			return (EBUSY);		/* TOE will use TCAM */
3079 
3080 		/* sanity checks */
3081 		if (f->filter_id >= nfilters ||
3082 		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3083 		    (f->val.sport && f->mask.sport != 0xffff) ||
3084 		    (f->val.dport && f->mask.dport != 0xffff) ||
3085 		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3086 		    (f->val.vlan_prio &&
3087 			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3088 		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3089 		    f->qset >= SGE_QSETS ||
3090 		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3091 			return (EINVAL);
3092 
3093 		/* Was allocated with M_WAITOK */
3094 		KASSERT(sc->filters, ("filter table NULL\n"));
3095 
3096 		p = &sc->filters[f->filter_id];
3097 		if (p->locked)
3098 			return (EPERM);
3099 
3100 		bzero(p, sizeof(*p));
3101 		p->sip = f->val.sip;
3102 		p->sip_mask = f->mask.sip;
3103 		p->dip = f->val.dip;
3104 		p->sport = f->val.sport;
3105 		p->dport = f->val.dport;
3106 		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3107 		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3108 		    FILTER_NO_VLAN_PRI;
3109 		p->mac_hit = f->mac_hit;
3110 		p->mac_vld = f->mac_addr_idx != 0xffff;
3111 		p->mac_idx = f->mac_addr_idx;
3112 		p->pkt_type = f->proto;
3113 		p->report_filter_id = f->want_filter_id;
3114 		p->pass = f->pass;
3115 		p->rss = f->rss;
3116 		p->qset = f->qset;
3117 
3118 		error = set_filter(sc, f->filter_id, p);
3119 		if (error == 0)
3120 			p->valid = 1;
3121 		break;
3122 	}
3123 	case CHELSIO_DEL_FILTER: {
3124 		struct ch_filter *f = (struct ch_filter *)data;
3125 		struct filter_info *p;
3126 		unsigned int nfilters = sc->params.mc5.nfilters;
3127 
3128 		if (!is_offload(sc))
3129 			return (EOPNOTSUPP);
3130 		if (!(sc->flags & FULL_INIT_DONE))
3131 			return (EAGAIN);
3132 		if (nfilters == 0 || sc->filters == NULL)
3133 			return (EINVAL);
3134 		if (f->filter_id >= nfilters)
3135 			return (EINVAL);
3136 
3137 		p = &sc->filters[f->filter_id];
3138 		if (p->locked)
3139 			return (EPERM);
3140 		if (!p->valid)
3141 			return (EFAULT); /* Read "Bad address" as "Bad index" */
3142 
3143 		bzero(p, sizeof(*p));
3144 		p->sip = p->sip_mask = 0xffffffff;
3145 		p->vlan = 0xfff;
3146 		p->vlan_prio = FILTER_NO_VLAN_PRI;
3147 		p->pkt_type = 1;
3148 		error = set_filter(sc, f->filter_id, p);
3149 		break;
3150 	}
3151 	case CHELSIO_GET_FILTER: {
3152 		struct ch_filter *f = (struct ch_filter *)data;
3153 		struct filter_info *p;
3154 		unsigned int i, nfilters = sc->params.mc5.nfilters;
3155 
3156 		if (!is_offload(sc))
3157 			return (EOPNOTSUPP);
3158 		if (!(sc->flags & FULL_INIT_DONE))
3159 			return (EAGAIN);
3160 		if (nfilters == 0 || sc->filters == NULL)
3161 			return (EINVAL);
3162 
3163 		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3164 		for (; i < nfilters; i++) {
3165 			p = &sc->filters[i];
3166 			if (!p->valid)
3167 				continue;
3168 
3169 			bzero(f, sizeof(*f));
3170 
3171 			f->filter_id = i;
3172 			f->val.sip = p->sip;
3173 			f->mask.sip = p->sip_mask;
3174 			f->val.dip = p->dip;
3175 			f->mask.dip = p->dip ? 0xffffffff : 0;
3176 			f->val.sport = p->sport;
3177 			f->mask.sport = p->sport ? 0xffff : 0;
3178 			f->val.dport = p->dport;
3179 			f->mask.dport = p->dport ? 0xffff : 0;
3180 			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3181 			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3182 			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3183 			    0 : p->vlan_prio;
3184 			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3185 			    0 : FILTER_NO_VLAN_PRI;
3186 			f->mac_hit = p->mac_hit;
3187 			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3188 			f->proto = p->pkt_type;
3189 			f->want_filter_id = p->report_filter_id;
3190 			f->pass = p->pass;
3191 			f->rss = p->rss;
3192 			f->qset = p->qset;
3193 
3194 			break;
3195 		}
3196 
3197 		if (i == nfilters)
3198 			f->filter_id = 0xffffffff;
3199 		break;
3200 	}
3201 	default:
3202 		return (EOPNOTSUPP);
3203 		break;
3204 	}
3205 
3206 	return (error);
3207 }
3208 
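/*
 * Copy the contiguous register block [start, end] into buf at offset start.
 */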
3209 static __inline void
3210 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3211     unsigned int end)
3212 {
3213 	uint32_t *p = (uint32_t *)(buf + start);
3214 
3215 	for ( ; start <= end; start += sizeof(uint32_t))
3216 		*p++ = t3_read_reg(ap, start);
3217 }
3218 
3219 #define T3_REGMAP_SIZE (3 * 1024)
3220 static int
3221 cxgb_get_regs_len(void)
3222 {
3223 	return T3_REGMAP_SIZE;
3224 }
3225 
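/*
 * Snapshot the adapter's register space for the CHELSIO_IFCONF_GETREGS ioctl.
 */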
3226 static void
3227 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3228 {
3229 
3230 	/*
3231 	 * Version scheme:
3232 	 * bits 0..9: chip version
3233 	 * bits 10..15: chip revision
3234 	 * bit 31: set for PCIe cards
3235 	 */
3236 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3237 
3238 	/*
3239 	 * We skip the MAC statistics registers because they are clear-on-read.
3240 	 * Also reading multi-register stats would need to synchronize with the
3241 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3242 	 */
3243 	memset(buf, 0, cxgb_get_regs_len());
3244 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3245 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3246 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3247 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3248 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3249 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3250 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3251 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3252 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3253 }
3254 
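/*
 * Allocate the software filter table.  The last entry is reserved as a
 * locked default filter (pass, with RSS) that is programmed by
 * setup_hw_filters().
 */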
3255 static int
3256 alloc_filters(struct adapter *sc)
3257 {
3258 	struct filter_info *p;
3259 	unsigned int nfilters = sc->params.mc5.nfilters;
3260 
3261 	if (nfilters == 0)
3262 		return (0);
3263 
3264 	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3265 	sc->filters = p;
3266 
3267 	p = &sc->filters[nfilters - 1];
3268 	p->vlan = 0xfff;
3269 	p->vlan_prio = FILTER_NO_VLAN_PRI;
3270 	p->pass = p->rss = p->valid = p->locked = 1;
3271 
3272 	return (0);
3273 }
3274 
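/*
 * Enable filtering in the MC5/TCAM and program any locked (default) filters.
 */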
3275 static int
3276 setup_hw_filters(struct adapter *sc)
3277 {
3278 	int i, rc;
3279 	unsigned int nfilters = sc->params.mc5.nfilters;
3280 
3281 	if (!sc->filters)
3282 		return (0);
3283 
3284 	t3_enable_filters(sc);
3285 
3286 	for (i = rc = 0; i < nfilters && !rc; i++) {
3287 		if (sc->filters[i].locked)
3288 			rc = set_filter(sc, i, &sc->filters[i]);
3289 	}
3290 
3291 	return (rc);
3292 }
3293 
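/*
 * Program one filter into the TCAM.  The filter is written with a firmware
 * bypass work request carrying a CPL_PASS_OPEN_REQ and two TCB field
 * updates; a filter with pass set and rss clear needs one more request to
 * point the TCB at the chosen queue set's RSS slot.
 */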
3294 static int
3295 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3296 {
3297 	int len;
3298 	struct mbuf *m;
3299 	struct ulp_txpkt *txpkt;
3300 	struct work_request_hdr *wr;
3301 	struct cpl_pass_open_req *oreq;
3302 	struct cpl_set_tcb_field *sreq;
3303 
3304 	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3305 	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3306 
3307 	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3308 	      sc->params.mc5.nfilters;
3309 
3310 	m = m_gethdr(M_WAITOK, MT_DATA);
3311 	m->m_len = m->m_pkthdr.len = len;
3312 	bzero(mtod(m, char *), len);
3313 
3314 	wr = mtod(m, struct work_request_hdr *);
3315 	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3316 
3317 	oreq = (struct cpl_pass_open_req *)(wr + 1);
3318 	txpkt = (struct ulp_txpkt *)oreq;
3319 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3320 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3321 	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3322 	oreq->local_port = htons(f->dport);
3323 	oreq->peer_port = htons(f->sport);
3324 	oreq->local_ip = htonl(f->dip);
3325 	oreq->peer_ip = htonl(f->sip);
3326 	oreq->peer_netmask = htonl(f->sip_mask);
3327 	oreq->opt0h = 0;
3328 	oreq->opt0l = htonl(F_NO_OFFLOAD);
3329 	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3330 			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3331 			 V_VLAN_PRI(f->vlan_prio >> 1) |
3332 			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3333 			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3334 			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3335 
3336 	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3337 	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3338 			  (f->report_filter_id << 15) | (1 << 23) |
3339 			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3340 	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3341 	t3_mgmt_tx(sc, m);
3342 
3343 	if (f->pass && !f->rss) {
3344 		len = sizeof(*sreq);
3345 		m = m_gethdr(M_WAITOK, MT_DATA);
3346 		m->m_len = m->m_pkthdr.len = len;
3347 		bzero(mtod(m, char *), len);
3348 		sreq = mtod(m, struct cpl_set_tcb_field *);
3349 		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3350 		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3351 				 (u64)sc->rrss_map[f->qset] << 19);
3352 		t3_mgmt_tx(sc, m);
3353 	}
3354 	return (0);
3355 }
3356 
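/*
 * Populate a CPL_SET_TCB_FIELD request (no reply requested).
 */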
3357 static inline void
3358 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3359     unsigned int word, u64 mask, u64 val)
3360 {
3361 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3362 	req->reply = V_NO_REPLY(1);
3363 	req->cpu_idx = 0;
3364 	req->word = htons(word);
3365 	req->mask = htobe64(mask);
3366 	req->val = htobe64(val);
3367 }
3368 
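/*
 * Same as mk_set_tcb_field but wrapped in a ULP_TXPKT so it can be embedded
 * in a bypass work request.
 */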
3369 static inline void
3370 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3371     unsigned int word, u64 mask, u64 val)
3372 {
3373 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3374 
3375 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3376 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3377 	mk_set_tcb_field(req, tid, word, mask, val);
3378 }
3379