xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision 7aa383846770374466b1dcb2cefd71bde9acf463)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57 
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66 
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78 
79 #include <cxgb_include.h>
80 
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84 
85 static int cxgb_setup_interrupts(adapter_t *);
86 static void cxgb_teardown_interrupts(adapter_t *);
87 static void cxgb_init(void *);
88 static int cxgb_init_locked(struct port_info *);
89 static int cxgb_uninit_locked(struct port_info *);
90 static int cxgb_uninit_synchronized(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static int cxgb_media_change(struct ifnet *);
93 static int cxgb_ifm_type(int);
94 static void cxgb_build_medialist(struct port_info *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_tick_handler(void *, int);
99 static void cxgb_tick(void *);
100 static void link_check_callout(void *);
101 static void check_link_status(void *, int);
102 static void setup_rss(adapter_t *sc);
103 static int alloc_filters(struct adapter *);
104 static int setup_hw_filters(struct adapter *);
105 static int set_filter(struct adapter *, int, const struct filter_info *);
106 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
107     unsigned int, u64, u64);
108 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
109     unsigned int, u64, u64);
110 
111 /* Attachment glue for the PCI controller end of the device.  Each port of
112  * the device is attached separately, as defined later.
113  */
114 static int cxgb_controller_probe(device_t);
115 static int cxgb_controller_attach(device_t);
116 static int cxgb_controller_detach(device_t);
117 static void cxgb_free(struct adapter *);
118 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
119     unsigned int end);
120 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
121 static int cxgb_get_regs_len(void);
122 static int offload_open(struct port_info *pi);
123 static void touch_bars(device_t dev);
124 static int offload_close(struct t3cdev *tdev);
125 static void cxgb_update_mac_settings(struct port_info *p);
126 
127 static device_method_t cxgb_controller_methods[] = {
128 	DEVMETHOD(device_probe,		cxgb_controller_probe),
129 	DEVMETHOD(device_attach,	cxgb_controller_attach),
130 	DEVMETHOD(device_detach,	cxgb_controller_detach),
131 
132 	/* bus interface */
133 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
134 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
135 
136 	{ 0, 0 }
137 };
138 
139 static driver_t cxgb_controller_driver = {
140 	"cxgbc",
141 	cxgb_controller_methods,
142 	sizeof(struct adapter)
143 };
144 
145 static devclass_t	cxgb_controller_devclass;
146 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
147 
148 /*
149  * Attachment glue for the ports.  Attachment is done directly to the
150  * controller device.
151  */
152 static int cxgb_port_probe(device_t);
153 static int cxgb_port_attach(device_t);
154 static int cxgb_port_detach(device_t);
155 
156 static device_method_t cxgb_port_methods[] = {
157 	DEVMETHOD(device_probe,		cxgb_port_probe),
158 	DEVMETHOD(device_attach,	cxgb_port_attach),
159 	DEVMETHOD(device_detach,	cxgb_port_detach),
160 	{ 0, 0 }
161 };
162 
163 static driver_t cxgb_port_driver = {
164 	"cxgb",
165 	cxgb_port_methods,
166 	0
167 };
168 
169 static d_ioctl_t cxgb_extension_ioctl;
170 static d_open_t cxgb_extension_open;
171 static d_close_t cxgb_extension_close;
172 
173 static struct cdevsw cxgb_cdevsw = {
174        .d_version =    D_VERSION,
175        .d_flags =      0,
176        .d_open =       cxgb_extension_open,
177        .d_close =      cxgb_extension_close,
178        .d_ioctl =      cxgb_extension_ioctl,
179        .d_name =       "cxgb",
180 };
181 
182 static devclass_t	cxgb_port_devclass;
183 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
184 
185 /*
186  * The driver uses the best interrupt scheme available on a platform in the
187  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
188  * of these schemes the driver may consider as follows:
189  *
190  * msi = 2: choose from among all three options
191  * msi = 1: only consider MSI and pin interrupts
192  * msi = 0: force pin interrupts
193  */
194 static int msi_allowed = 2;
195 
196 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
197 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
198 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
199     "MSI-X, MSI, INTx selector");
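/*
 * A minimal usage sketch (not part of the source): because these are
 * CTLFLAG_RDTUN tunables they are set at boot or module load time, e.g.
 * in /boot/loader.conf:
 *
 *	hw.cxgb.msi_allowed="1"		# consider MSI and INTx only
 *
 * and can then be inspected (read-only) at runtime with "sysctl hw.cxgb".
 * The other hw.cxgb.* knobs below follow the same pattern.
 */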
200 
201 /*
202  * The driver enables offload by default.
203  * To disable it, use ofld_disable = 1.
204  */
205 static int ofld_disable = 0;
206 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
207 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
208     "disable ULP offload");
209 
210 /*
211  * The driver uses an auto-queue algorithm by default.
212  * To disable it and force a single queue-set per port, use multiq = 0
213  */
214 static int multiq = 1;
215 TUNABLE_INT("hw.cxgb.multiq", &multiq);
216 SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
217     "use min(ncpus/ports, 8) queue-sets per port");
218 
219 /*
220  * By default the driver will not update the firmware unless
221  * it was compiled against a newer version
222  *
223  */
224 static int force_fw_update = 0;
225 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
226 SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
227     "update firmware even if up to date");
228 
229 int cxgb_use_16k_clusters = -1;
230 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
231 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
232     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
233 
234 /*
235  * Tune the size of the output queue.
236  */
237 int cxgb_snd_queue_len = IFQ_MAXLEN;
238 TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
239 SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
240     &cxgb_snd_queue_len, 0, "send queue size");
241 
242 static int nfilters = -1;
243 TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
244 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
245     &nfilters, 0, "max number of entries in the filter table");
246 
247 enum {
248 	MAX_TXQ_ENTRIES      = 16384,
249 	MAX_CTRL_TXQ_ENTRIES = 1024,
250 	MAX_RSPQ_ENTRIES     = 16384,
251 	MAX_RX_BUFFERS       = 16384,
252 	MAX_RX_JUMBO_BUFFERS = 16384,
253 	MIN_TXQ_ENTRIES      = 4,
254 	MIN_CTRL_TXQ_ENTRIES = 4,
255 	MIN_RSPQ_ENTRIES     = 32,
256 	MIN_FL_ENTRIES       = 32,
257 	MIN_FL_JUMBO_ENTRIES = 32
258 };
259 
260 struct filter_info {
261 	u32 sip;
262 	u32 sip_mask;
263 	u32 dip;
264 	u16 sport;
265 	u16 dport;
266 	u32 vlan:12;
267 	u32 vlan_prio:3;
268 	u32 mac_hit:1;
269 	u32 mac_idx:4;
270 	u32 mac_vld:1;
271 	u32 pkt_type:2;
272 	u32 report_filter_id:1;
273 	u32 pass:1;
274 	u32 rss:1;
275 	u32 qset:3;
276 	u32 locked:1;
277 	u32 valid:1;
278 };
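/*
 * An illustrative, hypothetical entry (not taken from the driver): a filter
 * that steers traffic destined to TCP port 80 on 10.0.0.1 to queue set 2
 * might look roughly like
 *
 *	struct filter_info f = {
 *		.dip   = 0x0a000001,	// 10.0.0.1, host order for illustration
 *		.dport = 80,
 *		.pass  = 1,		// pass rather than drop
 *		.qset  = 2,
 *		.valid = 1,
 *	};
 *
 * Real entries are created through the ioctl interface serviced by
 * cxgb_extension_ioctl() and written to hardware by set_filter(); the table
 * size is bounded by the hw.cxgb.nfilters tunable above.
 */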
279 
280 enum { FILTER_NO_VLAN_PRI = 7 };
281 
282 #define EEPROM_MAGIC 0x38E2F10C
283 
284 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
285 
286 /* Table for probing the cards.  The desc field isn't actually used */
287 struct cxgb_ident {
288 	uint16_t	vendor;
289 	uint16_t	device;
290 	int		index;
291 	char		*desc;
292 } cxgb_identifiers[] = {
293 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
294 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
295 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
296 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
297 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
298 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
299 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
300 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
301 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
302 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
303 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
304 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
305 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
306 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
307 	{0, 0, 0, NULL}
308 };
309 
310 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
311 
312 
313 static __inline char
314 t3rev2char(struct adapter *adapter)
315 {
316 	char rev = 'z';
317 
318 	switch(adapter->params.rev) {
319 	case T3_REV_A:
320 		rev = 'a';
321 		break;
322 	case T3_REV_B:
323 	case T3_REV_B2:
324 		rev = 'b';
325 		break;
326 	case T3_REV_C:
327 		rev = 'c';
328 		break;
329 	}
330 	return rev;
331 }
332 
333 static struct cxgb_ident *
334 cxgb_get_ident(device_t dev)
335 {
336 	struct cxgb_ident *id;
337 
338 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
339 		if ((id->vendor == pci_get_vendor(dev)) &&
340 		    (id->device == pci_get_device(dev))) {
341 			return (id);
342 		}
343 	}
344 	return (NULL);
345 }
346 
347 static const struct adapter_info *
348 cxgb_get_adapter_info(device_t dev)
349 {
350 	struct cxgb_ident *id;
351 	const struct adapter_info *ai;
352 
353 	id = cxgb_get_ident(dev);
354 	if (id == NULL)
355 		return (NULL);
356 
357 	ai = t3_get_adapter_info(id->index);
358 
359 	return (ai);
360 }
361 
362 static int
363 cxgb_controller_probe(device_t dev)
364 {
365 	const struct adapter_info *ai;
366 	char *ports, buf[80];
367 	int nports;
368 
369 	ai = cxgb_get_adapter_info(dev);
370 	if (ai == NULL)
371 		return (ENXIO);
372 
373 	nports = ai->nports0 + ai->nports1;
374 	if (nports == 1)
375 		ports = "port";
376 	else
377 		ports = "ports";
378 
379 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
380 	device_set_desc_copy(dev, buf);
381 	return (BUS_PROBE_DEFAULT);
382 }
383 
384 #define FW_FNAME "cxgb_t3fw"
385 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
386 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
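/*
 * These names are resolved through firmware(9): firmware_get() first looks
 * for an already-registered image and otherwise asks the kernel linker to
 * load a module of the same name, so the images are typically shipped as
 * kernel modules (e.g. cxgb_t3fw.ko) or compiled into the kernel.
 */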
387 
388 static int
389 upgrade_fw(adapter_t *sc)
390 {
391 	const struct firmware *fw;
392 	int status;
393 	u32 vers;
394 
395 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
396 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
397 		return (ENOENT);
398 	} else
399 		device_printf(sc->dev, "installing firmware on card\n");
400 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
401 
402 	if (status != 0) {
403 		device_printf(sc->dev, "failed to install firmware: %d\n",
404 		    status);
405 	} else {
406 		t3_get_fw_version(sc, &vers);
407 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
408 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
409 		    G_FW_VERSION_MICRO(vers));
410 	}
411 
412 	firmware_put(fw, FIRMWARE_UNLOAD);
413 
414 	return (status);
415 }
416 
417 /*
418  * The cxgb_controller_attach function is responsible for the initial
419  * bringup of the device.  Its responsibilities include:
420  *
421  *  1. Determine if the device supports MSI or MSI-X.
422  *  2. Allocate bus resources so that we can access the Base Address Register
423  *  3. Create and initialize mutexes for the controller and its control
424  *     logic such as SGE and MDIO.
425  *  4. Call hardware specific setup routine for the adapter as a whole.
426  *  5. Allocate the BAR for doing MSI-X.
427  *  6. Setup the line interrupt iff MSI-X is not supported.
428  *  7. Create the driver's taskq.
429  *  8. Start one task queue service thread.
430  *  9. Check if the firmware and SRAM are up-to-date.  They will be
431  *     auto-updated later (before FULL_INIT_DONE), if required.
432  * 10. Create a child device for each MAC (port)
433  * 11. Initialize T3 private state.
434  * 12. Trigger the LED
435  * 13. Setup offload iff supported.
436  * 14. Reset/restart the tick callout.
437  * 15. Attach sysctls
438  *
439  * NOTE: Any modification or deviation from this list MUST be reflected in
440  * the above comment.  Failure to do so will result in problems on various
441  * error conditions including link flapping.
442  */
443 static int
444 cxgb_controller_attach(device_t dev)
445 {
446 	device_t child;
447 	const struct adapter_info *ai;
448 	struct adapter *sc;
449 	int i, error = 0;
450 	uint32_t vers;
451 	int port_qsets = 1;
452 	int msi_needed, reg;
453 	char buf[80];
454 
455 	sc = device_get_softc(dev);
456 	sc->dev = dev;
457 	sc->msi_count = 0;
458 	ai = cxgb_get_adapter_info(dev);
459 
460 	/* find the PCIe link width and set max read request to 4KB */
461 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
462 		uint16_t lnk, pectl;
463 		lnk = pci_read_config(dev, reg + 0x12, 2);
464 		sc->link_width = (lnk >> 4) & 0x3f;
465 
466 		pectl = pci_read_config(dev, reg + 0x8, 2);
467 		pectl = (pectl & ~0x7000) | (5 << 12);
468 		pci_write_config(dev, reg + 0x8, pectl, 2);
469 	}
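	/*
	 * For reference (per the PCIe spec): reg + 0x12 is the Link Status
	 * register, whose bits 9:4 hold the negotiated link width extracted
	 * above, and reg + 0x8 is the Device Control register, whose bits
	 * 14:12 hold Max_Read_Request_Size; encoding 5 means 128 << 5 = 4096
	 * bytes, i.e. the 4KB mentioned in the comment.
	 */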
470 
471 	if (sc->link_width != 0 && sc->link_width <= 4 &&
472 	    (ai->nports0 + ai->nports1) <= 2) {
473 		device_printf(sc->dev,
474 		    "PCIe x%d Link, expect reduced performance\n",
475 		    sc->link_width);
476 	}
477 
478 	touch_bars(dev);
479 	pci_enable_busmaster(dev);
480 	/*
481 	 * Allocate the registers and make them available to the driver.
482 	 * The registers that we care about for NIC mode are in BAR 0
483 	 */
484 	sc->regs_rid = PCIR_BAR(0);
485 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
486 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
487 		device_printf(dev, "Cannot allocate BAR region 0\n");
488 		return (ENXIO);
489 	}
490 	sc->udbs_rid = PCIR_BAR(2);
491 	sc->udbs_res = NULL;
492 	if (is_offload(sc) &&
493 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
494 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
495 		device_printf(dev, "Cannot allocate BAR region 2\n");
496 		error = ENXIO;
497 		goto out;
498 	}
499 
500 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
501 	    device_get_unit(dev));
502 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
503 
504 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
505 	    device_get_unit(dev));
506 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
507 	    device_get_unit(dev));
508 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
509 	    device_get_unit(dev));
510 
511 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
512 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
513 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
514 
515 	sc->bt = rman_get_bustag(sc->regs_res);
516 	sc->bh = rman_get_bushandle(sc->regs_res);
517 	sc->mmio_len = rman_get_size(sc->regs_res);
518 
519 	for (i = 0; i < MAX_NPORTS; i++)
520 		sc->port[i].adapter = sc;
521 
522 	if (t3_prep_adapter(sc, ai, 1) < 0) {
523 		printf("prep adapter failed\n");
524 		error = ENODEV;
525 		goto out;
526 	}
527         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
528 	 * enough messages for the queue sets.  If that fails, try falling
529 	 * back to MSI.  If that fails, then try falling back to the legacy
530 	 * interrupt pin model.
531 	 */
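	/*
	 * Worked example (a sketch, assuming SGE_QSETS is 8): on a 2-port
	 * adapter with multiq enabled and at least 4 CPUs, port_qsets becomes
	 * min(8 / 2, mp_ncpus) = 4 and msi_needed = 2 * 4 + 1 = 9.  The extra
	 * vector is the adapter-wide async/error interrupt (cxgb_async_intr);
	 * the rest are one per queue set.
	 */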
532 	sc->msix_regs_rid = 0x20;
533 	if ((msi_allowed >= 2) &&
534 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
535 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
536 
537 		if (multiq)
538 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
539 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
540 
541 		if (pci_msix_count(dev) == 0 ||
542 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
543 		    sc->msi_count != msi_needed) {
544 			device_printf(dev, "alloc msix failed - "
545 				      "msi_count=%d, msi_needed=%d, err=%d; "
546 				      "will try MSI\n", sc->msi_count,
547 				      msi_needed, error);
548 			sc->msi_count = 0;
549 			port_qsets = 1;
550 			pci_release_msi(dev);
551 			bus_release_resource(dev, SYS_RES_MEMORY,
552 			    sc->msix_regs_rid, sc->msix_regs_res);
553 			sc->msix_regs_res = NULL;
554 		} else {
555 			sc->flags |= USING_MSIX;
556 			sc->cxgb_intr = cxgb_async_intr;
557 			device_printf(dev,
558 				      "using MSI-X interrupts (%u vectors)\n",
559 				      sc->msi_count);
560 		}
561 	}
562 
563 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
564 		sc->msi_count = 1;
565 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
566 			device_printf(dev, "alloc msi failed - "
567 				      "err=%d; will try INTx\n", error);
568 			sc->msi_count = 0;
569 			port_qsets = 1;
570 			pci_release_msi(dev);
571 		} else {
572 			sc->flags |= USING_MSI;
573 			sc->cxgb_intr = t3_intr_msi;
574 			device_printf(dev, "using MSI interrupts\n");
575 		}
576 	}
577 	if (sc->msi_count == 0) {
578 		device_printf(dev, "using line interrupts\n");
579 		sc->cxgb_intr = t3b_intr;
580 	}
581 
582 	/* Create a private taskqueue thread for handling driver events */
583 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
584 	    taskqueue_thread_enqueue, &sc->tq);
585 	if (sc->tq == NULL) {
586 		device_printf(dev, "failed to allocate controller task queue\n");
		error = ENOMEM;
587 		goto out;
588 	}
589 
590 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
591 	    device_get_nameunit(dev));
592 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
593 
594 
595 	/* Create a periodic callout for checking adapter status */
596 	callout_init(&sc->cxgb_tick_ch, TRUE);
597 
598 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
599 		/*
600 		 * Warn user that a firmware update will be attempted in init.
601 		 */
602 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
603 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
604 		sc->flags &= ~FW_UPTODATE;
605 	} else {
606 		sc->flags |= FW_UPTODATE;
607 	}
608 
609 	if (t3_check_tpsram_version(sc) < 0) {
610 		/*
611 		 * Warn user that a firmware update will be attempted in init.
612 		 */
613 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
614 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
615 		sc->flags &= ~TPS_UPTODATE;
616 	} else {
617 		sc->flags |= TPS_UPTODATE;
618 	}
619 
620 	/*
621 	 * Create a child device for each MAC.  The ethernet attachment
622 	 * will be done in these children.
623 	 */
624 	for (i = 0; i < (sc)->params.nports; i++) {
625 		struct port_info *pi;
626 
627 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
628 			device_printf(dev, "failed to add child port\n");
629 			error = EINVAL;
630 			goto out;
631 		}
632 		pi = &sc->port[i];
633 		pi->adapter = sc;
634 		pi->nqsets = port_qsets;
635 		pi->first_qset = i*port_qsets;
636 		pi->port_id = i;
637 		pi->tx_chan = i >= ai->nports0;
638 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
639 		sc->rxpkt_map[pi->txpkt_intf] = i;
640 		sc->port[i].tx_chan = i >= ai->nports0;
641 		sc->portdev[i] = child;
642 		device_set_softc(child, pi);
643 	}
644 	if ((error = bus_generic_attach(dev)) != 0)
645 		goto out;
646 
647 	/* initialize sge private state */
648 	t3_sge_init_adapter(sc);
649 
650 	t3_led_ready(sc);
651 
652 	cxgb_offload_init();
653 	if (is_offload(sc)) {
654 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
655 		cxgb_adapter_ofld(sc);
656         }
657 	error = t3_get_fw_version(sc, &vers);
658 	if (error)
659 		goto out;
660 
661 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
662 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
663 	    G_FW_VERSION_MICRO(vers));
664 
665 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
666 		 ai->desc, is_offload(sc) ? "R" : "",
667 		 sc->params.vpd.ec, sc->params.vpd.sn);
668 	device_set_desc_copy(dev, buf);
669 
670 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
671 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
672 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
673 
674 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
675 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
676 	t3_add_attach_sysctls(sc);
677 out:
678 	if (error)
679 		cxgb_free(sc);
680 
681 	return (error);
682 }
683 
684 /*
685  * The cxgb_controller_detach routine is called when the device is
686  * unloaded from the system.
687  */
688 
689 static int
690 cxgb_controller_detach(device_t dev)
691 {
692 	struct adapter *sc;
693 
694 	sc = device_get_softc(dev);
695 
696 	cxgb_free(sc);
697 
698 	return (0);
699 }
700 
701 /*
702  * cxgb_free() is called by the cxgb_controller_detach() routine
703  * to tear down the structures that were built up in
704  * cxgb_controller_attach(), and should be the final piece of work
705  * done when fully unloading the driver.
706  *
707  *
708  *  1. Shutting down the threads started by the cxgb_controller_attach()
709  *     routine.
710  *  2. Stopping the lower level device and all callouts.
711  *  3. Detaching all of the port devices created during the
712  *     cxgb_controller_attach() routine.
713  *  4. Removing the device children created via cxgb_controller_attach().
714  *  5. Releasing PCI resources associated with the device.
715  *  6. Turning off the offload support, iff it was turned on.
716  *  7. Destroying the mutexes created in cxgb_controller_attach().
717  *
718  */
719 static void
720 cxgb_free(struct adapter *sc)
721 {
722 	int i;
723 
724 	ADAPTER_LOCK(sc);
725 	sc->flags |= CXGB_SHUTDOWN;
726 	ADAPTER_UNLOCK(sc);
727 
728 	/*
729 	 * Make sure all child devices are gone.
730 	 */
731 	bus_generic_detach(sc->dev);
732 	for (i = 0; i < (sc)->params.nports; i++) {
733 		if (sc->portdev[i] &&
734 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
735 			device_printf(sc->dev, "failed to delete child port\n");
736 	}
737 
738 	/*
739 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
740 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
741  * and all open devices have been closed.
742 	 */
743 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
744 					   __func__, sc->open_device_map));
745 	for (i = 0; i < sc->params.nports; i++) {
746 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
747 						  __func__, i));
748 	}
749 
750 	/*
751 	 * Finish off the adapter's callouts.
752 	 */
753 	callout_drain(&sc->cxgb_tick_ch);
754 	callout_drain(&sc->sge_timer_ch);
755 
756 	/*
757 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
758 	 * sysctls are cleaned up by the kernel linker.
759 	 */
760 	if (sc->flags & FULL_INIT_DONE) {
761  		t3_free_sge_resources(sc);
762  		sc->flags &= ~FULL_INIT_DONE;
763  	}
764 
765 	/*
766 	 * Release all interrupt resources.
767 	 */
768 	cxgb_teardown_interrupts(sc);
769 	if (sc->flags & (USING_MSI | USING_MSIX)) {
770 		device_printf(sc->dev, "releasing msi message(s)\n");
771 		pci_release_msi(sc->dev);
772 	} else {
773 		device_printf(sc->dev, "no msi message to release\n");
774 	}
775 
776 	if (sc->msix_regs_res != NULL) {
777 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
778 		    sc->msix_regs_res);
779 	}
780 
781 	/*
782 	 * Free the adapter's taskqueue.
783 	 */
784 	if (sc->tq != NULL) {
785 		taskqueue_free(sc->tq);
786 		sc->tq = NULL;
787 	}
788 
789 	if (is_offload(sc)) {
790 		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
791 		cxgb_adapter_unofld(sc);
792 	}
793 
794 #ifdef notyet
795 	if (sc->flags & CXGB_OFLD_INIT)
796 		cxgb_offload_deactivate(sc);
797 #endif
798 	free(sc->filters, M_DEVBUF);
799 	t3_sge_free(sc);
800 
801 	cxgb_offload_exit();
802 
803 	if (sc->udbs_res != NULL)
804 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
805 		    sc->udbs_res);
806 
807 	if (sc->regs_res != NULL)
808 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
809 		    sc->regs_res);
810 
811 	MTX_DESTROY(&sc->mdio_lock);
812 	MTX_DESTROY(&sc->sge.reg_lock);
813 	MTX_DESTROY(&sc->elmer_lock);
814 	ADAPTER_LOCK_DEINIT(sc);
815 }
816 
817 /**
818  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
819  *	@sc: the controller softc
820  *
821  *	Determines how many sets of SGE queues to use and initializes them.
822  *	We support multiple queue sets per port if we have MSI-X, otherwise
823  *	just one queue set per port.
824  */
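/*
 * For example (a sketch): on a 2-port adapter with 4 queue sets per port and
 * MSI-X, qset_idx runs 0..7 and queue set i is tied to MSI-X vector i + 1
 * (vector 0 is the async interrupt).  Without MSI-X, all queue sets are
 * serviced from the single MSI/INTx handler (irq_idx is set to -1 for INTx
 * on rev > 0 parts).
 */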
825 static int
826 setup_sge_qsets(adapter_t *sc)
827 {
828 	int i, j, err, irq_idx = 0, qset_idx = 0;
829 	u_int ntxq = SGE_TXQ_PER_SET;
830 
831 	if ((err = t3_sge_alloc(sc)) != 0) {
832 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
833 		return (err);
834 	}
835 
836 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
837 		irq_idx = -1;
838 
839 	for (i = 0; i < (sc)->params.nports; i++) {
840 		struct port_info *pi = &sc->port[i];
841 
842 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
843 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
844 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
845 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
846 			if (err) {
847 				t3_free_sge_resources(sc);
848 				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
849 				    err);
850 				return (err);
851 			}
852 		}
853 	}
854 
855 	return (0);
856 }
857 
858 static void
859 cxgb_teardown_interrupts(adapter_t *sc)
860 {
861 	int i;
862 
863 	for (i = 0; i < SGE_QSETS; i++) {
864 		if (sc->msix_intr_tag[i] == NULL) {
865 
866 			/* Should have been setup fully or not at all */
867 			KASSERT(sc->msix_irq_res[i] == NULL &&
868 				sc->msix_irq_rid[i] == 0,
869 				("%s: half-done interrupt (%d).", __func__, i));
870 
871 			continue;
872 		}
873 
874 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
875 				  sc->msix_intr_tag[i]);
876 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
877 				     sc->msix_irq_res[i]);
878 
879 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
880 		sc->msix_irq_rid[i] = 0;
881 	}
882 
883 	if (sc->intr_tag) {
884 		KASSERT(sc->irq_res != NULL,
885 			("%s: half-done interrupt.", __func__));
886 
887 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
888 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
889 				     sc->irq_res);
890 
891 		sc->irq_res = sc->intr_tag = NULL;
892 		sc->irq_rid = 0;
893 	}
894 }
895 
896 static int
897 cxgb_setup_interrupts(adapter_t *sc)
898 {
899 	struct resource *res;
900 	void *tag;
901 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
902 
903 	sc->irq_rid = intr_flag ? 1 : 0;
904 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
905 					     RF_SHAREABLE | RF_ACTIVE);
906 	if (sc->irq_res == NULL) {
907 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
908 			      intr_flag, sc->irq_rid);
909 		err = EINVAL;
910 		sc->irq_rid = 0;
911 	} else {
912 		err = bus_setup_intr(sc->dev, sc->irq_res,
913 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
914 		    sc->cxgb_intr, sc, &sc->intr_tag);
915 
916 		if (err) {
917 			device_printf(sc->dev,
918 				      "Cannot set up interrupt (%x, %u, %d)\n",
919 				      intr_flag, sc->irq_rid, err);
920 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
921 					     sc->irq_res);
922 			sc->irq_res = sc->intr_tag = NULL;
923 			sc->irq_rid = 0;
924 		}
925 	}
926 
927 	/* That's all for INTx or MSI */
928 	if (!(intr_flag & USING_MSIX) || err)
929 		return (err);
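	/*
	 * MSI-X: message 0 was set up above using rid 1 (FreeBSD numbers
	 * MSI/MSI-X SYS_RES_IRQ rids starting at 1), so queue set i below
	 * gets message i + 1 via rid i + 2.
	 */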
930 
931 	for (i = 0; i < sc->msi_count - 1; i++) {
932 		rid = i + 2;
933 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
934 					     RF_SHAREABLE | RF_ACTIVE);
935 		if (res == NULL) {
936 			device_printf(sc->dev, "Cannot allocate interrupt "
937 				      "for message %d\n", rid);
938 			err = EINVAL;
939 			break;
940 		}
941 
942 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
943 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
944 		if (err) {
945 			device_printf(sc->dev, "Cannot set up interrupt "
946 				      "for message %d (%d)\n", rid, err);
947 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
948 			break;
949 		}
950 
951 		sc->msix_irq_rid[i] = rid;
952 		sc->msix_irq_res[i] = res;
953 		sc->msix_intr_tag[i] = tag;
954 	}
955 
956 	if (err)
957 		cxgb_teardown_interrupts(sc);
958 
959 	return (err);
960 }
961 
962 
963 static int
964 cxgb_port_probe(device_t dev)
965 {
966 	struct port_info *p;
967 	char buf[80];
968 	const char *desc;
969 
970 	p = device_get_softc(dev);
971 	desc = p->phy.desc;
972 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
973 	device_set_desc_copy(dev, buf);
974 	return (0);
975 }
976 
977 
978 static int
979 cxgb_makedev(struct port_info *pi)
980 {
981 
982 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
983 	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
984 
985 	if (pi->port_cdev == NULL)
986 		return (ENOMEM);
987 
988 	pi->port_cdev->si_drv1 = (void *)pi;
989 
990 	return (0);
991 }
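/*
 * The node is named after the interface (e.g. /dev/cxgb0) and its ioctls are
 * handled by cxgb_extension_ioctl() through the cdevsw defined above;
 * userland management tools (Chelsio's cxgbtool, for instance) typically use
 * it for register dumps, firmware updates, filter programming and the like.
 */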
992 
993 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
994     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
995     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
996 #define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
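/*
 * Capabilities listed in CXGB_CAP_ENABLE are on by default; they can be
 * toggled per interface at runtime in the usual way, e.g. "ifconfig cxgb0
 * -tso" or "ifconfig cxgb0 lro", which reaches the SIOCSIFCAP case of
 * cxgb_ioctl().
 */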
997 
998 static int
999 cxgb_port_attach(device_t dev)
1000 {
1001 	struct port_info *p;
1002 	struct ifnet *ifp;
1003 	int err;
1004 	struct adapter *sc;
1005 
1006 	p = device_get_softc(dev);
1007 	sc = p->adapter;
1008 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1009 	    device_get_unit(device_get_parent(dev)), p->port_id);
1010 	PORT_LOCK_INIT(p, p->lockbuf);
1011 
1012 	callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1013 	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1014 
1015 	/* Allocate an ifnet object and set it up */
1016 	ifp = p->ifp = if_alloc(IFT_ETHER);
1017 	if (ifp == NULL) {
1018 		device_printf(dev, "Cannot allocate ifnet\n");
1019 		return (ENOMEM);
1020 	}
1021 
1022 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1023 	ifp->if_init = cxgb_init;
1024 	ifp->if_softc = p;
1025 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1026 	ifp->if_ioctl = cxgb_ioctl;
1027 	ifp->if_start = cxgb_start;
1028 
1029 	ifp->if_snd.ifq_drv_maxlen = max(cxgb_snd_queue_len, ifqmaxlen);
1030 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
1031 	IFQ_SET_READY(&ifp->if_snd);
1032 
1033 	ifp->if_capabilities = CXGB_CAP;
1034 	ifp->if_capenable = CXGB_CAP_ENABLE;
1035 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1036 
1037 	/*
1038 	 * Disable TSO on 4-port - it isn't supported by the firmware.
1039 	 */
1040 	if (sc->params.nports > 2) {
1041 		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1042 		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1043 		ifp->if_hwassist &= ~CSUM_TSO;
1044 	}
1045 
1046 	ether_ifattach(ifp, p->hw_addr);
1047 	ifp->if_transmit = cxgb_transmit;
1048 	ifp->if_qflush = cxgb_qflush;
1049 
1050 #ifdef DEFAULT_JUMBO
1051 	if (sc->params.nports <= 2)
1052 		ifp->if_mtu = ETHERMTU_JUMBO;
1053 #endif
1054 	if ((err = cxgb_makedev(p)) != 0) {
1055 		printf("makedev failed %d\n", err);
1056 		return (err);
1057 	}
1058 
1059 	/* Create a list of media supported by this port */
1060 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1061 	    cxgb_media_status);
1062 	cxgb_build_medialist(p);
1063 
1064 	t3_sge_init_port(p);
1065 
1066 	return (err);
1067 }
1068 
1069 /*
1070  * cxgb_port_detach() is called via the device_detach method when
1071  * cxgb_free() calls bus_generic_detach().  It is responsible for
1072  * removing the device from the view of the kernel, i.e. from all
1073  * interfaces lists etc.  This routine is only called when the driver is
1074  * being unloaded, not when the link goes down.
1075  */
1076 static int
1077 cxgb_port_detach(device_t dev)
1078 {
1079 	struct port_info *p;
1080 	struct adapter *sc;
1081 	int i;
1082 
1083 	p = device_get_softc(dev);
1084 	sc = p->adapter;
1085 
1086 	/* Tell cxgb_ioctl and if_init that the port is going away */
1087 	ADAPTER_LOCK(sc);
1088 	SET_DOOMED(p);
1089 	wakeup(&sc->flags);
1090 	while (IS_BUSY(sc))
1091 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1092 	SET_BUSY(sc);
1093 	ADAPTER_UNLOCK(sc);
1094 
1095 	if (p->port_cdev != NULL)
1096 		destroy_dev(p->port_cdev);
1097 
1098 	cxgb_uninit_synchronized(p);
1099 	ether_ifdetach(p->ifp);
1100 
1101 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1102 		struct sge_qset *qs = &sc->sge.qs[i];
1103 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1104 
1105 		callout_drain(&txq->txq_watchdog);
1106 		callout_drain(&txq->txq_timer);
1107 	}
1108 
1109 	PORT_LOCK_DEINIT(p);
1110 	if_free(p->ifp);
1111 	p->ifp = NULL;
1112 
1113 	ADAPTER_LOCK(sc);
1114 	CLR_BUSY(sc);
1115 	wakeup_one(&sc->flags);
1116 	ADAPTER_UNLOCK(sc);
1117 	return (0);
1118 }
1119 
1120 void
1121 t3_fatal_err(struct adapter *sc)
1122 {
1123 	u_int fw_status[4];
1124 
1125 	if (sc->flags & FULL_INIT_DONE) {
1126 		t3_sge_stop(sc);
1127 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1128 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1129 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1130 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1131 		t3_intr_disable(sc);
1132 	}
1133 	device_printf(sc->dev, "encountered fatal error, operation suspended\n");
1134 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1135 		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1136 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1137 }
1138 
1139 int
1140 t3_os_find_pci_capability(adapter_t *sc, int cap)
1141 {
1142 	device_t dev;
1143 	struct pci_devinfo *dinfo;
1144 	pcicfgregs *cfg;
1145 	uint32_t status;
1146 	uint8_t ptr;
1147 
1148 	dev = sc->dev;
1149 	dinfo = device_get_ivars(dev);
1150 	cfg = &dinfo->cfg;
1151 
1152 	status = pci_read_config(dev, PCIR_STATUS, 2);
1153 	if (!(status & PCIM_STATUS_CAPPRESENT))
1154 		return (0);
1155 
1156 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1157 	case 0:
1158 	case 1:
1159 		ptr = PCIR_CAP_PTR;
1160 		break;
1161 	case 2:
1162 		ptr = PCIR_CAP_PTR_2;
1163 		break;
1164 	default:
1165 		return (0);
1166 		break;
1167 	}
1168 	ptr = pci_read_config(dev, ptr, 1);
1169 
1170 	while (ptr != 0) {
1171 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1172 			return (ptr);
1173 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1174 	}
1175 
1176 	return (0);
1177 }
1178 
1179 int
1180 t3_os_pci_save_state(struct adapter *sc)
1181 {
1182 	device_t dev;
1183 	struct pci_devinfo *dinfo;
1184 
1185 	dev = sc->dev;
1186 	dinfo = device_get_ivars(dev);
1187 
1188 	pci_cfg_save(dev, dinfo, 0);
1189 	return (0);
1190 }
1191 
1192 int
1193 t3_os_pci_restore_state(struct adapter *sc)
1194 {
1195 	device_t dev;
1196 	struct pci_devinfo *dinfo;
1197 
1198 	dev = sc->dev;
1199 	dinfo = device_get_ivars(dev);
1200 
1201 	pci_cfg_restore(dev, dinfo);
1202 	return (0);
1203 }
1204 
1205 /**
1206  *	t3_os_link_changed - handle link status changes
1207  *	@adapter: the adapter associated with the link change
1208  *	@port_id: the port index whose link status has changed
1209  *	@link_status: the new status of the link
1210  *	@speed: the new speed setting
1211  *	@duplex: the new duplex setting
1212  *	@fc: the new flow-control setting
 *	@mac_was_reset: non-zero if the MAC was reset and its settings must be reapplied
1213  *
1214  *	This is the OS-dependent handler for link status changes.  The OS
1215  *	neutral handler takes care of most of the processing for these events,
1216  *	then calls this handler for any OS-specific processing.
1217  */
1218 void
1219 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1220      int duplex, int fc, int mac_was_reset)
1221 {
1222 	struct port_info *pi = &adapter->port[port_id];
1223 	struct ifnet *ifp = pi->ifp;
1224 
1225 	/* no race with detach, so ifp should always be good */
1226 	KASSERT(ifp, ("%s: if detached.", __func__));
1227 
1228 	/* Reapply mac settings if they were lost due to a reset */
1229 	if (mac_was_reset) {
1230 		PORT_LOCK(pi);
1231 		cxgb_update_mac_settings(pi);
1232 		PORT_UNLOCK(pi);
1233 	}
1234 
1235 	if (link_status) {
1236 		ifp->if_baudrate = IF_Mbps(speed);
1237 		if_link_state_change(ifp, LINK_STATE_UP);
1238 	} else
1239 		if_link_state_change(ifp, LINK_STATE_DOWN);
1240 }
1241 
1242 /**
1243  *	t3_os_phymod_changed - handle PHY module changes
1244  *	@adap: the adapter whose PHY reported the module change
1245  *	@port_id: index of the port whose PHY module changed
1246  *
1247  *	This is the OS-dependent handler for PHY module changes.  It is
1248  *	invoked when a PHY module is removed or inserted for any OS-specific
1249  *	processing.
1250  */
1251 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1252 {
1253 	static const char *mod_str[] = {
1254 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1255 	};
1256 	struct port_info *pi = &adap->port[port_id];
1257 	int mod = pi->phy.modtype;
1258 
1259 	if (mod != pi->media.ifm_cur->ifm_data)
1260 		cxgb_build_medialist(pi);
1261 
1262 	if (mod == phy_modtype_none)
1263 		if_printf(pi->ifp, "PHY module unplugged\n");
1264 	else {
1265 		KASSERT(mod < ARRAY_SIZE(mod_str),
1266 			("invalid PHY module type %d", mod));
1267 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1268 	}
1269 }
1270 
1271 void
1272 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1273 {
1274 
1275 	/*
1276 	 * The ifnet might not be allocated before this gets called,
1277 	 * as this is called early on in attach by t3_prep_adapter, so
1278 	 * just save the address in the port structure for now.
1279 	 */
1280 	if (cxgb_debug)
1281 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1282 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1283 }
1284 
1285 /*
1286  * Programs the XGMAC based on the settings in the ifnet.  These settings
1287  * include MTU, MAC address, mcast addresses, etc.
1288  */
1289 static void
1290 cxgb_update_mac_settings(struct port_info *p)
1291 {
1292 	struct ifnet *ifp = p->ifp;
1293 	struct t3_rx_mode rm;
1294 	struct cmac *mac = &p->mac;
1295 	int mtu, hwtagging;
1296 
1297 	PORT_LOCK_ASSERT_OWNED(p);
1298 
1299 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1300 
1301 	mtu = ifp->if_mtu;
1302 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1303 		mtu += ETHER_VLAN_ENCAP_LEN;
1304 
1305 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1306 
1307 	t3_mac_set_mtu(mac, mtu);
1308 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1309 	t3_mac_set_address(mac, 0, p->hw_addr);
1310 	t3_init_rx_mode(&rm, p);
1311 	t3_mac_set_rx_mode(mac, &rm);
1312 }
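/*
 * Example: with if_mtu = 1500 and IFCAP_VLAN_MTU enabled, the MAC above is
 * programmed with an MTU of 1504 (ETHER_VLAN_ENCAP_LEN is 4) so that
 * full-sized frames still fit once a VLAN tag has been inserted.
 */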
1313 
1314 
1315 static int
1316 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1317 			      unsigned long n)
1318 {
1319 	int attempts = 5;
1320 
1321 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1322 		if (!--attempts)
1323 			return (ETIMEDOUT);
1324 		t3_os_sleep(10);
1325 	}
1326 	return (0);
1327 }
1328 
1329 static int
1330 init_tp_parity(struct adapter *adap)
1331 {
1332 	int i;
1333 	struct mbuf *m;
1334 	struct cpl_set_tcb_field *greq;
1335 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1336 
1337 	t3_tp_set_offload_mode(adap, 1);
1338 
1339 	for (i = 0; i < 16; i++) {
1340 		struct cpl_smt_write_req *req;
1341 
1342 		m = m_gethdr(M_WAITOK, MT_DATA);
1343 		req = mtod(m, struct cpl_smt_write_req *);
1344 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1345 		memset(req, 0, sizeof(*req));
1346 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1347 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1348 		req->iff = i;
1349 		t3_mgmt_tx(adap, m);
1350 	}
1351 
1352 	for (i = 0; i < 2048; i++) {
1353 		struct cpl_l2t_write_req *req;
1354 
1355 		m = m_gethdr(M_WAITOK, MT_DATA);
1356 		req = mtod(m, struct cpl_l2t_write_req *);
1357 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1358 		memset(req, 0, sizeof(*req));
1359 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1360 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1361 		req->params = htonl(V_L2T_W_IDX(i));
1362 		t3_mgmt_tx(adap, m);
1363 	}
1364 
1365 	for (i = 0; i < 2048; i++) {
1366 		struct cpl_rte_write_req *req;
1367 
1368 		m = m_gethdr(M_WAITOK, MT_DATA);
1369 		req = mtod(m, struct cpl_rte_write_req *);
1370 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1371 		memset(req, 0, sizeof(*req));
1372 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1373 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1374 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1375 		t3_mgmt_tx(adap, m);
1376 	}
1377 
1378 	m = m_gethdr(M_WAITOK, MT_DATA);
1379 	greq = mtod(m, struct cpl_set_tcb_field *);
1380 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1381 	memset(greq, 0, sizeof(*greq));
1382 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1383 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1384 	greq->mask = htobe64(1);
1385 	t3_mgmt_tx(adap, m);
1386 
1387 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1388 	t3_tp_set_offload_mode(adap, 0);
1389 	return (i);
1390 }
1391 
1392 /**
1393  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1394  *	@adap: the adapter
1395  *
1396  *	Sets up RSS to distribute packets to multiple receive queues.  We
1397  *	configure the RSS CPU lookup table to distribute to the number of HW
1398  *	receive queues, and the response queue lookup table to narrow that
1399  *	down to the response queues actually configured for each port.
1400  *	We always configure the RSS mapping for two ports since the mapping
1401  *	table has plenty of entries.
1402  */
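/*
 * For example (a sketch): with two ports of 4 queue sets each, nq[0] = nq[1]
 * = 4, so the first half of rspq_map cycles through 0,1,2,3 and the second
 * half through 4,5,6,7, spreading channel-1 traffic over the second port's
 * queue sets.  rrss_map then records, for each queue set, the first RSS
 * table index that points at it.
 */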
1403 static void
1404 setup_rss(adapter_t *adap)
1405 {
1406 	int i;
1407 	u_int nq[2];
1408 	uint8_t cpus[SGE_QSETS + 1];
1409 	uint16_t rspq_map[RSS_TABLE_SIZE];
1410 
1411 	for (i = 0; i < SGE_QSETS; ++i)
1412 		cpus[i] = i;
1413 	cpus[SGE_QSETS] = 0xff;
1414 
1415 	nq[0] = nq[1] = 0;
1416 	for_each_port(adap, i) {
1417 		const struct port_info *pi = adap2pinfo(adap, i);
1418 
1419 		nq[pi->tx_chan] += pi->nqsets;
1420 	}
1421 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1422 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1423 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1424 	}
1425 
1426 	/* Calculate the reverse RSS map table */
1427 	for (i = 0; i < SGE_QSETS; ++i)
1428 		adap->rrss_map[i] = 0xff;
1429 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1430 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1431 			adap->rrss_map[rspq_map[i]] = i;
1432 
1433 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1434 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1435 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1436 	              cpus, rspq_map);
1437 
1438 }
1439 
1440 /*
1441  * Sends an mbuf to an offload queue driver
1442  * (currently just a thin wrapper around t3_offload_tx()).
1443  */
1444 static inline int
1445 offload_tx(struct t3cdev *tdev, struct mbuf *m)
1446 {
1447 	int ret;
1448 
1449 	ret = t3_offload_tx(tdev, m);
1450 	return (ret);
1451 }
1452 
1453 static int
1454 write_smt_entry(struct adapter *adapter, int idx)
1455 {
1456 	struct port_info *pi = &adapter->port[idx];
1457 	struct cpl_smt_write_req *req;
1458 	struct mbuf *m;
1459 
1460 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1461 		return (ENOMEM);
1462 
1463 	req = mtod(m, struct cpl_smt_write_req *);
1464 	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1465 
1466 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1467 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1468 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1469 	req->iff = idx;
1470 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1471 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1472 
1473 	m_set_priority(m, 1);
1474 
1475 	offload_tx(&adapter->tdev, m);
1476 
1477 	return (0);
1478 }
1479 
1480 static int
1481 init_smt(struct adapter *adapter)
1482 {
1483 	int i;
1484 
1485 	for_each_port(adapter, i)
1486 		write_smt_entry(adapter, i);
1487 	return (0);
1488 }
1489 
1490 static void
1491 init_port_mtus(adapter_t *adapter)
1492 {
1493 	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1494 
1495 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1496 }
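/*
 * The two per-channel MTUs are packed into one 32-bit register; with ETHERMTU
 * (1500 = 0x5dc) the value written above is 0x05dc05dc.
 */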
1497 
1498 static void
1499 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1500 			      int hi, int port)
1501 {
1502 	struct mbuf *m;
1503 	struct mngt_pktsched_wr *req;
1504 
1505 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1506 	if (m) {
1507 		req = mtod(m, struct mngt_pktsched_wr *);
1508 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1509 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1510 		req->sched = sched;
1511 		req->idx = qidx;
1512 		req->min = lo;
1513 		req->max = hi;
1514 		req->binding = port;
1515 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1516 		t3_mgmt_tx(adap, m);
1517 	}
1518 }
1519 
1520 static void
1521 bind_qsets(adapter_t *sc)
1522 {
1523 	int i, j;
1524 
1525 	for (i = 0; i < (sc)->params.nports; ++i) {
1526 		const struct port_info *pi = adap2pinfo(sc, i);
1527 
1528 		for (j = 0; j < pi->nqsets; ++j) {
1529 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1530 					  -1, pi->tx_chan);
1531 
1532 		}
1533 	}
1534 }
1535 
1536 static void
1537 update_tpeeprom(struct adapter *adap)
1538 {
1539 	const struct firmware *tpeeprom;
1540 
1541 	uint32_t version;
1542 	unsigned int major, minor;
1543 	int ret, len;
1544 	char rev, name[32];
1545 
1546 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1547 
1548 	major = G_TP_VERSION_MAJOR(version);
1549 	minor = G_TP_VERSION_MINOR(version);
1550 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1551 		return;
1552 
1553 	rev = t3rev2char(adap);
1554 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1555 
1556 	tpeeprom = firmware_get(name);
1557 	if (tpeeprom == NULL) {
1558 		device_printf(adap->dev,
1559 			      "could not load TP EEPROM: unable to load %s\n",
1560 			      name);
1561 		return;
1562 	}
1563 
1564 	len = tpeeprom->datasize - 4;
1565 
1566 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1567 	if (ret)
1568 		goto release_tpeeprom;
1569 
1570 	if (len != TP_SRAM_LEN) {
1571 		device_printf(adap->dev,
1572 			      "%s length is wrong len=%d expected=%d\n", name,
1573 			      len, TP_SRAM_LEN);
1574 		goto release_tpeeprom;
1575 	}
1576 
1577 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1578 	    TP_SRAM_OFFSET);
1579 
1580 	if (!ret) {
1581 		device_printf(adap->dev,
1582 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1583 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1584 	} else
1585 		device_printf(adap->dev,
1586 			      "Protocol SRAM image update in EEPROM failed\n");
1587 
1588 release_tpeeprom:
1589 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1590 
1591 	return;
1592 }
1593 
1594 static int
1595 update_tpsram(struct adapter *adap)
1596 {
1597 	const struct firmware *tpsram;
1598 	int ret;
1599 	char rev, name[32];
1600 
1601 	rev = t3rev2char(adap);
1602 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1603 
1604 	update_tpeeprom(adap);
1605 
1606 	tpsram = firmware_get(name);
1607 	if (tpsram == NULL) {
1608 		device_printf(adap->dev, "could not load TP SRAM\n");
1609 		return (EINVAL);
1610 	} else
1611 		device_printf(adap->dev, "updating TP SRAM\n");
1612 
1613 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1614 	if (ret)
1615 		goto release_tpsram;
1616 
1617 	ret = t3_set_proto_sram(adap, tpsram->data);
1618 	if (ret)
1619 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1620 
1621 release_tpsram:
1622 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1623 
1624 	return (ret);
1625 }
1626 
1627 /**
1628  *	cxgb_up - enable the adapter
1629  *	@adap: adapter being enabled
1630  *
1631  *	Called when the first port is enabled, this function performs the
1632  *	actions necessary to make an adapter operational, such as completing
1633  *	the initialization of HW modules, and enabling interrupts.
1634  */
1635 static int
1636 cxgb_up(struct adapter *sc)
1637 {
1638 	int err = 0;
1639 	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1640 
1641 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1642 					   __func__, sc->open_device_map));
1643 
1644 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1645 
1646 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1647 
1648 		if ((sc->flags & FW_UPTODATE) == 0)
1649 			if ((err = upgrade_fw(sc)))
1650 				goto out;
1651 
1652 		if ((sc->flags & TPS_UPTODATE) == 0)
1653 			if ((err = update_tpsram(sc)))
1654 				goto out;
1655 
1656 		if (is_offload(sc) && nfilters != 0) {
1657 			sc->params.mc5.nservers = 0;
1658 
1659 			if (nfilters < 0)
1660 				sc->params.mc5.nfilters = mxf;
1661 			else
1662 				sc->params.mc5.nfilters = min(nfilters, mxf);
1663 		}
1664 
1665 		err = t3_init_hw(sc, 0);
1666 		if (err)
1667 			goto out;
1668 
1669 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1670 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1671 
1672 		err = setup_sge_qsets(sc);
1673 		if (err)
1674 			goto out;
1675 
1676 		alloc_filters(sc);
1677 		setup_rss(sc);
1678 
1679 		t3_intr_clear(sc);
1680 		err = cxgb_setup_interrupts(sc);
1681 		if (err)
1682 			goto out;
1683 
1684 		t3_add_configured_sysctls(sc);
1685 		sc->flags |= FULL_INIT_DONE;
1686 	}
1687 
1688 	t3_intr_clear(sc);
1689 	t3_sge_start(sc);
1690 	t3_intr_enable(sc);
1691 
1692 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1693 	    is_offload(sc) && init_tp_parity(sc) == 0)
1694 		sc->flags |= TP_PARITY_INIT;
1695 
1696 	if (sc->flags & TP_PARITY_INIT) {
1697 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1698 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1699 	}
1700 
1701 	if (!(sc->flags & QUEUES_BOUND)) {
1702 		bind_qsets(sc);
1703 		setup_hw_filters(sc);
1704 		sc->flags |= QUEUES_BOUND;
1705 	}
1706 
1707 	t3_sge_reset_adapter(sc);
1708 out:
1709 	return (err);
1710 }
1711 
1712 /*
1713  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1714  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1715  * during controller_detach, not here.
1716  */
1717 static void
1718 cxgb_down(struct adapter *sc)
1719 {
1720 	t3_sge_stop(sc);
1721 	t3_intr_disable(sc);
1722 }
1723 
1724 static int
1725 offload_open(struct port_info *pi)
1726 {
1727 	struct adapter *sc = pi->adapter;
1728 	struct t3cdev *tdev = &sc->tdev;
1729 
1730 	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1731 
1732 	t3_tp_set_offload_mode(sc, 1);
1733 	tdev->lldev = pi->ifp;
1734 	init_port_mtus(sc);
1735 	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1736 		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1737 	init_smt(sc);
1738 	cxgb_add_clients(tdev);
1739 
1740 	return (0);
1741 }
1742 
1743 static int
1744 offload_close(struct t3cdev *tdev)
1745 {
1746 	struct adapter *adapter = tdev2adap(tdev);
1747 
1748 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1749 		return (0);
1750 
1751 	/* Call back all registered clients */
1752 	cxgb_remove_clients(tdev);
1753 
1754 	tdev->lldev = NULL;
1755 	cxgb_set_dummy_ops(tdev);
1756 	t3_tp_set_offload_mode(adapter, 0);
1757 
1758 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1759 
1760 	return (0);
1761 }
1762 
1763 /*
1764  * if_init for cxgb ports.
1765  */
1766 static void
1767 cxgb_init(void *arg)
1768 {
1769 	struct port_info *p = arg;
1770 	struct adapter *sc = p->adapter;
1771 
1772 	ADAPTER_LOCK(sc);
1773 	cxgb_init_locked(p); /* releases adapter lock */
1774 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1775 }
1776 
1777 static int
1778 cxgb_init_locked(struct port_info *p)
1779 {
1780 	struct adapter *sc = p->adapter;
1781 	struct ifnet *ifp = p->ifp;
1782 	struct cmac *mac = &p->mac;
1783 	int i, rc = 0, may_sleep = 0;
1784 
1785 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1786 
1787 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1788 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1789 			rc = EINTR;
1790 			goto done;
1791 		}
1792 	}
1793 	if (IS_DOOMED(p)) {
1794 		rc = ENXIO;
1795 		goto done;
1796 	}
1797 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1798 
1799 	/*
1800 	 * The code that runs during one-time adapter initialization can sleep
1801 	 * so it's important not to hold any locks across it.
1802 	 */
1803 	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1804 
1805 	if (may_sleep) {
1806 		SET_BUSY(sc);
1807 		ADAPTER_UNLOCK(sc);
1808 	}
1809 
1810 	if (sc->open_device_map == 0) {
1811 		if ((rc = cxgb_up(sc)) != 0)
1812 			goto done;
1813 
1814 		if (is_offload(sc) && !ofld_disable && offload_open(p))
1815 			log(LOG_WARNING,
1816 			    "Could not initialize offload capabilities\n");
1817 	}
1818 
1819 	PORT_LOCK(p);
1820 	if (isset(&sc->open_device_map, p->port_id) &&
1821 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1822 		PORT_UNLOCK(p);
1823 		goto done;
1824 	}
1825 	t3_port_intr_enable(sc, p->port_id);
1826 	if (!mac->multiport)
1827 		t3_mac_init(mac);
1828 	cxgb_update_mac_settings(p);
1829 	t3_link_start(&p->phy, mac, &p->link_config);
1830 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1831 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1832 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1833 	PORT_UNLOCK(p);
1834 
1835 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1836 		struct sge_qset *qs = &sc->sge.qs[i];
1837 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1838 
1839 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1840 				 txq->txq_watchdog.c_cpu);
1841 	}
1842 
1843 	/* all ok */
1844 	setbit(&sc->open_device_map, p->port_id);
1845 	callout_reset(&p->link_check_ch,
1846 	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1847 	    link_check_callout, p);
1848 
1849 done:
1850 	if (may_sleep) {
1851 		ADAPTER_LOCK(sc);
1852 		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1853 		CLR_BUSY(sc);
1854 		wakeup_one(&sc->flags);
1855 	}
1856 	ADAPTER_UNLOCK(sc);
1857 	return (rc);
1858 }
1859 
1860 static int
1861 cxgb_uninit_locked(struct port_info *p)
1862 {
1863 	struct adapter *sc = p->adapter;
1864 	int rc;
1865 
1866 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1867 
1868 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1869 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1870 			rc = EINTR;
1871 			goto done;
1872 		}
1873 	}
1874 	if (IS_DOOMED(p)) {
1875 		rc = ENXIO;
1876 		goto done;
1877 	}
1878 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1879 	SET_BUSY(sc);
1880 	ADAPTER_UNLOCK(sc);
1881 
1882 	rc = cxgb_uninit_synchronized(p);
1883 
1884 	ADAPTER_LOCK(sc);
1885 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1886 	CLR_BUSY(sc);
1887 	wakeup_one(&sc->flags);
1888 done:
1889 	ADAPTER_UNLOCK(sc);
1890 	return (rc);
1891 }
1892 
1893 /*
1894  * Called on "ifconfig down", and from port_detach
1895  */
1896 static int
1897 cxgb_uninit_synchronized(struct port_info *pi)
1898 {
1899 	struct adapter *sc = pi->adapter;
1900 	struct ifnet *ifp = pi->ifp;
1901 
1902 	/*
1903 	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1904 	 */
1905 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1906 
1907 	/*
1908 	 * Clear this port's bit from the open device map, and then drain all
1909 	 * the tasks that can access/manipulate this port's port_info or ifp.
1910 	 * We disable this port's interrupts here, so the slow/ext
1911 	 * interrupt tasks won't be enqueued.  The tick task will continue to
1912 	 * be enqueued every second but any run after this drain will not see
1913 	 * this port in the open device map.
1914 	 *
1915 	 * A well-behaved task must take open_device_map into account and ignore
1916 	 * ports that are not open.
1917 	 */
1918 	clrbit(&sc->open_device_map, pi->port_id);
1919 	t3_port_intr_disable(sc, pi->port_id);
1920 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1921 	taskqueue_drain(sc->tq, &sc->tick_task);
1922 
1923 	callout_drain(&pi->link_check_ch);
1924 	taskqueue_drain(sc->tq, &pi->link_check_task);
1925 
1926 	PORT_LOCK(pi);
1927 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1928 
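	/*
	 * Quiesce the MAC: stop pause frames, give the TX FIFO time to drain,
	 * then disable the receive side and power down the PHY.
	 */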
1929 	/* disable pause frames */
1930 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1931 
1932 	/* Reset RX FIFO HWM */
1933 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1934 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1935 
1936 	DELAY(100 * 1000);
1937 
1938 	/* Wait for TXFIFO empty */
1939 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1940 			F_TXFIFO_EMPTY, 1, 20, 5);
1941 
1942 	DELAY(100 * 1000);
1943 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1944 
1945 
1946 	pi->phy.ops->power_down(&pi->phy, 1);
1947 
1948 	PORT_UNLOCK(pi);
1949 
1950 	pi->link_config.link_ok = 0;
1951 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1952 
1953 	if ((sc->open_device_map & PORT_MASK) == 0)
1954 		offload_close(&sc->tdev);
1955 
1956 	if (sc->open_device_map == 0)
1957 		cxgb_down(pi->adapter);
1958 
1959 	return (0);
1960 }
1961 
1962 /*
1963  * Mark lro enabled or disabled in all qsets for this port
1964  */
1965 static int
1966 cxgb_set_lro(struct port_info *p, int enabled)
1967 {
1968 	int i;
1969 	struct adapter *adp = p->adapter;
1970 	struct sge_qset *q;
1971 
1972 	for (i = 0; i < p->nqsets; i++) {
1973 		q = &adp->sge.qs[p->first_qset + i];
1974 		q->lro.enabled = (enabled != 0);
1975 	}
1976 	return (0);
1977 }
1978 
1979 static int
1980 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1981 {
1982 	struct port_info *p = ifp->if_softc;
1983 	struct adapter *sc = p->adapter;
1984 	struct ifreq *ifr = (struct ifreq *)data;
1985 	int flags, error = 0, mtu;
1986 	uint32_t mask;
1987 
1988 	switch (command) {
1989 	case SIOCSIFMTU:
1990 		ADAPTER_LOCK(sc);
1991 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1992 		if (error) {
1993 fail:
1994 			ADAPTER_UNLOCK(sc);
1995 			return (error);
1996 		}
1997 
1998 		mtu = ifr->ifr_mtu;
1999 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
2000 			error = EINVAL;
2001 		} else {
2002 			ifp->if_mtu = mtu;
2003 			PORT_LOCK(p);
2004 			cxgb_update_mac_settings(p);
2005 			PORT_UNLOCK(p);
2006 		}
2007 		ADAPTER_UNLOCK(sc);
2008 		break;
2009 	case SIOCSIFFLAGS:
2010 		ADAPTER_LOCK(sc);
2011 		if (IS_DOOMED(p)) {
2012 			error = ENXIO;
2013 			goto fail;
2014 		}
2015 		if (ifp->if_flags & IFF_UP) {
2016 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2017 				flags = p->if_flags;
2018 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2019 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2020 					if (IS_BUSY(sc)) {
2021 						error = EBUSY;
2022 						goto fail;
2023 					}
2024 					PORT_LOCK(p);
2025 					cxgb_update_mac_settings(p);
2026 					PORT_UNLOCK(p);
2027 				}
2028 				ADAPTER_UNLOCK(sc);
2029 			} else
2030 				error = cxgb_init_locked(p);
2031 			p->if_flags = ifp->if_flags;
2032 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2033 			error = cxgb_uninit_locked(p);
2034 		else
2035 			ADAPTER_UNLOCK(sc);
2036 
2037 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2038 		break;
2039 	case SIOCADDMULTI:
2040 	case SIOCDELMULTI:
2041 		ADAPTER_LOCK(sc);
2042 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2043 		if (error)
2044 			goto fail;
2045 
2046 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2047 			PORT_LOCK(p);
2048 			cxgb_update_mac_settings(p);
2049 			PORT_UNLOCK(p);
2050 		}
2051 		ADAPTER_UNLOCK(sc);
2052 
2053 		break;
2054 	case SIOCSIFCAP:
2055 		ADAPTER_LOCK(sc);
2056 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2057 		if (error)
2058 			goto fail;
2059 
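		/* mask holds the capability bits being toggled by this request */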
2060 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2061 		if (mask & IFCAP_TXCSUM) {
2062 			ifp->if_capenable ^= IFCAP_TXCSUM;
2063 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2064 
2065 			if (IFCAP_TSO & ifp->if_capenable &&
2066 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2067 				ifp->if_capenable &= ~IFCAP_TSO;
2068 				ifp->if_hwassist &= ~CSUM_TSO;
2069 				if_printf(ifp,
2070 				    "tso disabled due to -txcsum.\n");
2071 			}
2072 		}
2073 		if (mask & IFCAP_RXCSUM)
2074 			ifp->if_capenable ^= IFCAP_RXCSUM;
2075 		if (mask & IFCAP_TSO4) {
2076 			ifp->if_capenable ^= IFCAP_TSO4;
2077 
2078 			if (IFCAP_TSO & ifp->if_capenable) {
2079 				if (IFCAP_TXCSUM & ifp->if_capenable)
2080 					ifp->if_hwassist |= CSUM_TSO;
2081 				else {
2082 					ifp->if_capenable &= ~IFCAP_TSO;
2083 					ifp->if_hwassist &= ~CSUM_TSO;
2084 					if_printf(ifp,
2085 					    "enable txcsum first.\n");
2086 					error = EAGAIN;
2087 				}
2088 			} else
2089 				ifp->if_hwassist &= ~CSUM_TSO;
2090 		}
2091 		if (mask & IFCAP_LRO) {
2092 			ifp->if_capenable ^= IFCAP_LRO;
2093 
2094 			/* Safe to do this even if cxgb_up not called yet */
2095 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2096 		}
2097 		if (mask & IFCAP_VLAN_HWTAGGING) {
2098 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2099 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2100 				PORT_LOCK(p);
2101 				cxgb_update_mac_settings(p);
2102 				PORT_UNLOCK(p);
2103 			}
2104 		}
2105 		if (mask & IFCAP_VLAN_MTU) {
2106 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2107 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2108 				PORT_LOCK(p);
2109 				cxgb_update_mac_settings(p);
2110 				PORT_UNLOCK(p);
2111 			}
2112 		}
2113 		if (mask & IFCAP_VLAN_HWTSO)
2114 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2115 		if (mask & IFCAP_VLAN_HWCSUM)
2116 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2117 
2118 #ifdef VLAN_CAPABILITIES
2119 		VLAN_CAPABILITIES(ifp);
2120 #endif
2121 		ADAPTER_UNLOCK(sc);
2122 		break;
2123 	case SIOCSIFMEDIA:
2124 	case SIOCGIFMEDIA:
2125 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2126 		break;
2127 	default:
2128 		error = ether_ioctl(ifp, command, data);
2129 	}
2130 
2131 	return (error);
2132 }
2133 
2134 static int
2135 cxgb_media_change(struct ifnet *ifp)
2136 {
2137 	return (EOPNOTSUPP);
2138 }
2139 
2140 /*
2141  * Translates phy->modtype to the correct Ethernet media subtype.
2142  */
2143 static int
2144 cxgb_ifm_type(int mod)
2145 {
2146 	switch (mod) {
2147 	case phy_modtype_sr:
2148 		return (IFM_10G_SR);
2149 	case phy_modtype_lr:
2150 		return (IFM_10G_LR);
2151 	case phy_modtype_lrm:
2152 		return (IFM_10G_LRM);
2153 	case phy_modtype_twinax:
2154 		return (IFM_10G_TWINAX);
2155 	case phy_modtype_twinax_long:
2156 		return (IFM_10G_TWINAX_LONG);
2157 	case phy_modtype_none:
2158 		return (IFM_NONE);
2159 	case phy_modtype_unknown:
2160 		return (IFM_UNKNOWN);
2161 	}
2162 
2163 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2164 	return (IFM_UNKNOWN);
2165 }
2166 
2167 /*
2168  * Rebuilds the ifmedia list for this port, and sets the current media.
2169  */
2170 static void
2171 cxgb_build_medialist(struct port_info *p)
2172 {
2173 	struct cphy *phy = &p->phy;
2174 	struct ifmedia *media = &p->media;
2175 	int mod = phy->modtype;
2176 	int m = IFM_ETHER | IFM_FDX;
2177 
2178 	PORT_LOCK(p);
2179 
2180 	ifmedia_removeall(media);
2181 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2182 		/* Copper (RJ45) */
2183 
2184 		if (phy->caps & SUPPORTED_10000baseT_Full)
2185 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2186 
2187 		if (phy->caps & SUPPORTED_1000baseT_Full)
2188 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2189 
2190 		if (phy->caps & SUPPORTED_100baseT_Full)
2191 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2192 
2193 		if (phy->caps & SUPPORTED_10baseT_Full)
2194 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2195 
2196 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2197 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2198 
2199 	} else if (phy->caps & SUPPORTED_TP) {
2200 		/* Copper (CX4) */
2201 
2202 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2203 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2204 
2205 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2206 		ifmedia_set(media, m | IFM_10G_CX4);
2207 
2208 	} else if (phy->caps & SUPPORTED_FIBRE &&
2209 		   phy->caps & SUPPORTED_10000baseT_Full) {
2210 		/* 10G optical (but includes SFP+ twinax) */
2211 
2212 		m |= cxgb_ifm_type(mod);
2213 		if (IFM_SUBTYPE(m) == IFM_NONE)
2214 			m &= ~IFM_FDX;
2215 
2216 		ifmedia_add(media, m, mod, NULL);
2217 		ifmedia_set(media, m);
2218 
2219 	} else if (phy->caps & SUPPORTED_FIBRE &&
2220 		   phy->caps & SUPPORTED_1000baseT_Full) {
2221 		/* 1G optical */
2222 
2223 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2224 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2225 		ifmedia_set(media, m | IFM_1000_SX);
2226 
2227 	} else {
2228 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2229 			    phy->caps));
2230 	}
2231 
2232 	PORT_UNLOCK(p);
2233 }
2234 
2235 static void
2236 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2237 {
2238 	struct port_info *p = ifp->if_softc;
2239 	struct ifmedia_entry *cur = p->media.ifm_cur;
2240 	int speed = p->link_config.speed;
2241 
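	/* Rebuild the media list if the transceiver module has changed. */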
2242 	if (cur->ifm_data != p->phy.modtype) {
2243 		cxgb_build_medialist(p);
2244 		cur = p->media.ifm_cur;
2245 	}
2246 
2247 	ifmr->ifm_status = IFM_AVALID;
2248 	if (!p->link_config.link_ok)
2249 		return;
2250 
2251 	ifmr->ifm_status |= IFM_ACTIVE;
2252 
2253 	/*
2254 	 * active and current will differ iff current media is autoselect.  That
2255 	 * can happen only for copper RJ45.
2256 	 */
2257 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2258 		return;
2259 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2260 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2261 
2262 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2263 	if (speed == SPEED_10000)
2264 		ifmr->ifm_active |= IFM_10G_T;
2265 	else if (speed == SPEED_1000)
2266 		ifmr->ifm_active |= IFM_1000_T;
2267 	else if (speed == SPEED_100)
2268 		ifmr->ifm_active |= IFM_100_TX;
2269 	else if (speed == SPEED_10)
2270 		ifmr->ifm_active |= IFM_10_T;
2271 	else
2272 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2273 			    speed));
2274 }
2275 
2276 static void
2277 cxgb_async_intr(void *data)
2278 {
2279 	adapter_t *sc = data;
2280 
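	/*
	 * Mask the interrupt (the read-back flushes the posted write) and
	 * defer the actual processing to the slow interrupt task.
	 */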
2281 	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2282 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2283 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2284 }
2285 
2286 static void
2287 link_check_callout(void *arg)
2288 {
2289 	struct port_info *pi = arg;
2290 	struct adapter *sc = pi->adapter;
2291 
2292 	if (!isset(&sc->open_device_map, pi->port_id))
2293 		return;
2294 
2295 	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2296 }
2297 
2298 static void
2299 check_link_status(void *arg, int pending)
2300 {
2301 	struct port_info *pi = arg;
2302 	struct adapter *sc = pi->adapter;
2303 
2304 	if (!isset(&sc->open_device_map, pi->port_id))
2305 		return;
2306 
2307 	t3_link_changed(sc, pi->port_id);
2308 
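	/*
	 * Keep polling if a link fault is pending or if the PHY cannot
	 * interrupt on link state changes.
	 */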
2309 	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2310 		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2311 }
2312 
2313 void
2314 t3_os_link_intr(struct port_info *pi)
2315 {
2316 	/*
2317 	 * Schedule a link check in the near future.  If the link is flapping
2318 	 * rapidly we'll keep resetting the callout and delaying the check until
2319 	 * things stabilize a bit.
2320 	 */
2321 	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2322 }
2323 
2324 static void
2325 check_t3b2_mac(struct adapter *sc)
2326 {
2327 	int i;
2328 
2329 	if (sc->flags & CXGB_SHUTDOWN)
2330 		return;
2331 
2332 	for_each_port(sc, i) {
2333 		struct port_info *p = &sc->port[i];
2334 		int status;
2335 #ifdef INVARIANTS
2336 		struct ifnet *ifp = p->ifp;
2337 #endif
2338 
2339 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2340 		    !p->link_config.link_ok)
2341 			continue;
2342 
2343 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2344 			("%s: state mismatch (drv_flags %x, device_map %x)",
2345 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2346 
2347 		PORT_LOCK(p);
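		/*
		 * The watchdog returns 1 when it has toggled the MAC itself
		 * and 2 when the MAC needs to be reinitialized here.
		 */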
2348 		status = t3b2_mac_watchdog_task(&p->mac);
2349 		if (status == 1)
2350 			p->mac.stats.num_toggled++;
2351 		else if (status == 2) {
2352 			struct cmac *mac = &p->mac;
2353 
2354 			cxgb_update_mac_settings(p);
2355 			t3_link_start(&p->phy, mac, &p->link_config);
2356 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2357 			t3_port_intr_enable(sc, p->port_id);
2358 			p->mac.stats.num_resets++;
2359 		}
2360 		PORT_UNLOCK(p);
2361 	}
2362 }
2363 
2364 static void
2365 cxgb_tick(void *arg)
2366 {
2367 	adapter_t *sc = (adapter_t *)arg;
2368 
2369 	if (sc->flags & CXGB_SHUTDOWN)
2370 		return;
2371 
2372 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2373 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2374 }
2375 
2376 static void
2377 cxgb_tick_handler(void *arg, int count)
2378 {
2379 	adapter_t *sc = (adapter_t *)arg;
2380 	const struct adapter_params *p = &sc->params;
2381 	int i;
2382 	uint32_t cause, reset;
2383 
2384 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2385 		return;
2386 
2387 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2388 		check_t3b2_mac(sc);
2389 
2390 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2391 	if (cause) {
2392 		struct sge_qset *qs = &sc->sge.qs[0];
2393 		uint32_t mask, v;
2394 
2395 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2396 
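		/*
		 * Walk the per-qset response queue starvation bits, skip over
		 * the RSPQXDISABLED bits, and then check the two free list
		 * empty bits of each qset.
		 */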
2397 		mask = 1;
2398 		for (i = 0; i < SGE_QSETS; i++) {
2399 			if (v & mask)
2400 				qs[i].rspq.starved++;
2401 			mask <<= 1;
2402 		}
2403 
2404 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2405 
2406 		for (i = 0; i < SGE_QSETS * 2; i++) {
2407 			if (v & mask) {
2408 				qs[i / 2].fl[i % 2].empty++;
2409 			}
2410 			mask <<= 1;
2411 		}
2412 
2413 		/* clear */
2414 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2415 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2416 	}
2417 
2418 	for (i = 0; i < sc->params.nports; i++) {
2419 		struct port_info *pi = &sc->port[i];
2420 		struct ifnet *ifp = pi->ifp;
2421 		struct cmac *mac = &pi->mac;
2422 		struct mac_stats *mstats = &mac->stats;
2423 		int drops, j;
2424 
2425 		if (!isset(&sc->open_device_map, pi->port_id))
2426 			continue;
2427 
2428 		PORT_LOCK(pi);
2429 		t3_mac_update_stats(mac);
2430 		PORT_UNLOCK(pi);
2431 
2432 		ifp->if_opackets = mstats->tx_frames;
2433 		ifp->if_ipackets = mstats->rx_frames;
2434 		ifp->if_obytes = mstats->tx_octets;
2435 		ifp->if_ibytes = mstats->rx_octets;
2436 		ifp->if_omcasts = mstats->tx_mcast_frames;
2437 		ifp->if_imcasts = mstats->rx_mcast_frames;
2438 		ifp->if_collisions = mstats->tx_total_collisions;
2439 		ifp->if_iqdrops = mstats->rx_cong_drops;
2440 
2441 		drops = 0;
2442 		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2443 			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2444 		ifp->if_snd.ifq_drops = drops;
2445 
2446 		ifp->if_oerrors =
2447 		    mstats->tx_excess_collisions +
2448 		    mstats->tx_underrun +
2449 		    mstats->tx_len_errs +
2450 		    mstats->tx_mac_internal_errs +
2451 		    mstats->tx_excess_deferral +
2452 		    mstats->tx_fcs_errs;
2453 		ifp->if_ierrors =
2454 		    mstats->rx_jabber +
2455 		    mstats->rx_data_errs +
2456 		    mstats->rx_sequence_errs +
2457 		    mstats->rx_runt +
2458 		    mstats->rx_too_long +
2459 		    mstats->rx_mac_internal_errs +
2460 		    mstats->rx_short +
2461 		    mstats->rx_fcs_errs;
2462 
2463 		if (mac->multiport)
2464 			continue;
2465 
2466 		/* Count rx fifo overflows, once per second */
2467 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2468 		reset = 0;
2469 		if (cause & F_RXFIFO_OVERFLOW) {
2470 			mac->stats.rx_fifo_ovfl++;
2471 			reset |= F_RXFIFO_OVERFLOW;
2472 		}
2473 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2474 	}
2475 }
2476 
2477 static void
2478 touch_bars(device_t dev)
2479 {
2480 	/*
2481 	 * Don't enable yet
2482 	 */
2483 #if !defined(__LP64__) && 0
2484 	u32 v;
2485 
2486 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2487 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2488 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2489 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2490 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2491 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2492 #endif
2493 }
2494 
2495 static int
2496 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2497 {
2498 	uint8_t *buf;
2499 	int err = 0;
2500 	u32 aligned_offset, aligned_len, *p;
2501 	struct adapter *adapter = pi->adapter;
2502 
2503 
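	/*
	 * The EEPROM is accessed a 32-bit word at a time, so round the region
	 * out to word boundaries and merge the caller's data with the
	 * existing bytes at the edges before writing.
	 */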
2504 	aligned_offset = offset & ~3;
2505 	aligned_len = (len + (offset & 3) + 3) & ~3;
2506 
2507 	if (aligned_offset != offset || aligned_len != len) {
2508 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2509 		if (!buf)
2510 			return (ENOMEM);
2511 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2512 		if (!err && aligned_len > 4)
2513 			err = t3_seeprom_read(adapter,
2514 					      aligned_offset + aligned_len - 4,
2515 					      (u32 *)&buf[aligned_len - 4]);
2516 		if (err)
2517 			goto out;
2518 		memcpy(buf + (offset & 3), data, len);
2519 	} else
2520 		buf = (uint8_t *)(uintptr_t)data;
2521 
2522 	err = t3_seeprom_wp(adapter, 0);
2523 	if (err)
2524 		goto out;
2525 
2526 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2527 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2528 		aligned_offset += 4;
2529 	}
2530 
2531 	if (!err)
2532 		err = t3_seeprom_wp(adapter, 1);
2533 out:
2534 	if (buf != data)
2535 		free(buf, M_DEVBUF);
2536 	return err;
2537 	return (err);
2538 
2539 
2540 static int
2541 in_range(int val, int lo, int hi)
2542 {
2543 	return (val < 0 || (val <= hi && val >= lo));
2544 }
2545 
2546 static int
2547 cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2548 {
2549 	return (0);
2550 }
2551 
2552 static int
2553 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2554 {
2555 	return (0);
2556 }
2557 
2558 static int
2559 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2560     int fflag, struct thread *td)
2561 {
2562 	int mmd, error = 0;
2563 	struct port_info *pi = dev->si_drv1;
2564 	adapter_t *sc = pi->adapter;
2565 
2566 #ifdef PRIV_SUPPORTED
2567 	if (priv_check(td, PRIV_DRIVER)) {
2568 		if (cxgb_debug)
2569 			printf("user does not have access to privileged ioctls\n");
2570 		return (EPERM);
2571 	}
2572 #else
2573 	if (suser(td)) {
2574 		if (cxgb_debug)
2575 			printf("user does not have access to privileged ioctls\n");
2576 		return (EPERM);
2577 	}
2578 #endif
2579 
2580 	switch (cmd) {
2581 	case CHELSIO_GET_MIIREG: {
2582 		uint32_t val;
2583 		struct cphy *phy = &pi->phy;
2584 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2585 
2586 		if (!phy->mdio_read)
2587 			return (EOPNOTSUPP);
2588 		if (is_10G(sc)) {
2589 			mmd = mid->phy_id >> 8;
2590 			if (!mmd)
2591 				mmd = MDIO_DEV_PCS;
2592 			else if (mmd > MDIO_DEV_VEND2)
2593 				return (EINVAL);
2594 
2595 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2596 					     mid->reg_num, &val);
2597 		} else
2598 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2599 					     mid->reg_num & 0x1f, &val);
2600 		if (error == 0)
2601 			mid->val_out = val;
2602 		break;
2603 	}
2604 	case CHELSIO_SET_MIIREG: {
2605 		struct cphy *phy = &pi->phy;
2606 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2607 
2608 		if (!phy->mdio_write)
2609 			return (EOPNOTSUPP);
2610 		if (is_10G(sc)) {
2611 			mmd = mid->phy_id >> 8;
2612 			if (!mmd)
2613 				mmd = MDIO_DEV_PCS;
2614 			else if (mmd > MDIO_DEV_VEND2)
2615 				return (EINVAL);
2616 
2617 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2618 					      mmd, mid->reg_num, mid->val_in);
2619 		} else
2620 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2621 					      mid->reg_num & 0x1f,
2622 					      mid->val_in);
2623 		break;
2624 	}
2625 	case CHELSIO_SETREG: {
2626 		struct ch_reg *edata = (struct ch_reg *)data;
2627 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2628 			return (EFAULT);
2629 		t3_write_reg(sc, edata->addr, edata->val);
2630 		break;
2631 	}
2632 	case CHELSIO_GETREG: {
2633 		struct ch_reg *edata = (struct ch_reg *)data;
2634 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2635 			return (EFAULT);
2636 		edata->val = t3_read_reg(sc, edata->addr);
2637 		break;
2638 	}
2639 	case CHELSIO_GET_SGE_CONTEXT: {
2640 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2641 		mtx_lock_spin(&sc->sge.reg_lock);
2642 		switch (ecntxt->cntxt_type) {
2643 		case CNTXT_TYPE_EGRESS:
2644 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2645 			    ecntxt->data);
2646 			break;
2647 		case CNTXT_TYPE_FL:
2648 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2649 			    ecntxt->data);
2650 			break;
2651 		case CNTXT_TYPE_RSP:
2652 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2653 			    ecntxt->data);
2654 			break;
2655 		case CNTXT_TYPE_CQ:
2656 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2657 			    ecntxt->data);
2658 			break;
2659 		default:
2660 			error = EINVAL;
2661 			break;
2662 		}
2663 		mtx_unlock_spin(&sc->sge.reg_lock);
2664 		break;
2665 	}
2666 	case CHELSIO_GET_SGE_DESC: {
2667 		struct ch_desc *edesc = (struct ch_desc *)data;
2668 		int ret;
2669 		if (edesc->queue_num >= SGE_QSETS * 6)
2670 			return (EINVAL);
2671 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2672 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2673 		if (ret < 0)
2674 			return (EINVAL);
2675 		edesc->size = ret;
2676 		break;
2677 	}
2678 	case CHELSIO_GET_QSET_PARAMS: {
2679 		struct qset_params *q;
2680 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2681 		int q1 = pi->first_qset;
2682 		int nqsets = pi->nqsets;
2683 		int i;
2684 
2685 		if (t->qset_idx >= nqsets)
2686 			return (EINVAL);
2687 
2688 		i = q1 + t->qset_idx;
2689 		q = &sc->params.sge.qset[i];
2690 		t->rspq_size   = q->rspq_size;
2691 		t->txq_size[0] = q->txq_size[0];
2692 		t->txq_size[1] = q->txq_size[1];
2693 		t->txq_size[2] = q->txq_size[2];
2694 		t->fl_size[0]  = q->fl_size;
2695 		t->fl_size[1]  = q->jumbo_size;
2696 		t->polling     = q->polling;
2697 		t->lro         = q->lro;
2698 		t->intr_lat    = q->coalesce_usecs;
2699 		t->cong_thres  = q->cong_thres;
2700 		t->qnum        = i;
2701 
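		/*
		 * The interrupt vector for this qset is known only after the
		 * adapter has been fully initialized.
		 */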
2702 		if ((sc->flags & FULL_INIT_DONE) == 0)
2703 			t->vector = 0;
2704 		else if (sc->flags & USING_MSIX)
2705 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2706 		else
2707 			t->vector = rman_get_start(sc->irq_res);
2708 
2709 		break;
2710 	}
2711 	case CHELSIO_GET_QSET_NUM: {
2712 		struct ch_reg *edata = (struct ch_reg *)data;
2713 		edata->val = pi->nqsets;
2714 		break;
2715 	}
2716 	case CHELSIO_LOAD_FW: {
2717 		uint8_t *fw_data;
2718 		uint32_t vers;
2719 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2720 
2721 		/*
2722 		 * You're allowed to load firmware only before FULL_INIT_DONE
2723 		 *
2724 		 * FW_UPTODATE is also set so the rest of the initialization
2725 		 * will not overwrite what was loaded here.  This gives you the
2726 		 * flexibility to load any firmware (and maybe shoot yourself in
2727 		 * the foot).
2728 		 */
2729 
2730 		ADAPTER_LOCK(sc);
2731 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2732 			ADAPTER_UNLOCK(sc);
2733 			return (EBUSY);
2734 		}
2735 
2736 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2737 		if (!fw_data)
2738 			error = ENOMEM;
2739 		else
2740 			error = copyin(t->buf, fw_data, t->len);
2741 
2742 		if (!error)
2743 			error = -t3_load_fw(sc, fw_data, t->len);
2744 
2745 		if (t3_get_fw_version(sc, &vers) == 0) {
2746 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2747 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2748 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2749 		}
2750 
2751 		if (!error)
2752 			sc->flags |= FW_UPTODATE;
2753 
2754 		free(fw_data, M_DEVBUF);
2755 		ADAPTER_UNLOCK(sc);
2756 		break;
2757 	}
2758 	case CHELSIO_LOAD_BOOT: {
2759 		uint8_t *boot_data;
2760 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2761 
2762 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2763 		if (!boot_data)
2764 			return (ENOMEM);
2765 
2766 		error = copyin(t->buf, boot_data, t->len);
2767 		if (!error)
2768 			error = -t3_load_boot(sc, boot_data, t->len);
2769 
2770 		free(boot_data, M_DEVBUF);
2771 		break;
2772 	}
2773 	case CHELSIO_GET_PM: {
2774 		struct ch_pm *m = (struct ch_pm *)data;
2775 		struct tp_params *p = &sc->params.tp;
2776 
2777 		if (!is_offload(sc))
2778 			return (EOPNOTSUPP);
2779 
2780 		m->tx_pg_sz = p->tx_pg_size;
2781 		m->tx_num_pg = p->tx_num_pgs;
2782 		m->rx_pg_sz  = p->rx_pg_size;
2783 		m->rx_num_pg = p->rx_num_pgs;
2784 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2785 
2786 		break;
2787 	}
2788 	case CHELSIO_SET_PM: {
2789 		struct ch_pm *m = (struct ch_pm *)data;
2790 		struct tp_params *p = &sc->params.tp;
2791 
2792 		if (!is_offload(sc))
2793 			return (EOPNOTSUPP);
2794 		if (sc->flags & FULL_INIT_DONE)
2795 			return (EBUSY);
2796 
2797 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2798 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2799 			return (EINVAL);	/* not power of 2 */
2800 		if (!(m->rx_pg_sz & 0x14000))
2801 			return (EINVAL);	/* not 16KB or 64KB */
2802 		if (!(m->tx_pg_sz & 0x1554000))
2803 			return (EINVAL);	/* not a supported tx page size */
2804 		if (m->tx_num_pg == -1)
2805 			m->tx_num_pg = p->tx_num_pgs;
2806 		if (m->rx_num_pg == -1)
2807 			m->rx_num_pg = p->rx_num_pgs;
2808 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2809 			return (EINVAL);
2810 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2811 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2812 			return (EINVAL);
2813 
2814 		p->rx_pg_size = m->rx_pg_sz;
2815 		p->tx_pg_size = m->tx_pg_sz;
2816 		p->rx_num_pgs = m->rx_num_pg;
2817 		p->tx_num_pgs = m->tx_num_pg;
2818 		break;
2819 	}
2820 	case CHELSIO_SETMTUTAB: {
2821 		struct ch_mtus *m = (struct ch_mtus *)data;
2822 		int i;
2823 
2824 		if (!is_offload(sc))
2825 			return (EOPNOTSUPP);
2826 		if (offload_running(sc))
2827 			return (EBUSY);
2828 		if (m->nmtus != NMTUS)
2829 			return (EINVAL);
2830 		if (m->mtus[0] < 81)         /* accommodate SACK */
2831 			return (EINVAL);
2832 
2833 		/*
2834 		 * MTUs must be in ascending order
2835 		 */
2836 		for (i = 1; i < NMTUS; ++i)
2837 			if (m->mtus[i] < m->mtus[i - 1])
2838 				return (EINVAL);
2839 
2840 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2841 		break;
2842 	}
2843 	case CHELSIO_GETMTUTAB: {
2844 		struct ch_mtus *m = (struct ch_mtus *)data;
2845 
2846 		if (!is_offload(sc))
2847 			return (EOPNOTSUPP);
2848 
2849 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2850 		m->nmtus = NMTUS;
2851 		break;
2852 	}
2853 	case CHELSIO_GET_MEM: {
2854 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2855 		struct mc7 *mem;
2856 		uint8_t *useraddr;
2857 		u64 buf[32];
2858 
2859 		/*
2860 		 * Use these to avoid modifying len/addr in the return
2861 		 * struct
2862 		 */
2863 		uint32_t len = t->len, addr = t->addr;
2864 
2865 		if (!is_offload(sc))
2866 			return (EOPNOTSUPP);
2867 		if (!(sc->flags & FULL_INIT_DONE))
2868 			return (EIO);         /* need the memory controllers */
2869 		if ((addr & 0x7) || (len & 0x7))
2870 			return (EINVAL);
2871 		if (t->mem_id == MEM_CM)
2872 			mem = &sc->cm;
2873 		else if (t->mem_id == MEM_PMRX)
2874 			mem = &sc->pmrx;
2875 		else if (t->mem_id == MEM_PMTX)
2876 			mem = &sc->pmtx;
2877 		else
2878 			return (EINVAL);
2879 
2880 		/*
2881 		 * Version scheme:
2882 		 * bits 0..9: chip version
2883 		 * bits 10..15: chip revision
2884 		 */
2885 		t->version = 3 | (sc->params.rev << 10);
2886 
2887 		/*
2888 		 * Read 256 bytes at a time as len can be large and we don't
2889 		 * want to use huge intermediate buffers.
2890 		 */
2891 		useraddr = (uint8_t *)t->buf;
2892 		while (len) {
2893 			unsigned int chunk = min(len, sizeof(buf));
2894 
2895 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2896 			if (error)
2897 				return (-error);
2898 			if (copyout(buf, useraddr, chunk))
2899 				return (EFAULT);
2900 			useraddr += chunk;
2901 			addr += chunk;
2902 			len -= chunk;
2903 		}
2904 		break;
2905 	}
2906 	case CHELSIO_READ_TCAM_WORD: {
2907 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2908 
2909 		if (!is_offload(sc))
2910 			return (EOPNOTSUPP);
2911 		if (!(sc->flags & FULL_INIT_DONE))
2912 			return (EIO);         /* need MC5 */
2913 		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2914 		break;
2915 	}
2916 	case CHELSIO_SET_TRACE_FILTER: {
2917 		struct ch_trace *t = (struct ch_trace *)data;
2918 		const struct trace_params *tp;
2919 
2920 		tp = (const struct trace_params *)&t->sip;
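		/*
		 * The sip field and those following it in the ioctl argument
		 * are laid out to match struct trace_params.
		 */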
2921 		if (t->config_tx)
2922 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2923 					       t->trace_tx);
2924 		if (t->config_rx)
2925 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2926 					       t->trace_rx);
2927 		break;
2928 	}
2929 	case CHELSIO_SET_PKTSCHED: {
2930 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2931 		if (sc->open_device_map == 0)
2932 			return (EAGAIN);
2933 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2934 		    p->binding);
2935 		break;
2936 	}
2937 	case CHELSIO_IFCONF_GETREGS: {
2938 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2939 		int reglen = cxgb_get_regs_len();
2940 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2941 		if (buf == NULL) {
2942 			return (ENOMEM);
2943 		}
2944 		if (regs->len > reglen)
2945 			regs->len = reglen;
2946 		else if (regs->len < reglen)
2947 			error = ENOBUFS;
2948 
2949 		if (!error) {
2950 			cxgb_get_regs(sc, regs, buf);
2951 			error = copyout(buf, regs->data, reglen);
2952 		}
2953 		free(buf, M_DEVBUF);
2954 
2955 		break;
2956 	}
2957 	case CHELSIO_SET_HW_SCHED: {
2958 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2959 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2960 
2961 		if ((sc->flags & FULL_INIT_DONE) == 0)
2962 			return (EAGAIN);       /* need TP to be initialized */
2963 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2964 		    !in_range(t->channel, 0, 1) ||
2965 		    !in_range(t->kbps, 0, 10000000) ||
2966 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2967 		    !in_range(t->flow_ipg, 0,
2968 			      dack_ticks_to_usec(sc, 0x7ff)))
2969 			return (EINVAL);
2970 
2971 		if (t->kbps >= 0) {
2972 			error = t3_config_sched(sc, t->kbps, t->sched);
2973 			if (error < 0)
2974 				return (-error);
2975 		}
2976 		if (t->class_ipg >= 0)
2977 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2978 		if (t->flow_ipg >= 0) {
2979 			t->flow_ipg *= 1000;     /* us -> ns */
2980 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2981 		}
2982 		if (t->mode >= 0) {
2983 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2984 
2985 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2986 					 bit, t->mode ? bit : 0);
2987 		}
2988 		if (t->channel >= 0)
2989 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2990 					 1 << t->sched, t->channel << t->sched);
2991 		break;
2992 	}
2993 	case CHELSIO_GET_EEPROM: {
2994 		int i;
2995 		struct ch_eeprom *e = (struct ch_eeprom *)data;
2996 		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2997 
2998 		if (buf == NULL) {
2999 			return (ENOMEM);
3000 		}
3001 		e->magic = EEPROM_MAGIC;
3002 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3003 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3004 
3005 		if (!error)
3006 			error = copyout(buf + e->offset, e->data, e->len);
3007 
3008 		free(buf, M_DEVBUF);
3009 		break;
3010 	}
3011 	case CHELSIO_CLEAR_STATS: {
3012 		if (!(sc->flags & FULL_INIT_DONE))
3013 			return (EAGAIN);
3014 
3015 		PORT_LOCK(pi);
3016 		t3_mac_update_stats(&pi->mac);
3017 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3018 		PORT_UNLOCK(pi);
3019 		break;
3020 	}
3021 	case CHELSIO_GET_UP_LA: {
3022 		struct ch_up_la *la = (struct ch_up_la *)data;
3023 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3024 		if (buf == NULL) {
3025 			return (ENOMEM);
3026 		}
3027 		if (la->bufsize < LA_BUFSIZE)
3028 			error = ENOBUFS;
3029 
3030 		if (!error)
3031 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3032 					      &la->bufsize, buf);
3033 		if (!error)
3034 			error = copyout(buf, la->data, la->bufsize);
3035 
3036 		free(buf, M_DEVBUF);
3037 		break;
3038 	}
3039 	case CHELSIO_GET_UP_IOQS: {
3040 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3041 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3042 		uint32_t *v;
3043 
3044 		if (buf == NULL) {
3045 			return (ENOMEM);
3046 		}
3047 		if (ioqs->bufsize < IOQS_BUFSIZE)
3048 			error = ENOBUFS;
3049 
3050 		if (!error)
3051 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3052 
3053 		if (!error) {
3054 			v = (uint32_t *)buf;
3055 
3056 			ioqs->ioq_rx_enable = *v++;
3057 			ioqs->ioq_tx_enable = *v++;
3058 			ioqs->ioq_rx_status = *v++;
3059 			ioqs->ioq_tx_status = *v++;
3060 
3061 			error = copyout(v, ioqs->data, ioqs->bufsize);
3062 		}
3063 
3064 		free(buf, M_DEVBUF);
3065 		break;
3066 	}
3067 	case CHELSIO_SET_FILTER: {
3068 		struct ch_filter *f = (struct ch_filter *)data;
3069 		struct filter_info *p;
3070 		unsigned int nfilters = sc->params.mc5.nfilters;
3071 
3072 		if (!is_offload(sc))
3073 			return (EOPNOTSUPP);	/* No TCAM */
3074 		if (!(sc->flags & FULL_INIT_DONE))
3075 			return (EAGAIN);	/* mc5 not setup yet */
3076 		if (nfilters == 0)
3077 			return (EBUSY);		/* TOE will use TCAM */
3078 
3079 		/* sanity checks */
3080 		if (f->filter_id >= nfilters ||
3081 		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3082 		    (f->val.sport && f->mask.sport != 0xffff) ||
3083 		    (f->val.dport && f->mask.dport != 0xffff) ||
3084 		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3085 		    (f->val.vlan_prio &&
3086 			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3087 		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3088 		    f->qset >= SGE_QSETS ||
3089 		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3090 			return (EINVAL);
3091 
3092 		/* Was allocated with M_WAITOK */
3093 		KASSERT(sc->filters, ("filter table NULL\n"));
3094 
3095 		p = &sc->filters[f->filter_id];
3096 		if (p->locked)
3097 			return (EPERM);
3098 
3099 		bzero(p, sizeof(*p));
3100 		p->sip = f->val.sip;
3101 		p->sip_mask = f->mask.sip;
3102 		p->dip = f->val.dip;
3103 		p->sport = f->val.sport;
3104 		p->dport = f->val.dport;
3105 		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3106 		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3107 		    FILTER_NO_VLAN_PRI;
3108 		p->mac_hit = f->mac_hit;
3109 		p->mac_vld = f->mac_addr_idx != 0xffff;
3110 		p->mac_idx = f->mac_addr_idx;
3111 		p->pkt_type = f->proto;
3112 		p->report_filter_id = f->want_filter_id;
3113 		p->pass = f->pass;
3114 		p->rss = f->rss;
3115 		p->qset = f->qset;
3116 
3117 		error = set_filter(sc, f->filter_id, p);
3118 		if (error == 0)
3119 			p->valid = 1;
3120 		break;
3121 	}
3122 	case CHELSIO_DEL_FILTER: {
3123 		struct ch_filter *f = (struct ch_filter *)data;
3124 		struct filter_info *p;
3125 		unsigned int nfilters = sc->params.mc5.nfilters;
3126 
3127 		if (!is_offload(sc))
3128 			return (EOPNOTSUPP);
3129 		if (!(sc->flags & FULL_INIT_DONE))
3130 			return (EAGAIN);
3131 		if (nfilters == 0 || sc->filters == NULL)
3132 			return (EINVAL);
3133 		if (f->filter_id >= nfilters)
3134 			return (EINVAL);
3135 
3136 		p = &sc->filters[f->filter_id];
3137 		if (p->locked)
3138 			return (EPERM);
3139 		if (!p->valid)
3140 			return (EFAULT); /* Read "Bad address" as "Bad index" */
3141 
3142 		bzero(p, sizeof(*p));
3143 		p->sip = p->sip_mask = 0xffffffff;
3144 		p->vlan = 0xfff;
3145 		p->vlan_prio = FILTER_NO_VLAN_PRI;
3146 		p->pkt_type = 1;
3147 		error = set_filter(sc, f->filter_id, p);
3148 		break;
3149 	}
3150 	case CHELSIO_GET_FILTER: {
3151 		struct ch_filter *f = (struct ch_filter *)data;
3152 		struct filter_info *p;
3153 		unsigned int i, nfilters = sc->params.mc5.nfilters;
3154 
3155 		if (!is_offload(sc))
3156 			return (EOPNOTSUPP);
3157 		if (!(sc->flags & FULL_INIT_DONE))
3158 			return (EAGAIN);
3159 		if (nfilters == 0 || sc->filters == NULL)
3160 			return (EINVAL);
3161 
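		/*
		 * This ioctl iterates: a filter_id of 0xffffffff starts the
		 * scan at the first entry, otherwise the scan resumes after
		 * the given id.  filter_id is set back to 0xffffffff when no
		 * more valid entries are found.
		 */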
3162 		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3163 		for (; i < nfilters; i++) {
3164 			p = &sc->filters[i];
3165 			if (!p->valid)
3166 				continue;
3167 
3168 			bzero(f, sizeof(*f));
3169 
3170 			f->filter_id = i;
3171 			f->val.sip = p->sip;
3172 			f->mask.sip = p->sip_mask;
3173 			f->val.dip = p->dip;
3174 			f->mask.dip = p->dip ? 0xffffffff : 0;
3175 			f->val.sport = p->sport;
3176 			f->mask.sport = p->sport ? 0xffff : 0;
3177 			f->val.dport = p->dport;
3178 			f->mask.dport = p->dport ? 0xffff : 0;
3179 			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3180 			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3181 			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3182 			    0 : p->vlan_prio;
3183 			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3184 			    0 : FILTER_NO_VLAN_PRI;
3185 			f->mac_hit = p->mac_hit;
3186 			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3187 			f->proto = p->pkt_type;
3188 			f->want_filter_id = p->report_filter_id;
3189 			f->pass = p->pass;
3190 			f->rss = p->rss;
3191 			f->qset = p->qset;
3192 
3193 			break;
3194 		}
3195 
3196 		if (i == nfilters)
3197 			f->filter_id = 0xffffffff;
3198 		break;
3199 	}
3200 	default:
3201 		return (EOPNOTSUPP);
3202 		break;
3203 	}
3204 
3205 	return (error);
3206 }
3207 
3208 static __inline void
3209 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3210     unsigned int end)
3211 {
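	/* Each register is stored at its own address offset within buf. */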
3212 	uint32_t *p = (uint32_t *)(buf + start);
3213 
3214 	for ( ; start <= end; start += sizeof(uint32_t))
3215 		*p++ = t3_read_reg(ap, start);
3216 }
3217 
3218 #define T3_REGMAP_SIZE (3 * 1024)
3219 static int
3220 cxgb_get_regs_len(void)
3221 {
3222 	return T3_REGMAP_SIZE;
3223 }
3224 
3225 static void
3226 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3227 {
3228 
3229 	/*
3230 	 * Version scheme:
3231 	 * bits 0..9: chip version
3232 	 * bits 10..15: chip revision
3233 	 * bit 31: set for PCIe cards
3234 	 */
3235 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3236 
3237 	/*
3238 	 * We skip the MAC statistics registers because they are clear-on-read.
3239 	 * Also reading multi-register stats would need to synchronize with the
3240 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3241 	 */
3242 	memset(buf, 0, cxgb_get_regs_len());
3243 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3244 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3245 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3246 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3247 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3248 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3249 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3250 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3251 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3252 }
3253 
3254 static int
3255 alloc_filters(struct adapter *sc)
3256 {
3257 	struct filter_info *p;
3258 	unsigned int nfilters = sc->params.mc5.nfilters;
3259 
3260 	if (nfilters == 0)
3261 		return (0);
3262 
3263 	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3264 	sc->filters = p;
3265 
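	/*
	 * Reserve the last entry as a locked, always-valid default filter
	 * that passes packets and leaves queue selection to RSS.
	 */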
3266 	p = &sc->filters[nfilters - 1];
3267 	p->vlan = 0xfff;
3268 	p->vlan_prio = FILTER_NO_VLAN_PRI;
3269 	p->pass = p->rss = p->valid = p->locked = 1;
3270 
3271 	return (0);
3272 }
3273 
3274 static int
3275 setup_hw_filters(struct adapter *sc)
3276 {
3277 	int i, rc;
3278 	unsigned int nfilters = sc->params.mc5.nfilters;
3279 
3280 	if (!sc->filters)
3281 		return (0);
3282 
3283 	t3_enable_filters(sc);
3284 
3285 	for (i = rc = 0; i < nfilters && !rc; i++) {
3286 		if (sc->filters[i].locked)
3287 			rc = set_filter(sc, i, &sc->filters[i]);
3288 	}
3289 
3290 	return (rc);
3291 }
3292 
3293 static int
3294 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3295 {
3296 	int len;
3297 	struct mbuf *m;
3298 	struct ulp_txpkt *txpkt;
3299 	struct work_request_hdr *wr;
3300 	struct cpl_pass_open_req *oreq;
3301 	struct cpl_set_tcb_field *sreq;
3302 
3303 	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3304 	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3305 
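	/*
	 * The filter region sits just below the routing entries at the top of
	 * the MC5 TCAM; convert the filter index into an absolute TCAM index.
	 */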
3306 	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3307 	      sc->params.mc5.nfilters;
3308 
3309 	m = m_gethdr(M_WAITOK, MT_DATA);
3310 	m->m_len = m->m_pkthdr.len = len;
3311 	bzero(mtod(m, char *), len);
3312 
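	/*
	 * Build a single atomic BYPASS work request carrying a
	 * CPL_PASS_OPEN_REQ (programs the TCAM entry) followed by two
	 * ULP_TXPKT-wrapped CPL_SET_TCB_FIELD commands for the filter's TCB.
	 */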
3313 	wr = mtod(m, struct work_request_hdr *);
3314 	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3315 
3316 	oreq = (struct cpl_pass_open_req *)(wr + 1);
3317 	txpkt = (struct ulp_txpkt *)oreq;
3318 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3319 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3320 	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3321 	oreq->local_port = htons(f->dport);
3322 	oreq->peer_port = htons(f->sport);
3323 	oreq->local_ip = htonl(f->dip);
3324 	oreq->peer_ip = htonl(f->sip);
3325 	oreq->peer_netmask = htonl(f->sip_mask);
3326 	oreq->opt0h = 0;
3327 	oreq->opt0l = htonl(F_NO_OFFLOAD);
3328 	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3329 			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3330 			 V_VLAN_PRI(f->vlan_prio >> 1) |
3331 			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3332 			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3333 			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3334 
3335 	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3336 	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3337 			  (f->report_filter_id << 15) | (1 << 23) |
3338 			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3339 	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3340 	t3_mgmt_tx(sc, m);
3341 
3342 	if (f->pass && !f->rss) {
3343 		len = sizeof(*sreq);
3344 		m = m_gethdr(M_WAITOK, MT_DATA);
3345 		m->m_len = m->m_pkthdr.len = len;
3346 		bzero(mtod(m, char *), len);
3347 		sreq = mtod(m, struct cpl_set_tcb_field *);
3348 		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3349 		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3350 				 (u64)sc->rrss_map[f->qset] << 19);
3351 		t3_mgmt_tx(sc, m);
3352 	}
3353 	return (0);
3354 }
3355 
3356 static inline void
3357 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3358     unsigned int word, u64 mask, u64 val)
3359 {
3360 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3361 	req->reply = V_NO_REPLY(1);
3362 	req->cpu_idx = 0;
3363 	req->word = htons(word);
3364 	req->mask = htobe64(mask);
3365 	req->val = htobe64(val);
3366 }
3367 
3368 static inline void
3369 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3370     unsigned int word, u64 mask, u64 val)
3371 {
3372 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3373 
3374 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3375 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3376 	mk_set_tcb_field(req, tid, word, mask, val);
3377 }
3378