xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57 
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66 
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78 
79 #include <cxgb_include.h>
80 
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84 
85 static int cxgb_setup_interrupts(adapter_t *);
86 static void cxgb_teardown_interrupts(adapter_t *);
87 static void cxgb_init(void *);
88 static int cxgb_init_locked(struct port_info *);
89 static int cxgb_uninit_locked(struct port_info *);
90 static int cxgb_uninit_synchronized(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static int cxgb_media_change(struct ifnet *);
93 static int cxgb_ifm_type(int);
94 static void cxgb_build_medialist(struct port_info *);
95 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
96 static int setup_sge_qsets(adapter_t *);
97 static void cxgb_async_intr(void *);
98 static void cxgb_tick_handler(void *, int);
99 static void cxgb_tick(void *);
100 static void link_check_callout(void *);
101 static void check_link_status(void *, int);
102 static void setup_rss(adapter_t *sc);
103 static int alloc_filters(struct adapter *);
104 static int setup_hw_filters(struct adapter *);
105 static int set_filter(struct adapter *, int, const struct filter_info *);
106 static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
107     unsigned int, u64, u64);
108 static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
109     unsigned int, u64, u64);
110 
111 /* Attachment glue for the PCI controller end of the device.  Each port of
112  * the device is attached separately, as defined later.
113  */
114 static int cxgb_controller_probe(device_t);
115 static int cxgb_controller_attach(device_t);
116 static int cxgb_controller_detach(device_t);
117 static void cxgb_free(struct adapter *);
118 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
119     unsigned int end);
120 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
121 static int cxgb_get_regs_len(void);
122 static int offload_open(struct port_info *pi);
123 static void touch_bars(device_t dev);
124 static int offload_close(struct t3cdev *tdev);
125 static void cxgb_update_mac_settings(struct port_info *p);
126 
127 static device_method_t cxgb_controller_methods[] = {
128 	DEVMETHOD(device_probe,		cxgb_controller_probe),
129 	DEVMETHOD(device_attach,	cxgb_controller_attach),
130 	DEVMETHOD(device_detach,	cxgb_controller_detach),
131 
132 	/* bus interface */
133 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
134 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
135 
136 	{ 0, 0 }
137 };
138 
139 static driver_t cxgb_controller_driver = {
140 	"cxgbc",
141 	cxgb_controller_methods,
142 	sizeof(struct adapter)
143 };
144 
145 static devclass_t	cxgb_controller_devclass;
146 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
147 
148 /*
149  * Attachment glue for the ports.  Attachment is done directly to the
150  * controller device.
151  */
152 static int cxgb_port_probe(device_t);
153 static int cxgb_port_attach(device_t);
154 static int cxgb_port_detach(device_t);
155 
156 static device_method_t cxgb_port_methods[] = {
157 	DEVMETHOD(device_probe,		cxgb_port_probe),
158 	DEVMETHOD(device_attach,	cxgb_port_attach),
159 	DEVMETHOD(device_detach,	cxgb_port_detach),
160 	{ 0, 0 }
161 };
162 
163 static driver_t cxgb_port_driver = {
164 	"cxgb",
165 	cxgb_port_methods,
166 	0
167 };
168 
169 static d_ioctl_t cxgb_extension_ioctl;
170 static d_open_t cxgb_extension_open;
171 static d_close_t cxgb_extension_close;
172 
173 static struct cdevsw cxgb_cdevsw = {
174        .d_version =    D_VERSION,
175        .d_flags =      0,
176        .d_open =       cxgb_extension_open,
177        .d_close =      cxgb_extension_close,
178        .d_ioctl =      cxgb_extension_ioctl,
179        .d_name =       "cxgb",
180 };
181 
182 static devclass_t	cxgb_port_devclass;
183 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
184 
185 /*
186  * The driver uses the best interrupt scheme available on a platform in the
187  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
188  * of these schemes the driver may consider as follows:
189  *
190  * msi_allowed = 2: choose from among all three options
191  * msi_allowed = 1: only consider MSI and pin interrupts
192  * msi_allowed = 0: force pin interrupts
193  */
194 static int msi_allowed = 2;
195 
196 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
197 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
198 SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
199     "MSI-X, MSI, INTx selector");
200 
201 /*
202  * The driver enables offload by default.
203  * To disable it, use ofld_disable = 1.
204  */
205 static int ofld_disable = 0;
206 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
207 SYSCTL_INT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
208     "disable ULP offload");
209 
210 /*
211  * The driver uses an auto-queue algorithm by default.
212  * To disable it and force a single queue-set per port, use multiq = 0.
213  */
214 static int multiq = 1;
215 TUNABLE_INT("hw.cxgb.multiq", &multiq);
216 SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
217     "use min(ncpus/ports, 8) queue-sets per port");
218 
219 /*
220  * By default the driver will not update the firmware unless
221  * it was compiled against a newer version.
222  *
223  */
224 static int force_fw_update = 0;
225 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
226 SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
227     "update firmware even if up to date");
228 
229 int cxgb_use_16k_clusters = -1;
230 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
231 SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
232     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
233 
234 /*
235  * Tune the size of the output queue.
236  */
237 int cxgb_snd_queue_len = IFQ_MAXLEN;
238 TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
239 SYSCTL_INT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
240     &cxgb_snd_queue_len, 0, "send queue size");
241 
242 static int nfilters = -1;
243 TUNABLE_INT("hw.cxgb.nfilters", &nfilters);
244 SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
245     &nfilters, 0, "max number of entries in the filter table");
246 
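/*
 * Illustrative example (not part of the driver): the knobs above are loader
 * tunables (CTLFLAG_RDTUN), so they would normally be set from
 * /boot/loader.conf before the driver initializes, e.g.:
 *
 *	hw.cxgb.msi_allowed="1"		# consider MSI and INTx only
 *	hw.cxgb.multiq="0"		# one queue-set per port
 *	hw.cxgb.nfilters="256"		# cap the hardware filter table
 *
 * Their current values can still be inspected at runtime with sysctl(8),
 * e.g. "sysctl hw.cxgb.msi_allowed".
 */
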
247 enum {
248 	MAX_TXQ_ENTRIES      = 16384,
249 	MAX_CTRL_TXQ_ENTRIES = 1024,
250 	MAX_RSPQ_ENTRIES     = 16384,
251 	MAX_RX_BUFFERS       = 16384,
252 	MAX_RX_JUMBO_BUFFERS = 16384,
253 	MIN_TXQ_ENTRIES      = 4,
254 	MIN_CTRL_TXQ_ENTRIES = 4,
255 	MIN_RSPQ_ENTRIES     = 32,
256 	MIN_FL_ENTRIES       = 32,
257 	MIN_FL_JUMBO_ENTRIES = 32
258 };
259 
260 struct filter_info {
261 	u32 sip;
262 	u32 sip_mask;
263 	u32 dip;
264 	u16 sport;
265 	u16 dport;
266 	u32 vlan:12;
267 	u32 vlan_prio:3;
268 	u32 mac_hit:1;
269 	u32 mac_idx:4;
270 	u32 mac_vld:1;
271 	u32 pkt_type:2;
272 	u32 report_filter_id:1;
273 	u32 pass:1;
274 	u32 rss:1;
275 	u32 qset:3;
276 	u32 locked:1;
277 	u32 valid:1;
278 };
279 
280 enum { FILTER_NO_VLAN_PRI = 7 };
281 
282 #define EEPROM_MAGIC 0x38E2F10C
283 
284 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
285 
286 /* Table for probing the cards.  The desc field isn't actually used */
287 struct cxgb_ident {
288 	uint16_t	vendor;
289 	uint16_t	device;
290 	int		index;
291 	char		*desc;
292 } cxgb_identifiers[] = {
293 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
294 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
295 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
296 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
297 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
298 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
299 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
300 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
301 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
302 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
303 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
304 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
305 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
306 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
307 	{0, 0, 0, NULL}
308 };
309 
310 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
311 
312 
313 static __inline char
314 t3rev2char(struct adapter *adapter)
315 {
316 	char rev = 'z';
317 
318 	switch(adapter->params.rev) {
319 	case T3_REV_A:
320 		rev = 'a';
321 		break;
322 	case T3_REV_B:
323 	case T3_REV_B2:
324 		rev = 'b';
325 		break;
326 	case T3_REV_C:
327 		rev = 'c';
328 		break;
329 	}
330 	return rev;
331 }
332 
333 static struct cxgb_ident *
334 cxgb_get_ident(device_t dev)
335 {
336 	struct cxgb_ident *id;
337 
338 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
339 		if ((id->vendor == pci_get_vendor(dev)) &&
340 		    (id->device == pci_get_device(dev))) {
341 			return (id);
342 		}
343 	}
344 	return (NULL);
345 }
346 
347 static const struct adapter_info *
348 cxgb_get_adapter_info(device_t dev)
349 {
350 	struct cxgb_ident *id;
351 	const struct adapter_info *ai;
352 
353 	id = cxgb_get_ident(dev);
354 	if (id == NULL)
355 		return (NULL);
356 
357 	ai = t3_get_adapter_info(id->index);
358 
359 	return (ai);
360 }
361 
362 static int
363 cxgb_controller_probe(device_t dev)
364 {
365 	const struct adapter_info *ai;
366 	char *ports, buf[80];
367 	int nports;
368 
369 	ai = cxgb_get_adapter_info(dev);
370 	if (ai == NULL)
371 		return (ENXIO);
372 
373 	nports = ai->nports0 + ai->nports1;
374 	if (nports == 1)
375 		ports = "port";
376 	else
377 		ports = "ports";
378 
379 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
380 	device_set_desc_copy(dev, buf);
381 	return (BUS_PROBE_DEFAULT);
382 }
383 
384 #define FW_FNAME "cxgb_t3fw"
385 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
386 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
387 
388 static int
389 upgrade_fw(adapter_t *sc)
390 {
391 	const struct firmware *fw;
392 	int status;
393 	u32 vers;
394 
395 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
396 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
397 		return (ENOENT);
398 	} else
399 		device_printf(sc->dev, "installing firmware on card\n");
400 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
401 
402 	if (status != 0) {
403 		device_printf(sc->dev, "failed to install firmware: %d\n",
404 		    status);
405 	} else {
406 		t3_get_fw_version(sc, &vers);
407 		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
408 		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
409 		    G_FW_VERSION_MICRO(vers));
410 	}
411 
412 	firmware_put(fw, FIRMWARE_UNLOAD);
413 
414 	return (status);
415 }
416 
417 /*
418  * The cxgb_controller_attach function is responsible for the initial
419  * bringup of the device.  Its responsibilities include:
420  *
421  *  1. Determine if the device supports MSI or MSI-X.
422  *  2. Allocate bus resources so that we can access the Base Address Register.
423  *  3. Create and initialize mutexes for the controller and its control
424  *     logic such as SGE and MDIO.
425  *  4. Call the hardware-specific setup routine for the adapter as a whole.
426  *  5. Allocate the BAR for doing MSI-X.
427  *  6. Set up the line interrupt iff MSI-X is not supported.
428  *  7. Create the driver's taskq.
429  *  8. Start one task queue service thread.
430  *  9. Check if the firmware and SRAM are up-to-date.  They will be
431  *     auto-updated later (before FULL_INIT_DONE), if required.
432  * 10. Create a child device for each MAC (port).
433  * 11. Initialize T3 private state.
434  * 12. Trigger the LED.
435  * 13. Set up offload iff supported.
436  * 14. Reset/restart the tick callout.
437  * 15. Attach sysctls.
438  *
439  * NOTE: Any modification of or deviation from this list MUST be reflected
440  * in this comment.  Failure to do so will result in problems on various
441  * error conditions including link flapping.
442  */
443 static int
444 cxgb_controller_attach(device_t dev)
445 {
446 	device_t child;
447 	const struct adapter_info *ai;
448 	struct adapter *sc;
449 	int i, error = 0;
450 	uint32_t vers;
451 	int port_qsets = 1;
452 	int msi_needed, reg;
453 	char buf[80];
454 
455 	sc = device_get_softc(dev);
456 	sc->dev = dev;
457 	sc->msi_count = 0;
458 	ai = cxgb_get_adapter_info(dev);
459 
460 	/* find the PCIe link width and set max read request to 4KB */
461 	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
462 		uint16_t lnk;
463 
464 		lnk = pci_read_config(dev, reg + PCIR_EXPRESS_LINK_STA, 2);
465 		sc->link_width = (lnk & PCIM_LINK_STA_WIDTH) >> 4;
466 		if (sc->link_width < 8 &&
467 		    (ai->caps & SUPPORTED_10000baseT_Full)) {
468 			device_printf(sc->dev,
469 			    "PCIe x%d Link, expect reduced performance\n",
470 			    sc->link_width);
471 		}
472 
473 		pci_set_max_read_req(dev, 4096);
474 	}
475 
476 	touch_bars(dev);
477 	pci_enable_busmaster(dev);
478 	/*
479 	 * Allocate the registers and make them available to the driver.
480 	 * The registers that we care about for NIC mode are in BAR 0.
481 	 */
482 	sc->regs_rid = PCIR_BAR(0);
483 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
484 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
485 		device_printf(dev, "Cannot allocate BAR region 0\n");
486 		return (ENXIO);
487 	}
488 	sc->udbs_rid = PCIR_BAR(2);
489 	sc->udbs_res = NULL;
490 	if (is_offload(sc) &&
491 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
492 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
493 		device_printf(dev, "Cannot allocate BAR region 2\n");
494 		error = ENXIO;
495 		goto out;
496 	}
497 
498 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
499 	    device_get_unit(dev));
500 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
501 
502 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
503 	    device_get_unit(dev));
504 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
505 	    device_get_unit(dev));
506 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
507 	    device_get_unit(dev));
508 
509 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
510 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
511 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
512 
513 	sc->bt = rman_get_bustag(sc->regs_res);
514 	sc->bh = rman_get_bushandle(sc->regs_res);
515 	sc->mmio_len = rman_get_size(sc->regs_res);
516 
517 	for (i = 0; i < MAX_NPORTS; i++)
518 		sc->port[i].adapter = sc;
519 
520 	if (t3_prep_adapter(sc, ai, 1) < 0) {
521 		printf("prep adapter failed\n");
522 		error = ENODEV;
523 		goto out;
524 	}
525         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
526 	 * enough messages for the queue sets.  If that fails, try falling
527 	 * back to MSI.  If that fails, then try falling back to the legacy
528 	 * interrupt pin model.
529 	 */
530 	sc->msix_regs_rid = 0x20;
531 	if ((msi_allowed >= 2) &&
532 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
533 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
534 
535 		if (multiq)
536 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
537 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
538 
539 		if (pci_msix_count(dev) == 0 ||
540 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
541 		    sc->msi_count != msi_needed) {
542 			device_printf(dev, "alloc msix failed - "
543 				      "msi_count=%d, msi_needed=%d, err=%d; "
544 				      "will try MSI\n", sc->msi_count,
545 				      msi_needed, error);
546 			sc->msi_count = 0;
547 			port_qsets = 1;
548 			pci_release_msi(dev);
549 			bus_release_resource(dev, SYS_RES_MEMORY,
550 			    sc->msix_regs_rid, sc->msix_regs_res);
551 			sc->msix_regs_res = NULL;
552 		} else {
553 			sc->flags |= USING_MSIX;
554 			sc->cxgb_intr = cxgb_async_intr;
555 			device_printf(dev,
556 				      "using MSI-X interrupts (%u vectors)\n",
557 				      sc->msi_count);
558 		}
559 	}
560 
561 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
562 		sc->msi_count = 1;
563 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
564 			device_printf(dev, "alloc msi failed - "
565 				      "err=%d; will try INTx\n", error);
566 			sc->msi_count = 0;
567 			port_qsets = 1;
568 			pci_release_msi(dev);
569 		} else {
570 			sc->flags |= USING_MSI;
571 			sc->cxgb_intr = t3_intr_msi;
572 			device_printf(dev, "using MSI interrupts\n");
573 		}
574 	}
575 	if (sc->msi_count == 0) {
576 		device_printf(dev, "using line interrupts\n");
577 		sc->cxgb_intr = t3b_intr;
578 	}
579 
580 	/* Create a private taskqueue thread for handling driver events */
581 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
582 	    taskqueue_thread_enqueue, &sc->tq);
583 	if (sc->tq == NULL) {
584 		device_printf(dev, "failed to allocate controller task queue\n");
585 		goto out;
586 	}
587 
588 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
589 	    device_get_nameunit(dev));
590 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
591 
592 
593 	/* Create a periodic callout for checking adapter status */
594 	callout_init(&sc->cxgb_tick_ch, TRUE);
595 
596 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
597 		/*
598 		 * Warn user that a firmware update will be attempted in init.
599 		 */
600 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
601 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
602 		sc->flags &= ~FW_UPTODATE;
603 	} else {
604 		sc->flags |= FW_UPTODATE;
605 	}
606 
607 	if (t3_check_tpsram_version(sc) < 0) {
608 		/*
609 		 * Warn user that a firmware update will be attempted in init.
610 		 */
611 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
612 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
613 		sc->flags &= ~TPS_UPTODATE;
614 	} else {
615 		sc->flags |= TPS_UPTODATE;
616 	}
617 
618 	/*
619 	 * Create a child device for each MAC.  The ethernet attachment
620 	 * will be done in these children.
621 	 */
622 	for (i = 0; i < (sc)->params.nports; i++) {
623 		struct port_info *pi;
624 
625 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
626 			device_printf(dev, "failed to add child port\n");
627 			error = EINVAL;
628 			goto out;
629 		}
630 		pi = &sc->port[i];
631 		pi->adapter = sc;
632 		pi->nqsets = port_qsets;
633 		pi->first_qset = i*port_qsets;
634 		pi->port_id = i;
635 		pi->tx_chan = i >= ai->nports0;
636 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
637 		sc->rxpkt_map[pi->txpkt_intf] = i;
638 		sc->port[i].tx_chan = i >= ai->nports0;
639 		sc->portdev[i] = child;
640 		device_set_softc(child, pi);
641 	}
642 	if ((error = bus_generic_attach(dev)) != 0)
643 		goto out;
644 
645 	/* initialize sge private state */
646 	t3_sge_init_adapter(sc);
647 
648 	t3_led_ready(sc);
649 
650 	cxgb_offload_init();
651 	if (is_offload(sc)) {
652 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
653 		cxgb_adapter_ofld(sc);
654         }
655 	error = t3_get_fw_version(sc, &vers);
656 	if (error)
657 		goto out;
658 
659 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
660 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
661 	    G_FW_VERSION_MICRO(vers));
662 
663 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
664 		 ai->desc, is_offload(sc) ? "R" : "",
665 		 sc->params.vpd.ec, sc->params.vpd.sn);
666 	device_set_desc_copy(dev, buf);
667 
668 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
669 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
670 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
671 
672 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
673 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
674 	t3_add_attach_sysctls(sc);
675 out:
676 	if (error)
677 		cxgb_free(sc);
678 
679 	return (error);
680 }
681 
682 /*
683  * The cxgb_controller_detach routine is called when the device is
684  * unloaded from the system.
685  */
686 
687 static int
688 cxgb_controller_detach(device_t dev)
689 {
690 	struct adapter *sc;
691 
692 	sc = device_get_softc(dev);
693 
694 	cxgb_free(sc);
695 
696 	return (0);
697 }
698 
699 /*
700  * cxgb_free() is called by the cxgb_controller_detach() routine
701  * to tear down the structures that were built up in
702  * cxgb_controller_attach(), and should be the final piece of work
703  * done when fully unloading the driver.  Its responsibilities
704  * include:
705  *
706  *  1. Shutting down the threads started by the cxgb_controller_attach()
707  *     routine.
708  *  2. Stopping the lower level device and all callouts (cxgb_down()).
709  *  3. Detaching all of the port devices created during the
710  *     cxgb_controller_attach() routine.
711  *  4. Removing the device children created via cxgb_controller_attach().
712  *  5. Releasing PCI resources associated with the device.
713  *  6. Turning off the offload support, iff it was turned on.
714  *  7. Destroying the mutexes created in cxgb_controller_attach().
715  *
716  */
717 static void
718 cxgb_free(struct adapter *sc)
719 {
720 	int i, nqsets = 0;
721 
722 	ADAPTER_LOCK(sc);
723 	sc->flags |= CXGB_SHUTDOWN;
724 	ADAPTER_UNLOCK(sc);
725 
726 	/*
727 	 * Make sure all child devices are gone.
728 	 */
729 	bus_generic_detach(sc->dev);
730 	for (i = 0; i < (sc)->params.nports; i++) {
731 		if (sc->portdev[i] &&
732 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
733 			device_printf(sc->dev, "failed to delete child port\n");
734 		nqsets += sc->port[i].nqsets;
735 	}
736 
737 	/*
738 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
739 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
740 	 * all open devices have been closed.
741 	 * and all open devices have been closed.
742 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
743 					   __func__, sc->open_device_map));
744 	for (i = 0; i < sc->params.nports; i++) {
745 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
746 						  __func__, i));
747 	}
748 
749 	/*
750 	 * Finish off the adapter's callouts.
751 	 */
752 	callout_drain(&sc->cxgb_tick_ch);
753 	callout_drain(&sc->sge_timer_ch);
754 
755 	/*
756 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
757 	 * sysctls are cleaned up by the kernel linker.
758 	 */
759 	if (sc->flags & FULL_INIT_DONE) {
760  		t3_free_sge_resources(sc, nqsets);
761  		sc->flags &= ~FULL_INIT_DONE;
762  	}
763 
764 	/*
765 	 * Release all interrupt resources.
766 	 */
767 	cxgb_teardown_interrupts(sc);
768 	if (sc->flags & (USING_MSI | USING_MSIX)) {
769 		device_printf(sc->dev, "releasing msi message(s)\n");
770 		pci_release_msi(sc->dev);
771 	} else {
772 		device_printf(sc->dev, "no msi message to release\n");
773 	}
774 
775 	if (sc->msix_regs_res != NULL) {
776 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
777 		    sc->msix_regs_res);
778 	}
779 
780 	/*
781 	 * Free the adapter's taskqueue.
782 	 */
783 	if (sc->tq != NULL) {
784 		taskqueue_free(sc->tq);
785 		sc->tq = NULL;
786 	}
787 
788 	if (is_offload(sc)) {
789 		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
790 		cxgb_adapter_unofld(sc);
791 	}
792 
793 #ifdef notyet
794 	if (sc->flags & CXGB_OFLD_INIT)
795 		cxgb_offload_deactivate(sc);
796 #endif
797 	free(sc->filters, M_DEVBUF);
798 	t3_sge_free(sc);
799 
800 	cxgb_offload_exit();
801 
802 	if (sc->udbs_res != NULL)
803 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
804 		    sc->udbs_res);
805 
806 	if (sc->regs_res != NULL)
807 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
808 		    sc->regs_res);
809 
810 	MTX_DESTROY(&sc->mdio_lock);
811 	MTX_DESTROY(&sc->sge.reg_lock);
812 	MTX_DESTROY(&sc->elmer_lock);
813 	ADAPTER_LOCK_DEINIT(sc);
814 }
815 
816 /**
817  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
818  *	@sc: the controller softc
819  *
820  *	Determines how many sets of SGE queues to use and initializes them.
821  *	We support multiple queue sets per port if we have MSI-X, otherwise
822  *	just one queue set per port.
823  */
824 static int
825 setup_sge_qsets(adapter_t *sc)
826 {
827 	int i, j, err, irq_idx = 0, qset_idx = 0;
828 	u_int ntxq = SGE_TXQ_PER_SET;
829 
830 	if ((err = t3_sge_alloc(sc)) != 0) {
831 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
832 		return (err);
833 	}
834 
835 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
836 		irq_idx = -1;
837 
838 	for (i = 0; i < (sc)->params.nports; i++) {
839 		struct port_info *pi = &sc->port[i];
840 
841 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
842 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
843 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
844 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
845 			if (err) {
846 				t3_free_sge_resources(sc, qset_idx);
847 				device_printf(sc->dev,
848 				    "t3_sge_alloc_qset failed with %d\n", err);
849 				return (err);
850 			}
851 		}
852 	}
853 
854 	return (0);
855 }
856 
857 static void
858 cxgb_teardown_interrupts(adapter_t *sc)
859 {
860 	int i;
861 
862 	for (i = 0; i < SGE_QSETS; i++) {
863 		if (sc->msix_intr_tag[i] == NULL) {
864 
865 			/* Should have been set up fully or not at all */
866 			KASSERT(sc->msix_irq_res[i] == NULL &&
867 				sc->msix_irq_rid[i] == 0,
868 				("%s: half-done interrupt (%d).", __func__, i));
869 
870 			continue;
871 		}
872 
873 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
874 				  sc->msix_intr_tag[i]);
875 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
876 				     sc->msix_irq_res[i]);
877 
878 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
879 		sc->msix_irq_rid[i] = 0;
880 	}
881 
882 	if (sc->intr_tag) {
883 		KASSERT(sc->irq_res != NULL,
884 			("%s: half-done interrupt.", __func__));
885 
886 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
887 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
888 				     sc->irq_res);
889 
890 		sc->irq_res = sc->intr_tag = NULL;
891 		sc->irq_rid = 0;
892 	}
893 }
894 
895 static int
896 cxgb_setup_interrupts(adapter_t *sc)
897 {
898 	struct resource *res;
899 	void *tag;
900 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
901 
902 	sc->irq_rid = intr_flag ? 1 : 0;
903 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
904 					     RF_SHAREABLE | RF_ACTIVE);
905 	if (sc->irq_res == NULL) {
906 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
907 			      intr_flag, sc->irq_rid);
908 		err = EINVAL;
909 		sc->irq_rid = 0;
910 	} else {
911 		err = bus_setup_intr(sc->dev, sc->irq_res,
912 		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
913 		    sc->cxgb_intr, sc, &sc->intr_tag);
914 
915 		if (err) {
916 			device_printf(sc->dev,
917 				      "Cannot set up interrupt (%x, %u, %d)\n",
918 				      intr_flag, sc->irq_rid, err);
919 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
920 					     sc->irq_res);
921 			sc->irq_res = sc->intr_tag = NULL;
922 			sc->irq_rid = 0;
923 		}
924 	}
925 
926 	/* That's all for INTx or MSI */
927 	if (!(intr_flag & USING_MSIX) || err)
928 		return (err);
929 
930 	for (i = 0; i < sc->msi_count - 1; i++) {
931 		rid = i + 2;
932 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
933 					     RF_SHAREABLE | RF_ACTIVE);
934 		if (res == NULL) {
935 			device_printf(sc->dev, "Cannot allocate interrupt "
936 				      "for message %d\n", rid);
937 			err = EINVAL;
938 			break;
939 		}
940 
941 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
942 				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
943 		if (err) {
944 			device_printf(sc->dev, "Cannot set up interrupt "
945 				      "for message %d (%d)\n", rid, err);
946 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
947 			break;
948 		}
949 
950 		sc->msix_irq_rid[i] = rid;
951 		sc->msix_irq_res[i] = res;
952 		sc->msix_intr_tag[i] = tag;
953 	}
954 
955 	if (err)
956 		cxgb_teardown_interrupts(sc);
957 
958 	return (err);
959 }
960 
961 
962 static int
963 cxgb_port_probe(device_t dev)
964 {
965 	struct port_info *p;
966 	char buf[80];
967 	const char *desc;
968 
969 	p = device_get_softc(dev);
970 	desc = p->phy.desc;
971 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
972 	device_set_desc_copy(dev, buf);
973 	return (0);
974 }
975 
976 
977 static int
978 cxgb_makedev(struct port_info *pi)
979 {
980 
981 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
982 	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
983 
984 	if (pi->port_cdev == NULL)
985 		return (ENOMEM);
986 
987 	pi->port_cdev->si_drv1 = (void *)pi;
988 
989 	return (0);
990 }
991 
992 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
993     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
994     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE)
995 #define CXGB_CAP_ENABLE (CXGB_CAP & ~IFCAP_TSO6)
996 
997 static int
998 cxgb_port_attach(device_t dev)
999 {
1000 	struct port_info *p;
1001 	struct ifnet *ifp;
1002 	int err;
1003 	struct adapter *sc;
1004 
1005 	p = device_get_softc(dev);
1006 	sc = p->adapter;
1007 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1008 	    device_get_unit(device_get_parent(dev)), p->port_id);
1009 	PORT_LOCK_INIT(p, p->lockbuf);
1010 
1011 	callout_init(&p->link_check_ch, CALLOUT_MPSAFE);
1012 	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1013 
1014 	/* Allocate an ifnet object and set it up */
1015 	ifp = p->ifp = if_alloc(IFT_ETHER);
1016 	if (ifp == NULL) {
1017 		device_printf(dev, "Cannot allocate ifnet\n");
1018 		return (ENOMEM);
1019 	}
1020 
1021 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1022 	ifp->if_init = cxgb_init;
1023 	ifp->if_softc = p;
1024 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1025 	ifp->if_ioctl = cxgb_ioctl;
1026 	ifp->if_start = cxgb_start;
1027 
1028 	ifp->if_snd.ifq_drv_maxlen = max(cxgb_snd_queue_len, ifqmaxlen);
1029 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
1030 	IFQ_SET_READY(&ifp->if_snd);
1031 
1032 	ifp->if_capabilities = CXGB_CAP;
1033 	ifp->if_capenable = CXGB_CAP_ENABLE;
1034 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO;
1035 
1036 	/*
1037 	 * Disable TSO on 4-port adapters; it isn't supported by the firmware.
1038 	 */
1039 	if (sc->params.nports > 2) {
1040 		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1041 		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1042 		ifp->if_hwassist &= ~CSUM_TSO;
1043 	}
1044 
1045 	ether_ifattach(ifp, p->hw_addr);
1046 	ifp->if_transmit = cxgb_transmit;
1047 	ifp->if_qflush = cxgb_qflush;
1048 
1049 #ifdef DEFAULT_JUMBO
1050 	if (sc->params.nports <= 2)
1051 		ifp->if_mtu = ETHERMTU_JUMBO;
1052 #endif
1053 	if ((err = cxgb_makedev(p)) != 0) {
1054 		printf("makedev failed %d\n", err);
1055 		return (err);
1056 	}
1057 
1058 	/* Create a list of media supported by this port */
1059 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1060 	    cxgb_media_status);
1061 	cxgb_build_medialist(p);
1062 
1063 	t3_sge_init_port(p);
1064 
1065 	return (err);
1066 }
1067 
1068 /*
1069  * cxgb_port_detach() is called via the device_detach method when
1070  * cxgb_free() calls bus_generic_detach().  It is responsible for
1071  * removing the device from the view of the kernel, i.e. from all
1072  * interfaces lists etc.  This routine is only called when the driver is
1073  * being unloaded, not when the link goes down.
1074  */
1075 static int
1076 cxgb_port_detach(device_t dev)
1077 {
1078 	struct port_info *p;
1079 	struct adapter *sc;
1080 	int i;
1081 
1082 	p = device_get_softc(dev);
1083 	sc = p->adapter;
1084 
1085 	/* Tell cxgb_ioctl and if_init that the port is going away */
1086 	ADAPTER_LOCK(sc);
1087 	SET_DOOMED(p);
1088 	wakeup(&sc->flags);
1089 	while (IS_BUSY(sc))
1090 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1091 	SET_BUSY(sc);
1092 	ADAPTER_UNLOCK(sc);
1093 
1094 	if (p->port_cdev != NULL)
1095 		destroy_dev(p->port_cdev);
1096 
1097 	cxgb_uninit_synchronized(p);
1098 	ether_ifdetach(p->ifp);
1099 
1100 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1101 		struct sge_qset *qs = &sc->sge.qs[i];
1102 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1103 
1104 		callout_drain(&txq->txq_watchdog);
1105 		callout_drain(&txq->txq_timer);
1106 	}
1107 
1108 	PORT_LOCK_DEINIT(p);
1109 	if_free(p->ifp);
1110 	p->ifp = NULL;
1111 
1112 	ADAPTER_LOCK(sc);
1113 	CLR_BUSY(sc);
1114 	wakeup_one(&sc->flags);
1115 	ADAPTER_UNLOCK(sc);
1116 	return (0);
1117 }
1118 
1119 void
1120 t3_fatal_err(struct adapter *sc)
1121 {
1122 	u_int fw_status[4];
1123 
1124 	if (sc->flags & FULL_INIT_DONE) {
1125 		t3_sge_stop(sc);
1126 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1127 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1128 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1129 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1130 		t3_intr_disable(sc);
1131 	}
1132 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1133 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1134 		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1135 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1136 }
1137 
1138 int
1139 t3_os_find_pci_capability(adapter_t *sc, int cap)
1140 {
1141 	device_t dev;
1142 	struct pci_devinfo *dinfo;
1143 	pcicfgregs *cfg;
1144 	uint32_t status;
1145 	uint8_t ptr;
1146 
1147 	dev = sc->dev;
1148 	dinfo = device_get_ivars(dev);
1149 	cfg = &dinfo->cfg;
1150 
1151 	status = pci_read_config(dev, PCIR_STATUS, 2);
1152 	if (!(status & PCIM_STATUS_CAPPRESENT))
1153 		return (0);
1154 
1155 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1156 	case 0:
1157 	case 1:
1158 		ptr = PCIR_CAP_PTR;
1159 		break;
1160 	case 2:
1161 		ptr = PCIR_CAP_PTR_2;
1162 		break;
1163 	default:
1164 		return (0);
1165 		break;
1166 	}
1167 	ptr = pci_read_config(dev, ptr, 1);
1168 
1169 	while (ptr != 0) {
1170 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1171 			return (ptr);
1172 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1173 	}
1174 
1175 	return (0);
1176 }
1177 
1178 int
1179 t3_os_pci_save_state(struct adapter *sc)
1180 {
1181 	device_t dev;
1182 	struct pci_devinfo *dinfo;
1183 
1184 	dev = sc->dev;
1185 	dinfo = device_get_ivars(dev);
1186 
1187 	pci_cfg_save(dev, dinfo, 0);
1188 	return (0);
1189 }
1190 
1191 int
1192 t3_os_pci_restore_state(struct adapter *sc)
1193 {
1194 	device_t dev;
1195 	struct pci_devinfo *dinfo;
1196 
1197 	dev = sc->dev;
1198 	dinfo = device_get_ivars(dev);
1199 
1200 	pci_cfg_restore(dev, dinfo);
1201 	return (0);
1202 }
1203 
1204 /**
1205  *	t3_os_link_changed - handle link status changes
1206  *	@adapter: the adapter associated with the link change
1207  *	@port_id: the port index whose link status has changed
1208  *	@link_status: the new status of the link
1209  *	@speed: the new speed setting
1210  *	@duplex: the new duplex setting
1211  *	@fc: the new flow-control setting
1212  *
1213  *	This is the OS-dependent handler for link status changes.  The OS
1214  *	neutral handler takes care of most of the processing for these events,
1215  *	then calls this handler for any OS-specific processing.
1216  */
1217 void
1218 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1219      int duplex, int fc, int mac_was_reset)
1220 {
1221 	struct port_info *pi = &adapter->port[port_id];
1222 	struct ifnet *ifp = pi->ifp;
1223 
1224 	/* no race with detach, so ifp should always be good */
1225 	KASSERT(ifp, ("%s: if detached.", __func__));
1226 
1227 	/* Reapply mac settings if they were lost due to a reset */
1228 	if (mac_was_reset) {
1229 		PORT_LOCK(pi);
1230 		cxgb_update_mac_settings(pi);
1231 		PORT_UNLOCK(pi);
1232 	}
1233 
1234 	if (link_status) {
1235 		ifp->if_baudrate = IF_Mbps(speed);
1236 		if_link_state_change(ifp, LINK_STATE_UP);
1237 	} else
1238 		if_link_state_change(ifp, LINK_STATE_DOWN);
1239 }
1240 
1241 /**
1242  *	t3_os_phymod_changed - handle PHY module changes
1243  *	@adap: the adapter whose PHY reported the module change
1244  *	@port_id: the port index whose PHY module changed
1245  *
1246  *	This is the OS-dependent handler for PHY module changes.  It is
1247  *	invoked when a PHY module is removed or inserted, and performs any
1248  *	OS-specific processing that is required.
1249  */
1250 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1251 {
1252 	static const char *mod_str[] = {
1253 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1254 	};
1255 	struct port_info *pi = &adap->port[port_id];
1256 	int mod = pi->phy.modtype;
1257 
1258 	if (mod != pi->media.ifm_cur->ifm_data)
1259 		cxgb_build_medialist(pi);
1260 
1261 	if (mod == phy_modtype_none)
1262 		if_printf(pi->ifp, "PHY module unplugged\n");
1263 	else {
1264 		KASSERT(mod < ARRAY_SIZE(mod_str),
1265 			("invalid PHY module type %d", mod));
1266 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1267 	}
1268 }
1269 
1270 void
1271 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1272 {
1273 
1274 	/*
1275 	 * The ifnet might not be allocated before this gets called,
1276 	 * as this is called early on in attach by t3_prep_adapter, so
1277 	 * save the address off in the port structure.
1278 	 */
1279 	if (cxgb_debug)
1280 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1281 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1282 }
1283 
1284 /*
1285  * Programs the XGMAC based on the settings in the ifnet.  These settings
1286  * include MTU, MAC address, mcast addresses, etc.
1287  */
1288 static void
1289 cxgb_update_mac_settings(struct port_info *p)
1290 {
1291 	struct ifnet *ifp = p->ifp;
1292 	struct t3_rx_mode rm;
1293 	struct cmac *mac = &p->mac;
1294 	int mtu, hwtagging;
1295 
1296 	PORT_LOCK_ASSERT_OWNED(p);
1297 
1298 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1299 
1300 	mtu = ifp->if_mtu;
1301 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1302 		mtu += ETHER_VLAN_ENCAP_LEN;
1303 
1304 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1305 
1306 	t3_mac_set_mtu(mac, mtu);
1307 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1308 	t3_mac_set_address(mac, 0, p->hw_addr);
1309 	t3_init_rx_mode(&rm, p);
1310 	t3_mac_set_rx_mode(mac, &rm);
1311 }
1312 
1313 
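/*
 * await_mgmt_replies: poll response queue 0's offload packet counter until
 * 'n' management replies beyond 'init_cnt' have arrived, sleeping 10ms
 * between checks; gives up with ETIMEDOUT after 5 attempts.
 */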
1314 static int
1315 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1316 			      unsigned long n)
1317 {
1318 	int attempts = 5;
1319 
1320 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1321 		if (!--attempts)
1322 			return (ETIMEDOUT);
1323 		t3_os_sleep(10);
1324 	}
1325 	return (0);
1326 }
1327 
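/*
 * init_tp_parity: with offload mode temporarily enabled, write a dummy entry
 * to every SMT, L2T, and routing-table slot (plus one TCB field) via the
 * management TX queue so the TP's on-chip memories start out with valid
 * parity, then wait for all of the replies to come back.
 */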
1328 static int
1329 init_tp_parity(struct adapter *adap)
1330 {
1331 	int i;
1332 	struct mbuf *m;
1333 	struct cpl_set_tcb_field *greq;
1334 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1335 
1336 	t3_tp_set_offload_mode(adap, 1);
1337 
1338 	for (i = 0; i < 16; i++) {
1339 		struct cpl_smt_write_req *req;
1340 
1341 		m = m_gethdr(M_WAITOK, MT_DATA);
1342 		req = mtod(m, struct cpl_smt_write_req *);
1343 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1344 		memset(req, 0, sizeof(*req));
1345 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1346 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1347 		req->iff = i;
1348 		t3_mgmt_tx(adap, m);
1349 	}
1350 
1351 	for (i = 0; i < 2048; i++) {
1352 		struct cpl_l2t_write_req *req;
1353 
1354 		m = m_gethdr(M_WAITOK, MT_DATA);
1355 		req = mtod(m, struct cpl_l2t_write_req *);
1356 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1357 		memset(req, 0, sizeof(*req));
1358 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1359 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1360 		req->params = htonl(V_L2T_W_IDX(i));
1361 		t3_mgmt_tx(adap, m);
1362 	}
1363 
1364 	for (i = 0; i < 2048; i++) {
1365 		struct cpl_rte_write_req *req;
1366 
1367 		m = m_gethdr(M_WAITOK, MT_DATA);
1368 		req = mtod(m, struct cpl_rte_write_req *);
1369 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1370 		memset(req, 0, sizeof(*req));
1371 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1372 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1373 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1374 		t3_mgmt_tx(adap, m);
1375 	}
1376 
1377 	m = m_gethdr(M_WAITOK, MT_DATA);
1378 	greq = mtod(m, struct cpl_set_tcb_field *);
1379 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1380 	memset(greq, 0, sizeof(*greq));
1381 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1382 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1383 	greq->mask = htobe64(1);
1384 	t3_mgmt_tx(adap, m);
1385 
1386 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1387 	t3_tp_set_offload_mode(adap, 0);
1388 	return (i);
1389 }
1390 
1391 /**
1392  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1393  *	@adap: the adapter
1394  *
1395  *	Sets up RSS to distribute packets to multiple receive queues.  We
1396  *	configure the RSS CPU lookup table to distribute to the number of HW
1397  *	receive queues, and the response queue lookup table to narrow that
1398  *	down to the response queues actually configured for each port.
1399  *	We always configure the RSS mapping for two ports since the mapping
1400  *	table has plenty of entries.
1401  */
1402 static void
1403 setup_rss(adapter_t *adap)
1404 {
1405 	int i;
1406 	u_int nq[2];
1407 	uint8_t cpus[SGE_QSETS + 1];
1408 	uint16_t rspq_map[RSS_TABLE_SIZE];
1409 
1410 	for (i = 0; i < SGE_QSETS; ++i)
1411 		cpus[i] = i;
1412 	cpus[SGE_QSETS] = 0xff;
1413 
1414 	nq[0] = nq[1] = 0;
1415 	for_each_port(adap, i) {
1416 		const struct port_info *pi = adap2pinfo(adap, i);
1417 
1418 		nq[pi->tx_chan] += pi->nqsets;
1419 	}
1420 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1421 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1422 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1423 	}
1424 
1425 	/* Calculate the reverse RSS map table */
1426 	for (i = 0; i < SGE_QSETS; ++i)
1427 		adap->rrss_map[i] = 0xff;
1428 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1429 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1430 			adap->rrss_map[rspq_map[i]] = i;
1431 
1432 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1433 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1434 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1435 	              cpus, rspq_map);
1436 
1437 }
1438 
1439 /*
1440  * Sends an mbuf to the offload queue driver by handing it to
1441  * t3_offload_tx().
1442  */
1443 static inline int
1444 offload_tx(struct t3cdev *tdev, struct mbuf *m)
1445 {
1446 	int ret;
1447 
1448 	ret = t3_offload_tx(tdev, m);
1449 	return (ret);
1450 }
1451 
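/*
 * write_smt_entry: program the source MAC table entry for port 'idx' by
 * sending a CPL_SMT_WRITE_REQ carrying that port's MAC address down the
 * offload queue.
 */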
1452 static int
1453 write_smt_entry(struct adapter *adapter, int idx)
1454 {
1455 	struct port_info *pi = &adapter->port[idx];
1456 	struct cpl_smt_write_req *req;
1457 	struct mbuf *m;
1458 
1459 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1460 		return (ENOMEM);
1461 
1462 	req = mtod(m, struct cpl_smt_write_req *);
1463 	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1464 
1465 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1466 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1467 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1468 	req->iff = idx;
1469 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1470 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1471 
1472 	m_set_priority(m, 1);
1473 
1474 	offload_tx(&adapter->tdev, m);
1475 
1476 	return (0);
1477 }
1478 
1479 static int
1480 init_smt(struct adapter *adapter)
1481 {
1482 	int i;
1483 
1484 	for_each_port(adapter, i)
1485 		write_smt_entry(adapter, i);
1486 	return 0;
1487 	return (0);
1488 
1489 static void
1490 init_port_mtus(adapter_t *adapter)
1491 {
1492 	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1493 
1494 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1495 }
1496 
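/*
 * send_pktsched_cmd: build a FW_WROPCODE_MNGT work request that sets one
 * packet-scheduler entry (scheduler, index, min/max, port binding) and hand
 * it to the management TX queue.
 */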
1497 static void
1498 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1499 			      int hi, int port)
1500 {
1501 	struct mbuf *m;
1502 	struct mngt_pktsched_wr *req;
1503 
1504 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1505 	if (m) {
1506 		req = mtod(m, struct mngt_pktsched_wr *);
1507 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1508 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1509 		req->sched = sched;
1510 		req->idx = qidx;
1511 		req->min = lo;
1512 		req->max = hi;
1513 		req->binding = port;
1514 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1515 		t3_mgmt_tx(adap, m);
1516 	}
1517 }
1518 
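/*
 * bind_qsets: for each port, send a packet-scheduler command that binds every
 * one of the port's queue sets to that port's TX channel.
 */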
1519 static void
1520 bind_qsets(adapter_t *sc)
1521 {
1522 	int i, j;
1523 
1524 	for (i = 0; i < (sc)->params.nports; ++i) {
1525 		const struct port_info *pi = adap2pinfo(sc, i);
1526 
1527 		for (j = 0; j < pi->nqsets; ++j) {
1528 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1529 					  -1, pi->tx_chan);
1530 
1531 		}
1532 	}
1533 }
1534 
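/*
 * update_tpeeprom: compare the protocol SRAM image version recorded in the
 * EEPROM against the version this driver expects and, if they differ, rewrite
 * the EEPROM copy from the cxgb_t3<rev>_tp_eeprom firmware(9) image.
 */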
1535 static void
1536 update_tpeeprom(struct adapter *adap)
1537 {
1538 	const struct firmware *tpeeprom;
1539 
1540 	uint32_t version;
1541 	unsigned int major, minor;
1542 	int ret, len;
1543 	char rev, name[32];
1544 
1545 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1546 
1547 	major = G_TP_VERSION_MAJOR(version);
1548 	minor = G_TP_VERSION_MINOR(version);
1549 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1550 		return;
1551 
1552 	rev = t3rev2char(adap);
1553 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1554 
1555 	tpeeprom = firmware_get(name);
1556 	if (tpeeprom == NULL) {
1557 		device_printf(adap->dev,
1558 			      "could not load TP EEPROM: unable to load %s\n",
1559 			      name);
1560 		return;
1561 	}
1562 
1563 	len = tpeeprom->datasize - 4;
1564 
1565 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1566 	if (ret)
1567 		goto release_tpeeprom;
1568 
1569 	if (len != TP_SRAM_LEN) {
1570 		device_printf(adap->dev,
1571 			      "%s length is wrong len=%d expected=%d\n", name,
1572 			      len, TP_SRAM_LEN);
1573 		goto release_tpeeprom;
1574 	}
1575 
1576 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1577 	    TP_SRAM_OFFSET);
1578 
1579 	if (!ret) {
1580 		device_printf(adap->dev,
1581 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1582 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1583 	} else
1584 		device_printf(adap->dev,
1585 			      "Protocol SRAM image update in EEPROM failed\n");
1586 
1587 release_tpeeprom:
1588 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1589 
1590 	return;
1591 }
1592 
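/*
 * update_tpsram: refresh the EEPROM copy of the protocol SRAM image, then
 * load the cxgb_t3<rev>_protocol_sram firmware(9) image, validate it, and
 * write it into the TP's protocol SRAM.
 */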
1593 static int
1594 update_tpsram(struct adapter *adap)
1595 {
1596 	const struct firmware *tpsram;
1597 	int ret;
1598 	char rev, name[32];
1599 
1600 	rev = t3rev2char(adap);
1601 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1602 
1603 	update_tpeeprom(adap);
1604 
1605 	tpsram = firmware_get(name);
1606 	if (tpsram == NULL) {
1607 		device_printf(adap->dev, "could not load TP SRAM\n");
1608 		return (EINVAL);
1609 	} else
1610 		device_printf(adap->dev, "updating TP SRAM\n");
1611 
1612 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1613 	if (ret)
1614 		goto release_tpsram;
1615 
1616 	ret = t3_set_proto_sram(adap, tpsram->data);
1617 	if (ret)
1618 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1619 
1620 release_tpsram:
1621 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1622 
1623 	return (ret);
1624 }
1625 
1626 /**
1627  *	cxgb_up - enable the adapter
1628  *	@adap: adapter being enabled
1629  *
1630  *	Called when the first port is enabled, this function performs the
1631  *	actions necessary to make an adapter operational, such as completing
1632  *	the initialization of HW modules, and enabling interrupts.
1633  */
1634 static int
1635 cxgb_up(struct adapter *sc)
1636 {
1637 	int err = 0;
1638 	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1639 
1640 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1641 					   __func__, sc->open_device_map));
1642 
1643 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1644 
1645 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1646 
1647 		if ((sc->flags & FW_UPTODATE) == 0)
1648 			if ((err = upgrade_fw(sc)))
1649 				goto out;
1650 
1651 		if ((sc->flags & TPS_UPTODATE) == 0)
1652 			if ((err = update_tpsram(sc)))
1653 				goto out;
1654 
1655 		if (is_offload(sc) && nfilters != 0) {
1656 			sc->params.mc5.nservers = 0;
1657 
1658 			if (nfilters < 0)
1659 				sc->params.mc5.nfilters = mxf;
1660 			else
1661 				sc->params.mc5.nfilters = min(nfilters, mxf);
1662 		}
1663 
1664 		err = t3_init_hw(sc, 0);
1665 		if (err)
1666 			goto out;
1667 
1668 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1669 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1670 
1671 		err = setup_sge_qsets(sc);
1672 		if (err)
1673 			goto out;
1674 
1675 		alloc_filters(sc);
1676 		setup_rss(sc);
1677 
1678 		t3_intr_clear(sc);
1679 		err = cxgb_setup_interrupts(sc);
1680 		if (err)
1681 			goto out;
1682 
1683 		t3_add_configured_sysctls(sc);
1684 		sc->flags |= FULL_INIT_DONE;
1685 	}
1686 
1687 	t3_intr_clear(sc);
1688 	t3_sge_start(sc);
1689 	t3_intr_enable(sc);
1690 
1691 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1692 	    is_offload(sc) && init_tp_parity(sc) == 0)
1693 		sc->flags |= TP_PARITY_INIT;
1694 
1695 	if (sc->flags & TP_PARITY_INIT) {
1696 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1697 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1698 	}
1699 
1700 	if (!(sc->flags & QUEUES_BOUND)) {
1701 		bind_qsets(sc);
1702 		setup_hw_filters(sc);
1703 		sc->flags |= QUEUES_BOUND;
1704 	}
1705 
1706 	t3_sge_reset_adapter(sc);
1707 out:
1708 	return (err);
1709 }
1710 
1711 /*
1712  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1713  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1714  * during controller_detach, not here.
1715  */
1716 static void
1717 cxgb_down(struct adapter *sc)
1718 {
1719 	t3_sge_stop(sc);
1720 	t3_intr_disable(sc);
1721 }
1722 
1723 static int
1724 offload_open(struct port_info *pi)
1725 {
1726 	struct adapter *sc = pi->adapter;
1727 	struct t3cdev *tdev = &sc->tdev;
1728 
1729 	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1730 
1731 	t3_tp_set_offload_mode(sc, 1);
1732 	tdev->lldev = pi->ifp;
1733 	init_port_mtus(sc);
1734 	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1735 		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1736 	init_smt(sc);
1737 	cxgb_add_clients(tdev);
1738 
1739 	return (0);
1740 }
1741 
1742 static int
1743 offload_close(struct t3cdev *tdev)
1744 {
1745 	struct adapter *adapter = tdev2adap(tdev);
1746 
1747 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1748 		return (0);
1749 
1750 	/* Call back all registered clients */
1751 	cxgb_remove_clients(tdev);
1752 
1753 	tdev->lldev = NULL;
1754 	cxgb_set_dummy_ops(tdev);
1755 	t3_tp_set_offload_mode(adapter, 0);
1756 
1757 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1758 
1759 	return (0);
1760 }
1761 
1762 /*
1763  * if_init for cxgb ports.
1764  */
1765 static void
1766 cxgb_init(void *arg)
1767 {
1768 	struct port_info *p = arg;
1769 	struct adapter *sc = p->adapter;
1770 
1771 	ADAPTER_LOCK(sc);
1772 	cxgb_init_locked(p); /* releases adapter lock */
1773 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1774 }
1775 
1776 static int
1777 cxgb_init_locked(struct port_info *p)
1778 {
1779 	struct adapter *sc = p->adapter;
1780 	struct ifnet *ifp = p->ifp;
1781 	struct cmac *mac = &p->mac;
1782 	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1783 
1784 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1785 
1786 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1787 		gave_up_lock = 1;
1788 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1789 			rc = EINTR;
1790 			goto done;
1791 		}
1792 	}
1793 	if (IS_DOOMED(p)) {
1794 		rc = ENXIO;
1795 		goto done;
1796 	}
1797 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1798 
1799 	/*
1800 	 * The code that runs during one-time adapter initialization can sleep
1801 	 * so it's important not to hold any locks across it.
1802 	 */
1803 	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1804 
1805 	if (may_sleep) {
1806 		SET_BUSY(sc);
1807 		gave_up_lock = 1;
1808 		ADAPTER_UNLOCK(sc);
1809 	}
1810 
1811 	if (sc->open_device_map == 0) {
1812 		if ((rc = cxgb_up(sc)) != 0)
1813 			goto done;
1814 
1815 		if (is_offload(sc) && !ofld_disable && offload_open(p))
1816 			log(LOG_WARNING,
1817 			    "Could not initialize offload capabilities\n");
1818 	}
1819 
1820 	PORT_LOCK(p);
1821 	if (isset(&sc->open_device_map, p->port_id) &&
1822 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1823 		PORT_UNLOCK(p);
1824 		goto done;
1825 	}
1826 	t3_port_intr_enable(sc, p->port_id);
1827 	if (!mac->multiport)
1828 		t3_mac_init(mac);
1829 	cxgb_update_mac_settings(p);
1830 	t3_link_start(&p->phy, mac, &p->link_config);
1831 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1832 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1833 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1834 	PORT_UNLOCK(p);
1835 
1836 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1837 		struct sge_qset *qs = &sc->sge.qs[i];
1838 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1839 
1840 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1841 				 txq->txq_watchdog.c_cpu);
1842 	}
1843 
1844 	/* all ok */
1845 	setbit(&sc->open_device_map, p->port_id);
1846 	callout_reset(&p->link_check_ch,
1847 	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1848 	    link_check_callout, p);
1849 
1850 done:
1851 	if (may_sleep) {
1852 		ADAPTER_LOCK(sc);
1853 		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1854 		CLR_BUSY(sc);
1855 	}
1856 	if (gave_up_lock)
1857 		wakeup_one(&sc->flags);
1858 	ADAPTER_UNLOCK(sc);
1859 	return (rc);
1860 }
1861 
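/*
 * Bring a port down.  Called with the adapter lock held and returns with it
 * released; the adapter is marked busy and the lock is dropped around the
 * actual teardown in cxgb_uninit_synchronized.
 */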
1862 static int
1863 cxgb_uninit_locked(struct port_info *p)
1864 {
1865 	struct adapter *sc = p->adapter;
1866 	int rc;
1867 
1868 	ADAPTER_LOCK_ASSERT_OWNED(sc);
1869 
1870 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1871 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1872 			rc = EINTR;
1873 			goto done;
1874 		}
1875 	}
1876 	if (IS_DOOMED(p)) {
1877 		rc = ENXIO;
1878 		goto done;
1879 	}
1880 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1881 	SET_BUSY(sc);
1882 	ADAPTER_UNLOCK(sc);
1883 
1884 	rc = cxgb_uninit_synchronized(p);
1885 
1886 	ADAPTER_LOCK(sc);
1887 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1888 	CLR_BUSY(sc);
1889 	wakeup_one(&sc->flags);
1890 done:
1891 	ADAPTER_UNLOCK(sc);
1892 	return (rc);
1893 }
1894 
1895 /*
1896  * Called on "ifconfig down" and from port_detach.
1897  */
1898 static int
1899 cxgb_uninit_synchronized(struct port_info *pi)
1900 {
1901 	struct adapter *sc = pi->adapter;
1902 	struct ifnet *ifp = pi->ifp;
1903 
1904 	/*
1905 	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1906 	 */
1907 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1908 
1909 	/*
1910 	 * Clear this port's bit from the open device map, and then drain all
1911 	 * the tasks that can access/manipulate this port's port_info or ifp.
1912 	 * We disable this port's interrupts here, so the slow/ext
1913 	 * interrupt tasks won't be enqueued.  The tick task will continue to
1914 	 * be enqueued every second, but the runs after this drain will not see
1915 	 * this port in the open device map.
1916 	 *
1917 	 * A well-behaved task must take open_device_map into account and ignore
1918 	 * ports that are not open.
1919 	 */
1920 	clrbit(&sc->open_device_map, pi->port_id);
1921 	t3_port_intr_disable(sc, pi->port_id);
1922 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1923 	taskqueue_drain(sc->tq, &sc->tick_task);
1924 
1925 	callout_drain(&pi->link_check_ch);
1926 	taskqueue_drain(sc->tq, &pi->link_check_task);
1927 
1928 	PORT_LOCK(pi);
1929 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1930 
1931 	/* disable pause frames */
1932 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1933 
1934 	/* Reset RX FIFO HWM */
1935 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1936 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1937 
1938 	DELAY(100 * 1000);
1939 
1940 	/* Wait for TXFIFO empty */
1941 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1942 			F_TXFIFO_EMPTY, 1, 20, 5);
1943 
1944 	DELAY(100 * 1000);
1945 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1946 
1947 
1948 	pi->phy.ops->power_down(&pi->phy, 1);
1949 
1950 	PORT_UNLOCK(pi);
1951 
1952 	pi->link_config.link_ok = 0;
1953 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1954 
1955 	if ((sc->open_device_map & PORT_MASK) == 0)
1956 		offload_close(&sc->tdev);
1957 
1958 	if (sc->open_device_map == 0)
1959 		cxgb_down(pi->adapter);
1960 
1961 	return (0);
1962 }
1963 
1964 /*
1965  * Mark lro enabled or disabled in all qsets for this port
1966  */
1967 static int
1968 cxgb_set_lro(struct port_info *p, int enabled)
1969 {
1970 	int i;
1971 	struct adapter *adp = p->adapter;
1972 	struct sge_qset *q;
1973 
1974 	for (i = 0; i < p->nqsets; i++) {
1975 		q = &adp->sge.qs[p->first_qset + i];
1976 		q->lro.enabled = (enabled != 0);
1977 	}
1978 	return (0);
1979 }
1980 
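/*
 * if_ioctl handler for cxgb ports.  Most requests bail out early with
 * ENXIO or EBUSY if the port is being detached or the adapter is busy.
 */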
1981 static int
1982 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1983 {
1984 	struct port_info *p = ifp->if_softc;
1985 	struct adapter *sc = p->adapter;
1986 	struct ifreq *ifr = (struct ifreq *)data;
1987 	int flags, error = 0, mtu;
1988 	uint32_t mask;
1989 
1990 	switch (command) {
1991 	case SIOCSIFMTU:
1992 		ADAPTER_LOCK(sc);
1993 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1994 		if (error) {
1995 fail:
1996 			ADAPTER_UNLOCK(sc);
1997 			return (error);
1998 		}
1999 
2000 		mtu = ifr->ifr_mtu;
2001 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
2002 			error = EINVAL;
2003 		} else {
2004 			ifp->if_mtu = mtu;
2005 			PORT_LOCK(p);
2006 			cxgb_update_mac_settings(p);
2007 			PORT_UNLOCK(p);
2008 		}
2009 		ADAPTER_UNLOCK(sc);
2010 		break;
2011 	case SIOCSIFFLAGS:
2012 		ADAPTER_LOCK(sc);
2013 		if (IS_DOOMED(p)) {
2014 			error = ENXIO;
2015 			goto fail;
2016 		}
2017 		if (ifp->if_flags & IFF_UP) {
2018 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2019 				flags = p->if_flags;
2020 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2021 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2022 					if (IS_BUSY(sc)) {
2023 						error = EBUSY;
2024 						goto fail;
2025 					}
2026 					PORT_LOCK(p);
2027 					cxgb_update_mac_settings(p);
2028 					PORT_UNLOCK(p);
2029 				}
2030 				ADAPTER_UNLOCK(sc);
2031 			} else
2032 				error = cxgb_init_locked(p);
2033 			p->if_flags = ifp->if_flags;
2034 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2035 			error = cxgb_uninit_locked(p);
2036 		else
2037 			ADAPTER_UNLOCK(sc);
2038 
2039 		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
2040 		break;
2041 	case SIOCADDMULTI:
2042 	case SIOCDELMULTI:
2043 		ADAPTER_LOCK(sc);
2044 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2045 		if (error)
2046 			goto fail;
2047 
2048 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2049 			PORT_LOCK(p);
2050 			cxgb_update_mac_settings(p);
2051 			PORT_UNLOCK(p);
2052 		}
2053 		ADAPTER_UNLOCK(sc);
2054 
2055 		break;
2056 	case SIOCSIFCAP:
2057 		ADAPTER_LOCK(sc);
2058 		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
2059 		if (error)
2060 			goto fail;
2061 
2062 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2063 		if (mask & IFCAP_TXCSUM) {
2064 			ifp->if_capenable ^= IFCAP_TXCSUM;
2065 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2066 
2067 			if (IFCAP_TSO & ifp->if_capenable &&
2068 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2069 				ifp->if_capenable &= ~IFCAP_TSO;
2070 				ifp->if_hwassist &= ~CSUM_TSO;
2071 				if_printf(ifp,
2072 				    "tso disabled due to -txcsum.\n");
2073 			}
2074 		}
2075 		if (mask & IFCAP_RXCSUM)
2076 			ifp->if_capenable ^= IFCAP_RXCSUM;
2077 		if (mask & IFCAP_TSO4) {
2078 			ifp->if_capenable ^= IFCAP_TSO4;
2079 
2080 			if (IFCAP_TSO & ifp->if_capenable) {
2081 				if (IFCAP_TXCSUM & ifp->if_capenable)
2082 					ifp->if_hwassist |= CSUM_TSO;
2083 				else {
2084 					ifp->if_capenable &= ~IFCAP_TSO;
2085 					ifp->if_hwassist &= ~CSUM_TSO;
2086 					if_printf(ifp,
2087 					    "enable txcsum first.\n");
2088 					error = EAGAIN;
2089 				}
2090 			} else
2091 				ifp->if_hwassist &= ~CSUM_TSO;
2092 		}
2093 		if (mask & IFCAP_LRO) {
2094 			ifp->if_capenable ^= IFCAP_LRO;
2095 
2096 			/* Safe to do this even if cxgb_up has not been called yet */
2097 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2098 		}
2099 		if (mask & IFCAP_VLAN_HWTAGGING) {
2100 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2101 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2102 				PORT_LOCK(p);
2103 				cxgb_update_mac_settings(p);
2104 				PORT_UNLOCK(p);
2105 			}
2106 		}
2107 		if (mask & IFCAP_VLAN_MTU) {
2108 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2109 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2110 				PORT_LOCK(p);
2111 				cxgb_update_mac_settings(p);
2112 				PORT_UNLOCK(p);
2113 			}
2114 		}
2115 		if (mask & IFCAP_VLAN_HWTSO)
2116 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2117 		if (mask & IFCAP_VLAN_HWCSUM)
2118 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2119 
2120 #ifdef VLAN_CAPABILITIES
2121 		VLAN_CAPABILITIES(ifp);
2122 #endif
2123 		ADAPTER_UNLOCK(sc);
2124 		break;
2125 	case SIOCSIFMEDIA:
2126 	case SIOCGIFMEDIA:
2127 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2128 		break;
2129 	default:
2130 		error = ether_ioctl(ifp, command, data);
2131 	}
2132 
2133 	return (error);
2134 }
2135 
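/*
 * ifmedia change callback.  Manually selecting media is not supported.
 */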
2136 static int
2137 cxgb_media_change(struct ifnet *ifp)
2138 {
2139 	return (EOPNOTSUPP);
2140 }
2141 
2142 /*
2143  * Translates phy->modtype to the correct Ethernet media subtype.
2144  */
2145 static int
2146 cxgb_ifm_type(int mod)
2147 {
2148 	switch (mod) {
2149 	case phy_modtype_sr:
2150 		return (IFM_10G_SR);
2151 	case phy_modtype_lr:
2152 		return (IFM_10G_LR);
2153 	case phy_modtype_lrm:
2154 		return (IFM_10G_LRM);
2155 	case phy_modtype_twinax:
2156 		return (IFM_10G_TWINAX);
2157 	case phy_modtype_twinax_long:
2158 		return (IFM_10G_TWINAX_LONG);
2159 	case phy_modtype_none:
2160 		return (IFM_NONE);
2161 	case phy_modtype_unknown:
2162 		return (IFM_UNKNOWN);
2163 	}
2164 
2165 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2166 	return (IFM_UNKNOWN);
2167 }
2168 
2169 /*
2170  * Rebuilds the ifmedia list for this port, and sets the current media.
2171  */
2172 static void
2173 cxgb_build_medialist(struct port_info *p)
2174 {
2175 	struct cphy *phy = &p->phy;
2176 	struct ifmedia *media = &p->media;
2177 	int mod = phy->modtype;
2178 	int m = IFM_ETHER | IFM_FDX;
2179 
2180 	PORT_LOCK(p);
2181 
2182 	ifmedia_removeall(media);
2183 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2184 		/* Copper (RJ45) */
2185 
2186 		if (phy->caps & SUPPORTED_10000baseT_Full)
2187 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2188 
2189 		if (phy->caps & SUPPORTED_1000baseT_Full)
2190 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2191 
2192 		if (phy->caps & SUPPORTED_100baseT_Full)
2193 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2194 
2195 		if (phy->caps & SUPPORTED_10baseT_Full)
2196 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2197 
2198 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2199 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2200 
2201 	} else if (phy->caps & SUPPORTED_TP) {
2202 		/* Copper (CX4) */
2203 
2204 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2205 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2206 
2207 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2208 		ifmedia_set(media, m | IFM_10G_CX4);
2209 
2210 	} else if (phy->caps & SUPPORTED_FIBRE &&
2211 		   phy->caps & SUPPORTED_10000baseT_Full) {
2212 		/* 10G optical (but includes SFP+ twinax) */
2213 
2214 		m |= cxgb_ifm_type(mod);
2215 		if (IFM_SUBTYPE(m) == IFM_NONE)
2216 			m &= ~IFM_FDX;
2217 
2218 		ifmedia_add(media, m, mod, NULL);
2219 		ifmedia_set(media, m);
2220 
2221 	} else if (phy->caps & SUPPORTED_FIBRE &&
2222 		   phy->caps & SUPPORTED_1000baseT_Full) {
2223 		/* 1G optical */
2224 
2225 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2226 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2227 		ifmedia_set(media, m | IFM_1000_SX);
2228 
2229 	} else {
2230 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2231 			    phy->caps));
2232 	}
2233 
2234 	PORT_UNLOCK(p);
2235 }
2236 
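/*
 * ifmedia status callback.  Rebuilds the media list if the module type has
 * changed, reports link state, and, for autoselect (copper RJ45) media,
 * reports the speed that was actually negotiated.
 */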
2237 static void
2238 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2239 {
2240 	struct port_info *p = ifp->if_softc;
2241 	struct ifmedia_entry *cur = p->media.ifm_cur;
2242 	int speed = p->link_config.speed;
2243 
2244 	if (cur->ifm_data != p->phy.modtype) {
2245 		cxgb_build_medialist(p);
2246 		cur = p->media.ifm_cur;
2247 	}
2248 
2249 	ifmr->ifm_status = IFM_AVALID;
2250 	if (!p->link_config.link_ok)
2251 		return;
2252 
2253 	ifmr->ifm_status |= IFM_ACTIVE;
2254 
2255 	/*
2256 	 * active and current will differ iff current media is autoselect.  That
2257 	 * can happen only for copper RJ45.
2258 	 */
2259 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2260 		return;
2261 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2262 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2263 
2264 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2265 	if (speed == SPEED_10000)
2266 		ifmr->ifm_active |= IFM_10G_T;
2267 	else if (speed == SPEED_1000)
2268 		ifmr->ifm_active |= IFM_1000_T;
2269 	else if (speed == SPEED_100)
2270 		ifmr->ifm_active |= IFM_100_TX;
2271 	else if (speed == SPEED_10)
2272 		ifmr->ifm_active |= IFM_10_T;
2273 	else
2274 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2275 			    speed));
2276 }
2277 
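/*
 * Interrupt handler for "slow" (non data-path) interrupts: mask further PL
 * interrupts and defer the actual work to the slow interrupt task.
 */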
2278 static void
2279 cxgb_async_intr(void *data)
2280 {
2281 	adapter_t *sc = data;
2282 
2283 	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2284 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2285 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2286 }
2287 
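/*
 * Deferred link state check.  The callout only enqueues the task; the task
 * itself reschedules the callout for PHYs that lack a link interrupt or
 * while a link fault is outstanding.
 */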
2288 static void
2289 link_check_callout(void *arg)
2290 {
2291 	struct port_info *pi = arg;
2292 	struct adapter *sc = pi->adapter;
2293 
2294 	if (!isset(&sc->open_device_map, pi->port_id))
2295 		return;
2296 
2297 	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2298 }
2299 
2300 static void
2301 check_link_status(void *arg, int pending)
2302 {
2303 	struct port_info *pi = arg;
2304 	struct adapter *sc = pi->adapter;
2305 
2306 	if (!isset(&sc->open_device_map, pi->port_id))
2307 		return;
2308 
2309 	t3_link_changed(sc, pi->port_id);
2310 
2311 	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ))
2312 		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2313 }
2314 
2315 void
2316 t3_os_link_intr(struct port_info *pi)
2317 {
2318 	/*
2319 	 * Schedule a link check in the near future.  If the link is flapping
2320 	 * rapidly, we'll keep resetting the callout and delaying the check until
2321 	 * things stabilize a bit.
2322 	 */
2323 	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2324 }
2325 
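/*
 * T3B2 MAC watchdog, run from the tick handler: poll the MAC on every open
 * port with a good link and restart the MAC and link if the watchdog asks
 * for a reset.
 */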
2326 static void
2327 check_t3b2_mac(struct adapter *sc)
2328 {
2329 	int i;
2330 
2331 	if (sc->flags & CXGB_SHUTDOWN)
2332 		return;
2333 
2334 	for_each_port(sc, i) {
2335 		struct port_info *p = &sc->port[i];
2336 		int status;
2337 #ifdef INVARIANTS
2338 		struct ifnet *ifp = p->ifp;
2339 #endif
2340 
2341 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2342 		    !p->link_config.link_ok)
2343 			continue;
2344 
2345 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2346 			("%s: state mismatch (drv_flags %x, device_map %x)",
2347 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2348 
2349 		PORT_LOCK(p);
2350 		status = t3b2_mac_watchdog_task(&p->mac);
2351 		if (status == 1)
2352 			p->mac.stats.num_toggled++;
2353 		else if (status == 2) {
2354 			struct cmac *mac = &p->mac;
2355 
2356 			cxgb_update_mac_settings(p);
2357 			t3_link_start(&p->phy, mac, &p->link_config);
2358 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2359 			t3_port_intr_enable(sc, p->port_id);
2360 			p->mac.stats.num_resets++;
2361 		}
2362 		PORT_UNLOCK(p);
2363 	}
2364 }
2365 
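/*
 * Once-a-second housekeeping.  cxgb_tick reschedules itself and queues
 * cxgb_tick_handler, which runs the T3B2 MAC watchdog when needed, accounts
 * for starved response queues and empty free lists, refreshes the per-port
 * MAC statistics, and counts RX FIFO overflows.
 */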
2366 static void
2367 cxgb_tick(void *arg)
2368 {
2369 	adapter_t *sc = (adapter_t *)arg;
2370 
2371 	if (sc->flags & CXGB_SHUTDOWN)
2372 		return;
2373 
2374 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2375 	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2376 }
2377 
2378 static void
2379 cxgb_tick_handler(void *arg, int count)
2380 {
2381 	adapter_t *sc = (adapter_t *)arg;
2382 	const struct adapter_params *p = &sc->params;
2383 	int i;
2384 	uint32_t cause, reset;
2385 
2386 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2387 		return;
2388 
2389 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2390 		check_t3b2_mac(sc);
2391 
2392 	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2393 	if (cause) {
2394 		struct sge_qset *qs = &sc->sge.qs[0];
2395 		uint32_t mask, v;
2396 
2397 		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2398 
2399 		mask = 1;
2400 		for (i = 0; i < SGE_QSETS; i++) {
2401 			if (v & mask)
2402 				qs[i].rspq.starved++;
2403 			mask <<= 1;
2404 		}
2405 
2406 		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2407 
2408 		for (i = 0; i < SGE_QSETS * 2; i++) {
2409 			if (v & mask) {
2410 				qs[i / 2].fl[i % 2].empty++;
2411 			}
2412 			mask <<= 1;
2413 		}
2414 
2415 		/* clear */
2416 		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2417 		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2418 	}
2419 
2420 	for (i = 0; i < sc->params.nports; i++) {
2421 		struct port_info *pi = &sc->port[i];
2422 		struct ifnet *ifp = pi->ifp;
2423 		struct cmac *mac = &pi->mac;
2424 		struct mac_stats *mstats = &mac->stats;
2425 		int drops, j;
2426 
2427 		if (!isset(&sc->open_device_map, pi->port_id))
2428 			continue;
2429 
2430 		PORT_LOCK(pi);
2431 		t3_mac_update_stats(mac);
2432 		PORT_UNLOCK(pi);
2433 
2434 		ifp->if_opackets = mstats->tx_frames;
2435 		ifp->if_ipackets = mstats->rx_frames;
2436 		ifp->if_obytes = mstats->tx_octets;
2437 		ifp->if_ibytes = mstats->rx_octets;
2438 		ifp->if_omcasts = mstats->tx_mcast_frames;
2439 		ifp->if_imcasts = mstats->rx_mcast_frames;
2440 		ifp->if_collisions = mstats->tx_total_collisions;
2441 		ifp->if_iqdrops = mstats->rx_cong_drops;
2442 
2443 		drops = 0;
2444 		for (j = pi->first_qset; j < pi->first_qset + pi->nqsets; j++)
2445 			drops += sc->sge.qs[j].txq[TXQ_ETH].txq_mr->br_drops;
2446 		ifp->if_snd.ifq_drops = drops;
2447 
2448 		ifp->if_oerrors =
2449 		    mstats->tx_excess_collisions +
2450 		    mstats->tx_underrun +
2451 		    mstats->tx_len_errs +
2452 		    mstats->tx_mac_internal_errs +
2453 		    mstats->tx_excess_deferral +
2454 		    mstats->tx_fcs_errs;
2455 		ifp->if_ierrors =
2456 		    mstats->rx_jabber +
2457 		    mstats->rx_data_errs +
2458 		    mstats->rx_sequence_errs +
2459 		    mstats->rx_runt +
2460 		    mstats->rx_too_long +
2461 		    mstats->rx_mac_internal_errs +
2462 		    mstats->rx_short +
2463 		    mstats->rx_fcs_errs;
2464 
2465 		if (mac->multiport)
2466 			continue;
2467 
2468 		/* Count rx fifo overflows, once per second */
2469 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2470 		reset = 0;
2471 		if (cause & F_RXFIFO_OVERFLOW) {
2472 			mac->stats.rx_fifo_ovfl++;
2473 			reset |= F_RXFIFO_OVERFLOW;
2474 		}
2475 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2476 	}
2477 }
2478 
2479 static void
2480 touch_bars(device_t dev)
2481 {
2482 	/*
2483 	 * Don't enable yet
2484 	 */
2485 #if !defined(__LP64__) && 0
2486 	u32 v;
2487 
2488 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2489 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2490 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2491 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2492 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2493 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2494 #endif
2495 }
2496 
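/*
 * Write a byte range to the EEPROM.  The EEPROM is accessed in 32-bit words,
 * so an unaligned write is turned into a read-modify-write of the covering
 * word range; write protection is lifted for the duration of the update.
 */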
2497 static int
2498 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2499 {
2500 	uint8_t *buf;
2501 	int err = 0;
2502 	u32 aligned_offset, aligned_len, *p;
2503 	struct adapter *adapter = pi->adapter;
2504 
2505 
2506 	aligned_offset = offset & ~3;
2507 	aligned_len = (len + (offset & 3) + 3) & ~3;
2508 
2509 	if (aligned_offset != offset || aligned_len != len) {
2510 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2511 		if (!buf)
2512 			return (ENOMEM);
2513 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2514 		if (!err && aligned_len > 4)
2515 			err = t3_seeprom_read(adapter,
2516 					      aligned_offset + aligned_len - 4,
2517 					      (u32 *)&buf[aligned_len - 4]);
2518 		if (err)
2519 			goto out;
2520 		memcpy(buf + (offset & 3), data, len);
2521 	} else
2522 		buf = (uint8_t *)(uintptr_t)data;
2523 
2524 	err = t3_seeprom_wp(adapter, 0);
2525 	if (err)
2526 		goto out;
2527 
2528 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2529 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2530 		aligned_offset += 4;
2531 	}
2532 
2533 	if (!err)
2534 		err = t3_seeprom_wp(adapter, 1);
2535 out:
2536 	if (buf != data)
2537 		free(buf, M_DEVBUF);
2538 	return (err);
2539 }
2540 
2541 
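/*
 * Range check for ioctl parameters; a negative value means "leave as is"
 * and always passes.
 */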
2542 static int
2543 in_range(int val, int lo, int hi)
2544 {
2545 	return (val < 0 || (val <= hi && val >= lo));
2546 }
2547 
2548 static int
2549 cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2550 {
2551 	return (0);
2552 }
2553 
2554 static int
2555 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2556 {
2557 	return (0);
2558 }
2559 
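/*
 * Handler for the CHELSIO_* ioctls on the driver's control device.  All of
 * these require driver privileges.
 */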
2560 static int
2561 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2562     int fflag, struct thread *td)
2563 {
2564 	int mmd, error = 0;
2565 	struct port_info *pi = dev->si_drv1;
2566 	adapter_t *sc = pi->adapter;
2567 
2568 #ifdef PRIV_SUPPORTED
2569 	if (priv_check(td, PRIV_DRIVER)) {
2570 		if (cxgb_debug)
2571 			printf("user does not have access to privileged ioctls\n");
2572 		return (EPERM);
2573 	}
2574 #else
2575 	if (suser(td)) {
2576 		if (cxgb_debug)
2577 			printf("user does not have access to privileged ioctls\n");
2578 		return (EPERM);
2579 	}
2580 #endif
2581 
2582 	switch (cmd) {
2583 	case CHELSIO_GET_MIIREG: {
2584 		uint32_t val;
2585 		struct cphy *phy = &pi->phy;
2586 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2587 
2588 		if (!phy->mdio_read)
2589 			return (EOPNOTSUPP);
2590 		if (is_10G(sc)) {
2591 			mmd = mid->phy_id >> 8;
2592 			if (!mmd)
2593 				mmd = MDIO_DEV_PCS;
2594 			else if (mmd > MDIO_DEV_VEND2)
2595 				return (EINVAL);
2596 
2597 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2598 					     mid->reg_num, &val);
2599 		} else
2600 		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2601 					     mid->reg_num & 0x1f, &val);
2602 		if (error == 0)
2603 			mid->val_out = val;
2604 		break;
2605 	}
2606 	case CHELSIO_SET_MIIREG: {
2607 		struct cphy *phy = &pi->phy;
2608 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2609 
2610 		if (!phy->mdio_write)
2611 			return (EOPNOTSUPP);
2612 		if (is_10G(sc)) {
2613 			mmd = mid->phy_id >> 8;
2614 			if (!mmd)
2615 				mmd = MDIO_DEV_PCS;
2616 			else if (mmd > MDIO_DEV_VEND2)
2617 				return (EINVAL);
2618 
2619 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2620 					      mmd, mid->reg_num, mid->val_in);
2621 		} else
2622 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2623 					      mid->reg_num & 0x1f,
2624 					      mid->val_in);
2625 		break;
2626 	}
2627 	case CHELSIO_SETREG: {
2628 		struct ch_reg *edata = (struct ch_reg *)data;
2629 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2630 			return (EFAULT);
2631 		t3_write_reg(sc, edata->addr, edata->val);
2632 		break;
2633 	}
2634 	case CHELSIO_GETREG: {
2635 		struct ch_reg *edata = (struct ch_reg *)data;
2636 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2637 			return (EFAULT);
2638 		edata->val = t3_read_reg(sc, edata->addr);
2639 		break;
2640 	}
2641 	case CHELSIO_GET_SGE_CONTEXT: {
2642 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2643 		mtx_lock_spin(&sc->sge.reg_lock);
2644 		switch (ecntxt->cntxt_type) {
2645 		case CNTXT_TYPE_EGRESS:
2646 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2647 			    ecntxt->data);
2648 			break;
2649 		case CNTXT_TYPE_FL:
2650 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2651 			    ecntxt->data);
2652 			break;
2653 		case CNTXT_TYPE_RSP:
2654 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2655 			    ecntxt->data);
2656 			break;
2657 		case CNTXT_TYPE_CQ:
2658 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2659 			    ecntxt->data);
2660 			break;
2661 		default:
2662 			error = EINVAL;
2663 			break;
2664 		}
2665 		mtx_unlock_spin(&sc->sge.reg_lock);
2666 		break;
2667 	}
2668 	case CHELSIO_GET_SGE_DESC: {
2669 		struct ch_desc *edesc = (struct ch_desc *)data;
2670 		int ret;
2671 		if (edesc->queue_num >= SGE_QSETS * 6)
2672 			return (EINVAL);
2673 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2674 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2675 		if (ret < 0)
2676 			return (EINVAL);
2677 		edesc->size = ret;
2678 		break;
2679 	}
2680 	case CHELSIO_GET_QSET_PARAMS: {
2681 		struct qset_params *q;
2682 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2683 		int q1 = pi->first_qset;
2684 		int nqsets = pi->nqsets;
2685 		int i;
2686 
2687 		if (t->qset_idx >= nqsets)
2688 			return (EINVAL);
2689 
2690 		i = q1 + t->qset_idx;
2691 		q = &sc->params.sge.qset[i];
2692 		t->rspq_size   = q->rspq_size;
2693 		t->txq_size[0] = q->txq_size[0];
2694 		t->txq_size[1] = q->txq_size[1];
2695 		t->txq_size[2] = q->txq_size[2];
2696 		t->fl_size[0]  = q->fl_size;
2697 		t->fl_size[1]  = q->jumbo_size;
2698 		t->polling     = q->polling;
2699 		t->lro         = q->lro;
2700 		t->intr_lat    = q->coalesce_usecs;
2701 		t->cong_thres  = q->cong_thres;
2702 		t->qnum        = i;
2703 
2704 		if ((sc->flags & FULL_INIT_DONE) == 0)
2705 			t->vector = 0;
2706 		else if (sc->flags & USING_MSIX)
2707 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2708 		else
2709 			t->vector = rman_get_start(sc->irq_res);
2710 
2711 		break;
2712 	}
2713 	case CHELSIO_GET_QSET_NUM: {
2714 		struct ch_reg *edata = (struct ch_reg *)data;
2715 		edata->val = pi->nqsets;
2716 		break;
2717 	}
2718 	case CHELSIO_LOAD_FW: {
2719 		uint8_t *fw_data;
2720 		uint32_t vers;
2721 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2722 
2723 		/*
2724 		 * You're allowed to load firmware only before FULL_INIT_DONE is set.
2725 		 *
2726 		 * FW_UPTODATE is also set so the rest of the initialization
2727 		 * will not overwrite what was loaded here.  This gives you the
2728 		 * flexibility to load any firmware (and maybe shoot yourself in
2729 		 * the foot).
2730 		 */
2731 
2732 		ADAPTER_LOCK(sc);
2733 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2734 			ADAPTER_UNLOCK(sc);
2735 			return (EBUSY);
2736 		}
2737 
2738 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2739 		if (!fw_data)
2740 			error = ENOMEM;
2741 		else
2742 			error = copyin(t->buf, fw_data, t->len);
2743 
2744 		if (!error)
2745 			error = -t3_load_fw(sc, fw_data, t->len);
2746 
2747 		if (t3_get_fw_version(sc, &vers) == 0) {
2748 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2749 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2750 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2751 		}
2752 
2753 		if (!error)
2754 			sc->flags |= FW_UPTODATE;
2755 
2756 		free(fw_data, M_DEVBUF);
2757 		ADAPTER_UNLOCK(sc);
2758 		break;
2759 	}
2760 	case CHELSIO_LOAD_BOOT: {
2761 		uint8_t *boot_data;
2762 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2763 
2764 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2765 		if (!boot_data)
2766 			return (ENOMEM);
2767 
2768 		error = copyin(t->buf, boot_data, t->len);
2769 		if (!error)
2770 			error = -t3_load_boot(sc, boot_data, t->len);
2771 
2772 		free(boot_data, M_DEVBUF);
2773 		break;
2774 	}
2775 	case CHELSIO_GET_PM: {
2776 		struct ch_pm *m = (struct ch_pm *)data;
2777 		struct tp_params *p = &sc->params.tp;
2778 
2779 		if (!is_offload(sc))
2780 			return (EOPNOTSUPP);
2781 
2782 		m->tx_pg_sz = p->tx_pg_size;
2783 		m->tx_num_pg = p->tx_num_pgs;
2784 		m->rx_pg_sz  = p->rx_pg_size;
2785 		m->rx_num_pg = p->rx_num_pgs;
2786 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2787 
2788 		break;
2789 	}
2790 	case CHELSIO_SET_PM: {
2791 		struct ch_pm *m = (struct ch_pm *)data;
2792 		struct tp_params *p = &sc->params.tp;
2793 
2794 		if (!is_offload(sc))
2795 			return (EOPNOTSUPP);
2796 		if (sc->flags & FULL_INIT_DONE)
2797 			return (EBUSY);
2798 
2799 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2800 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2801 			return (EINVAL);	/* not power of 2 */
2802 		if (!(m->rx_pg_sz & 0x14000))
2803 			return (EINVAL);	/* not 16KB or 64KB */
2804 		if (!(m->tx_pg_sz & 0x1554000))
2805 			return (EINVAL);
2806 		if (m->tx_num_pg == -1)
2807 			m->tx_num_pg = p->tx_num_pgs;
2808 		if (m->rx_num_pg == -1)
2809 			m->rx_num_pg = p->rx_num_pgs;
2810 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2811 			return (EINVAL);
2812 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2813 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2814 			return (EINVAL);
2815 
2816 		p->rx_pg_size = m->rx_pg_sz;
2817 		p->tx_pg_size = m->tx_pg_sz;
2818 		p->rx_num_pgs = m->rx_num_pg;
2819 		p->tx_num_pgs = m->tx_num_pg;
2820 		break;
2821 	}
2822 	case CHELSIO_SETMTUTAB: {
2823 		struct ch_mtus *m = (struct ch_mtus *)data;
2824 		int i;
2825 
2826 		if (!is_offload(sc))
2827 			return (EOPNOTSUPP);
2828 		if (offload_running(sc))
2829 			return (EBUSY);
2830 		if (m->nmtus != NMTUS)
2831 			return (EINVAL);
2832 		if (m->mtus[0] < 81)         /* accommodate SACK */
2833 			return (EINVAL);
2834 
2835 		/*
2836 		 * MTUs must be in ascending order
2837 		 */
2838 		for (i = 1; i < NMTUS; ++i)
2839 			if (m->mtus[i] < m->mtus[i - 1])
2840 				return (EINVAL);
2841 
2842 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2843 		break;
2844 	}
2845 	case CHELSIO_GETMTUTAB: {
2846 		struct ch_mtus *m = (struct ch_mtus *)data;
2847 
2848 		if (!is_offload(sc))
2849 			return (EOPNOTSUPP);
2850 
2851 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2852 		m->nmtus = NMTUS;
2853 		break;
2854 	}
2855 	case CHELSIO_GET_MEM: {
2856 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2857 		struct mc7 *mem;
2858 		uint8_t *useraddr;
2859 		u64 buf[32];
2860 
2861 		/*
2862 		 * Use these to avoid modifying len/addr in the return
2863 		 * struct
2864 		 */
2865 		uint32_t len = t->len, addr = t->addr;
2866 
2867 		if (!is_offload(sc))
2868 			return (EOPNOTSUPP);
2869 		if (!(sc->flags & FULL_INIT_DONE))
2870 			return (EIO);         /* need the memory controllers */
2871 		if ((addr & 0x7) || (len & 0x7))
2872 			return (EINVAL);
2873 		if (t->mem_id == MEM_CM)
2874 			mem = &sc->cm;
2875 		else if (t->mem_id == MEM_PMRX)
2876 			mem = &sc->pmrx;
2877 		else if (t->mem_id == MEM_PMTX)
2878 			mem = &sc->pmtx;
2879 		else
2880 			return (EINVAL);
2881 
2882 		/*
2883 		 * Version scheme:
2884 		 * bits 0..9: chip version
2885 		 * bits 10..15: chip revision
2886 		 */
2887 		t->version = 3 | (sc->params.rev << 10);
2888 
2889 		/*
2890 		 * Read 256 bytes at a time as len can be large and we don't
2891 		 * want to use huge intermediate buffers.
2892 		 */
2893 		useraddr = (uint8_t *)t->buf;
2894 		while (len) {
2895 			unsigned int chunk = min(len, sizeof(buf));
2896 
2897 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2898 			if (error)
2899 				return (-error);
2900 			if (copyout(buf, useraddr, chunk))
2901 				return (EFAULT);
2902 			useraddr += chunk;
2903 			addr += chunk;
2904 			len -= chunk;
2905 		}
2906 		break;
2907 	}
2908 	case CHELSIO_READ_TCAM_WORD: {
2909 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2910 
2911 		if (!is_offload(sc))
2912 			return (EOPNOTSUPP);
2913 		if (!(sc->flags & FULL_INIT_DONE))
2914 			return (EIO);         /* need MC5 */
2915 		return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf);
2916 		break;
2917 	}
2918 	case CHELSIO_SET_TRACE_FILTER: {
2919 		struct ch_trace *t = (struct ch_trace *)data;
2920 		const struct trace_params *tp;
2921 
2922 		tp = (const struct trace_params *)&t->sip;
2923 		if (t->config_tx)
2924 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2925 					       t->trace_tx);
2926 		if (t->config_rx)
2927 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2928 					       t->trace_rx);
2929 		break;
2930 	}
2931 	case CHELSIO_SET_PKTSCHED: {
2932 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2933 		if (sc->open_device_map == 0)
2934 			return (EAGAIN);
2935 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2936 		    p->binding);
2937 		break;
2938 	}
2939 	case CHELSIO_IFCONF_GETREGS: {
2940 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2941 		int reglen = cxgb_get_regs_len();
2942 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2943 		if (buf == NULL) {
2944 			return (ENOMEM);
2945 		}
2946 		if (regs->len > reglen)
2947 			regs->len = reglen;
2948 		else if (regs->len < reglen)
2949 			error = ENOBUFS;
2950 
2951 		if (!error) {
2952 			cxgb_get_regs(sc, regs, buf);
2953 			error = copyout(buf, regs->data, reglen);
2954 		}
2955 		free(buf, M_DEVBUF);
2956 
2957 		break;
2958 	}
2959 	case CHELSIO_SET_HW_SCHED: {
2960 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2961 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2962 
2963 		if ((sc->flags & FULL_INIT_DONE) == 0)
2964 			return (EAGAIN);       /* need TP to be initialized */
2965 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2966 		    !in_range(t->channel, 0, 1) ||
2967 		    !in_range(t->kbps, 0, 10000000) ||
2968 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2969 		    !in_range(t->flow_ipg, 0,
2970 			      dack_ticks_to_usec(sc, 0x7ff)))
2971 			return (EINVAL);
2972 
2973 		if (t->kbps >= 0) {
2974 			error = t3_config_sched(sc, t->kbps, t->sched);
2975 			if (error < 0)
2976 				return (-error);
2977 		}
2978 		if (t->class_ipg >= 0)
2979 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2980 		if (t->flow_ipg >= 0) {
2981 			t->flow_ipg *= 1000;     /* us -> ns */
2982 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2983 		}
2984 		if (t->mode >= 0) {
2985 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2986 
2987 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2988 					 bit, t->mode ? bit : 0);
2989 		}
2990 		if (t->channel >= 0)
2991 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2992 					 1 << t->sched, t->channel << t->sched);
2993 		break;
2994 	}
2995 	case CHELSIO_GET_EEPROM: {
2996 		int i;
2997 		struct ch_eeprom *e = (struct ch_eeprom *)data;
2998 		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2999 
3000 		if (buf == NULL) {
3001 			return (ENOMEM);
3002 		}
3003 		e->magic = EEPROM_MAGIC;
3004 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3005 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3006 
3007 		if (!error)
3008 			error = copyout(buf + e->offset, e->data, e->len);
3009 
3010 		free(buf, M_DEVBUF);
3011 		break;
3012 	}
3013 	case CHELSIO_CLEAR_STATS: {
3014 		if (!(sc->flags & FULL_INIT_DONE))
3015 			return (EAGAIN);
3016 
3017 		PORT_LOCK(pi);
3018 		t3_mac_update_stats(&pi->mac);
3019 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3020 		PORT_UNLOCK(pi);
3021 		break;
3022 	}
3023 	case CHELSIO_GET_UP_LA: {
3024 		struct ch_up_la *la = (struct ch_up_la *)data;
3025 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3026 		if (buf == NULL) {
3027 			return (ENOMEM);
3028 		}
3029 		if (la->bufsize < LA_BUFSIZE)
3030 			error = ENOBUFS;
3031 
3032 		if (!error)
3033 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3034 					      &la->bufsize, buf);
3035 		if (!error)
3036 			error = copyout(buf, la->data, la->bufsize);
3037 
3038 		free(buf, M_DEVBUF);
3039 		break;
3040 	}
3041 	case CHELSIO_GET_UP_IOQS: {
3042 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3043 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3044 		uint32_t *v;
3045 
3046 		if (buf == NULL) {
3047 			return (ENOMEM);
3048 		}
3049 		if (ioqs->bufsize < IOQS_BUFSIZE)
3050 			error = ENOBUFS;
3051 
3052 		if (!error)
3053 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3054 
3055 		if (!error) {
3056 			v = (uint32_t *)buf;
3057 
3058 			ioqs->ioq_rx_enable = *v++;
3059 			ioqs->ioq_tx_enable = *v++;
3060 			ioqs->ioq_rx_status = *v++;
3061 			ioqs->ioq_tx_status = *v++;
3062 
3063 			error = copyout(v, ioqs->data, ioqs->bufsize);
3064 		}
3065 
3066 		free(buf, M_DEVBUF);
3067 		break;
3068 	}
3069 	case CHELSIO_SET_FILTER: {
3070 		struct ch_filter *f = (struct ch_filter *)data;
3071 		struct filter_info *p;
3072 		unsigned int nfilters = sc->params.mc5.nfilters;
3073 
3074 		if (!is_offload(sc))
3075 			return (EOPNOTSUPP);	/* No TCAM */
3076 		if (!(sc->flags & FULL_INIT_DONE))
3077 			return (EAGAIN);	/* mc5 not setup yet */
3078 		if (nfilters == 0)
3079 			return (EBUSY);		/* TOE will use TCAM */
3080 
3081 		/* sanity checks */
3082 		if (f->filter_id >= nfilters ||
3083 		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3084 		    (f->val.sport && f->mask.sport != 0xffff) ||
3085 		    (f->val.dport && f->mask.dport != 0xffff) ||
3086 		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3087 		    (f->val.vlan_prio &&
3088 			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3089 		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3090 		    f->qset >= SGE_QSETS ||
3091 		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3092 			return (EINVAL);
3093 
3094 		/* Was allocated with M_WAITOK */
3095 		KASSERT(sc->filters, ("filter table NULL\n"));
3096 
3097 		p = &sc->filters[f->filter_id];
3098 		if (p->locked)
3099 			return (EPERM);
3100 
3101 		bzero(p, sizeof(*p));
3102 		p->sip = f->val.sip;
3103 		p->sip_mask = f->mask.sip;
3104 		p->dip = f->val.dip;
3105 		p->sport = f->val.sport;
3106 		p->dport = f->val.dport;
3107 		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3108 		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3109 		    FILTER_NO_VLAN_PRI;
3110 		p->mac_hit = f->mac_hit;
3111 		p->mac_vld = f->mac_addr_idx != 0xffff;
3112 		p->mac_idx = f->mac_addr_idx;
3113 		p->pkt_type = f->proto;
3114 		p->report_filter_id = f->want_filter_id;
3115 		p->pass = f->pass;
3116 		p->rss = f->rss;
3117 		p->qset = f->qset;
3118 
3119 		error = set_filter(sc, f->filter_id, p);
3120 		if (error == 0)
3121 			p->valid = 1;
3122 		break;
3123 	}
3124 	case CHELSIO_DEL_FILTER: {
3125 		struct ch_filter *f = (struct ch_filter *)data;
3126 		struct filter_info *p;
3127 		unsigned int nfilters = sc->params.mc5.nfilters;
3128 
3129 		if (!is_offload(sc))
3130 			return (EOPNOTSUPP);
3131 		if (!(sc->flags & FULL_INIT_DONE))
3132 			return (EAGAIN);
3133 		if (nfilters == 0 || sc->filters == NULL)
3134 			return (EINVAL);
3135 		if (f->filter_id >= nfilters)
3136 			return (EINVAL);
3137 
3138 		p = &sc->filters[f->filter_id];
3139 		if (p->locked)
3140 			return (EPERM);
3141 		if (!p->valid)
3142 			return (EFAULT); /* Read "Bad address" as "Bad index" */
3143 
3144 		bzero(p, sizeof(*p));
3145 		p->sip = p->sip_mask = 0xffffffff;
3146 		p->vlan = 0xfff;
3147 		p->vlan_prio = FILTER_NO_VLAN_PRI;
3148 		p->pkt_type = 1;
3149 		error = set_filter(sc, f->filter_id, p);
3150 		break;
3151 	}
3152 	case CHELSIO_GET_FILTER: {
3153 		struct ch_filter *f = (struct ch_filter *)data;
3154 		struct filter_info *p;
3155 		unsigned int i, nfilters = sc->params.mc5.nfilters;
3156 
3157 		if (!is_offload(sc))
3158 			return (EOPNOTSUPP);
3159 		if (!(sc->flags & FULL_INIT_DONE))
3160 			return (EAGAIN);
3161 		if (nfilters == 0 || sc->filters == NULL)
3162 			return (EINVAL);
3163 
3164 		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3165 		for (; i < nfilters; i++) {
3166 			p = &sc->filters[i];
3167 			if (!p->valid)
3168 				continue;
3169 
3170 			bzero(f, sizeof(*f));
3171 
3172 			f->filter_id = i;
3173 			f->val.sip = p->sip;
3174 			f->mask.sip = p->sip_mask;
3175 			f->val.dip = p->dip;
3176 			f->mask.dip = p->dip ? 0xffffffff : 0;
3177 			f->val.sport = p->sport;
3178 			f->mask.sport = p->sport ? 0xffff : 0;
3179 			f->val.dport = p->dport;
3180 			f->mask.dport = p->dport ? 0xffff : 0;
3181 			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3182 			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3183 			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3184 			    0 : p->vlan_prio;
3185 			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3186 			    0 : FILTER_NO_VLAN_PRI;
3187 			f->mac_hit = p->mac_hit;
3188 			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3189 			f->proto = p->pkt_type;
3190 			f->want_filter_id = p->report_filter_id;
3191 			f->pass = p->pass;
3192 			f->rss = p->rss;
3193 			f->qset = p->qset;
3194 
3195 			break;
3196 		}
3197 
3198 		if (i == nfilters)
3199 			f->filter_id = 0xffffffff;
3200 		break;
3201 	}
3202 	default:
3203 		return (EOPNOTSUPP);
3204 		break;
3205 	}
3206 
3207 	return (error);
3208 }
3209 
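/*
 * Copy the registers in [start, end] into the dump buffer at the offsets
 * corresponding to their register addresses.
 */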
3210 static __inline void
3211 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3212     unsigned int end)
3213 {
3214 	uint32_t *p = (uint32_t *)(buf + start);
3215 
3216 	for ( ; start <= end; start += sizeof(uint32_t))
3217 		*p++ = t3_read_reg(ap, start);
3218 }
3219 
3220 #define T3_REGMAP_SIZE (3 * 1024)
3221 static int
3222 cxgb_get_regs_len(void)
3223 {
3224 	return (T3_REGMAP_SIZE);
3225 }
3226 
3227 static void
3228 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3229 {
3230 
3231 	/*
3232 	 * Version scheme:
3233 	 * bits 0..9: chip version
3234 	 * bits 10..15: chip revision
3235 	 * bit 31: set for PCIe cards
3236 	 */
3237 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3238 
3239 	/*
3240 	 * We skip the MAC statistics registers because they are clear-on-read.
3241 	 * Also reading multi-register stats would need to synchronize with the
3242 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3243 	 */
3244 	memset(buf, 0, cxgb_get_regs_len());
3245 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3246 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3247 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3248 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3249 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3250 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3251 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3252 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3253 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3254 }
3255 
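/*
 * Allocate the software filter table.  The last entry is reserved as a
 * locked default that passes packets to RSS; setup_hw_filters programs it
 * (and any other locked entries) into the hardware.
 */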
3256 static int
3257 alloc_filters(struct adapter *sc)
3258 {
3259 	struct filter_info *p;
3260 	unsigned int nfilters = sc->params.mc5.nfilters;
3261 
3262 	if (nfilters == 0)
3263 		return (0);
3264 
3265 	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3266 	sc->filters = p;
3267 
3268 	p = &sc->filters[nfilters - 1];
3269 	p->vlan = 0xfff;
3270 	p->vlan_prio = FILTER_NO_VLAN_PRI;
3271 	p->pass = p->rss = p->valid = p->locked = 1;
3272 
3273 	return (0);
3274 }
3275 
3276 static int
3277 setup_hw_filters(struct adapter *sc)
3278 {
3279 	int i, rc;
3280 	unsigned int nfilters = sc->params.mc5.nfilters;
3281 
3282 	if (!sc->filters)
3283 		return (0);
3284 
3285 	t3_enable_filters(sc);
3286 
3287 	for (i = rc = 0; i < nfilters && !rc; i++) {
3288 		if (sc->filters[i].locked)
3289 			rc = set_filter(sc, i, &sc->filters[i]);
3290 	}
3291 
3292 	return (rc);
3293 }
3294 
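/*
 * Program one hardware filter: a CPL_PASS_OPEN_REQ and two CPL_SET_TCB_FIELD
 * commands are packed into a single atomic work request and sent on the
 * management queue.  Filters that steer to a specific queue set (pass set,
 * rss clear) need one more TCB write to install the response queue index.
 */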
3295 static int
3296 set_filter(struct adapter *sc, int id, const struct filter_info *f)
3297 {
3298 	int len;
3299 	struct mbuf *m;
3300 	struct ulp_txpkt *txpkt;
3301 	struct work_request_hdr *wr;
3302 	struct cpl_pass_open_req *oreq;
3303 	struct cpl_set_tcb_field *sreq;
3304 
3305 	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3306 	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3307 
3308 	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3309 	      sc->params.mc5.nfilters;
3310 
3311 	m = m_gethdr(M_WAITOK, MT_DATA);
3312 	m->m_len = m->m_pkthdr.len = len;
3313 	bzero(mtod(m, char *), len);
3314 
3315 	wr = mtod(m, struct work_request_hdr *);
3316 	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3317 
3318 	oreq = (struct cpl_pass_open_req *)(wr + 1);
3319 	txpkt = (struct ulp_txpkt *)oreq;
3320 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3321 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3322 	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3323 	oreq->local_port = htons(f->dport);
3324 	oreq->peer_port = htons(f->sport);
3325 	oreq->local_ip = htonl(f->dip);
3326 	oreq->peer_ip = htonl(f->sip);
3327 	oreq->peer_netmask = htonl(f->sip_mask);
3328 	oreq->opt0h = 0;
3329 	oreq->opt0l = htonl(F_NO_OFFLOAD);
3330 	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3331 			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3332 			 V_VLAN_PRI(f->vlan_prio >> 1) |
3333 			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3334 			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3335 			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3336 
3337 	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3338 	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3339 			  (f->report_filter_id << 15) | (1 << 23) |
3340 			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3341 	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3342 	t3_mgmt_tx(sc, m);
3343 
3344 	if (f->pass && !f->rss) {
3345 		len = sizeof(*sreq);
3346 		m = m_gethdr(M_WAITOK, MT_DATA);
3347 		m->m_len = m->m_pkthdr.len = len;
3348 		bzero(mtod(m, char *), len);
3349 		sreq = mtod(m, struct cpl_set_tcb_field *);
3350 		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3351 		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3352 				 (u64)sc->rrss_map[f->qset] << 19);
3353 		t3_mgmt_tx(sc, m);
3354 	}
3355 	return (0);
3356 }
3357 
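/*
 * Helpers for building CPL_SET_TCB_FIELD commands.  mk_set_tcb_field fills
 * in the request itself (no reply requested); set_tcb_field_ulp additionally
 * wraps it as a ULP_TXPKT so it can be embedded in a larger work request.
 */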
3358 static inline void
3359 mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3360     unsigned int word, u64 mask, u64 val)
3361 {
3362 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3363 	req->reply = V_NO_REPLY(1);
3364 	req->cpu_idx = 0;
3365 	req->word = htons(word);
3366 	req->mask = htobe64(mask);
3367 	req->val = htobe64(val);
3368 }
3369 
3370 static inline void
3371 set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3372     unsigned int word, u64 mask, u64 val)
3373 {
3374 	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3375 
3376 	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3377 	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3378 	mk_set_tcb_field(req, tid, word, mask, val);
3379 }
3380