xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision eb6d21b4ca6d668cf89afd99eef7baeafa712197)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57 
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66 
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78 
79 #include <cxgb_include.h>
80 
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84 
85 static int cxgb_setup_interrupts(adapter_t *);
86 static void cxgb_teardown_interrupts(adapter_t *);
87 static int cxgb_begin_op(struct port_info *, const char *);
88 static int cxgb_begin_detach(struct port_info *);
89 static int cxgb_end_op(struct port_info *);
90 static void cxgb_init(void *);
91 static int cxgb_init_synchronized(struct port_info *);
92 static int cxgb_uninit_synchronized(struct port_info *);
93 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
94 static int cxgb_media_change(struct ifnet *);
95 static int cxgb_ifm_type(int);
96 static void cxgb_build_medialist(struct port_info *);
97 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
98 static int setup_sge_qsets(adapter_t *);
99 static void cxgb_async_intr(void *);
100 static void cxgb_ext_intr_handler(void *, int);
101 static void cxgb_tick_handler(void *, int);
102 static void cxgb_tick(void *);
103 static void setup_rss(adapter_t *sc);
104 
105 /* Attachment glue for the PCI controller end of the device.  Each port of
106  * the device is attached separately, as defined later.
107  */
108 static int cxgb_controller_probe(device_t);
109 static int cxgb_controller_attach(device_t);
110 static int cxgb_controller_detach(device_t);
111 static void cxgb_free(struct adapter *);
112 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
113     unsigned int end);
114 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
115 static int cxgb_get_regs_len(void);
116 static int offload_open(struct port_info *pi);
117 static void touch_bars(device_t dev);
118 static int offload_close(struct t3cdev *tdev);
119 static void cxgb_update_mac_settings(struct port_info *p);
120 
121 static device_method_t cxgb_controller_methods[] = {
122 	DEVMETHOD(device_probe,		cxgb_controller_probe),
123 	DEVMETHOD(device_attach,	cxgb_controller_attach),
124 	DEVMETHOD(device_detach,	cxgb_controller_detach),
125 
126 	/* bus interface */
127 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
128 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
129 
130 	{ 0, 0 }
131 };
132 
133 static driver_t cxgb_controller_driver = {
134 	"cxgbc",
135 	cxgb_controller_methods,
136 	sizeof(struct adapter)
137 };
138 
139 static devclass_t	cxgb_controller_devclass;
140 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
141 
142 /*
143  * Attachment glue for the ports.  Attachment is done directly to the
144  * controller device.
145  */
146 static int cxgb_port_probe(device_t);
147 static int cxgb_port_attach(device_t);
148 static int cxgb_port_detach(device_t);
149 
150 static device_method_t cxgb_port_methods[] = {
151 	DEVMETHOD(device_probe,		cxgb_port_probe),
152 	DEVMETHOD(device_attach,	cxgb_port_attach),
153 	DEVMETHOD(device_detach,	cxgb_port_detach),
154 	{ 0, 0 }
155 };
156 
157 static driver_t cxgb_port_driver = {
158 	"cxgb",
159 	cxgb_port_methods,
160 	0
161 };
162 
163 static d_ioctl_t cxgb_extension_ioctl;
164 static d_open_t cxgb_extension_open;
165 static d_close_t cxgb_extension_close;
166 
167 static struct cdevsw cxgb_cdevsw = {
168        .d_version =    D_VERSION,
169        .d_flags =      0,
170        .d_open =       cxgb_extension_open,
171        .d_close =      cxgb_extension_close,
172        .d_ioctl =      cxgb_extension_ioctl,
173        .d_name =       "cxgb",
174 };
175 
176 static devclass_t	cxgb_port_devclass;
177 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
178 
179 /*
180  * The driver uses the best interrupt scheme available on a platform in the
181  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
182  * of these schemes the driver may consider as follows:
183  *
184  * msi = 2: choose from among all three options
185  * msi = 1: only consider MSI and pin interrupts
186  * msi = 0: force pin interrupts
187  */
188 static int msi_allowed = 2;
189 
190 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
191 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
192 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
193     "MSI-X, MSI, INTx selector");
194 
195 /*
196  * The driver enables offload by default.
197  * To disable it, use ofld_disable = 1.
198  */
199 static int ofld_disable = 0;
200 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
201 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
202     "disable ULP offload");
203 
204 /*
205  * The driver uses an auto-queue algorithm by default.
206  * To disable it and force a single queue-set per port, use multiq = 0
207  */
208 static int multiq = 1;
209 TUNABLE_INT("hw.cxgb.multiq", &multiq);
210 SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
211     "use min(ncpus/ports, 8) queue-sets per port");
212 
213 /*
214  * By default the driver will not update the firmware unless
215  * it was compiled against a newer version
216  *
217  */
218 static int force_fw_update = 0;
219 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
220 SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
221     "update firmware even if up to date");
222 
223 int cxgb_use_16k_clusters = 1;
224 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
225 SYSCTL_UINT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
226     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
227 
228 /*
229  * Tune the size of the output queue.
230  */
231 int cxgb_snd_queue_len = IFQ_MAXLEN;
232 TUNABLE_INT("hw.cxgb.snd_queue_len", &cxgb_snd_queue_len);
233 SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_queue_len, CTLFLAG_RDTUN,
234     &cxgb_snd_queue_len, 0, "send queue size");
235 
236 
237 enum {
238 	MAX_TXQ_ENTRIES      = 16384,
239 	MAX_CTRL_TXQ_ENTRIES = 1024,
240 	MAX_RSPQ_ENTRIES     = 16384,
241 	MAX_RX_BUFFERS       = 16384,
242 	MAX_RX_JUMBO_BUFFERS = 16384,
243 	MIN_TXQ_ENTRIES      = 4,
244 	MIN_CTRL_TXQ_ENTRIES = 4,
245 	MIN_RSPQ_ENTRIES     = 32,
246 	MIN_FL_ENTRIES       = 32,
247 	MIN_FL_JUMBO_ENTRIES = 32
248 };
249 
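/*
 * Software image of one hardware packet filter: the match fields (addresses,
 * ports, VLAN) and the action/state bits that get programmed into the chip.
 */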
250 struct filter_info {
251 	u32 sip;
252 	u32 sip_mask;
253 	u32 dip;
254 	u16 sport;
255 	u16 dport;
256 	u32 vlan:12;
257 	u32 vlan_prio:3;
258 	u32 mac_hit:1;
259 	u32 mac_idx:4;
260 	u32 mac_vld:1;
261 	u32 pkt_type:2;
262 	u32 report_filter_id:1;
263 	u32 pass:1;
264 	u32 rss:1;
265 	u32 qset:3;
266 	u32 locked:1;
267 	u32 valid:1;
268 };
269 
270 enum { FILTER_NO_VLAN_PRI = 7 };
271 
272 #define EEPROM_MAGIC 0x38E2F10C
273 
274 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
275 
276 /* Table for probing the cards.  The desc field isn't actually used */
277 struct cxgb_ident {
278 	uint16_t	vendor;
279 	uint16_t	device;
280 	int		index;
281 	char		*desc;
282 } cxgb_identifiers[] = {
283 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
284 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
285 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
286 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
287 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
288 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
289 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
290 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
291 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
292 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
293 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
294 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
295 	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
296 	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
297 	{0, 0, 0, NULL}
298 };
299 
300 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
301 
302 
303 static __inline char
304 t3rev2char(struct adapter *adapter)
305 {
306 	char rev = 'z';
307 
308 	switch(adapter->params.rev) {
309 	case T3_REV_A:
310 		rev = 'a';
311 		break;
312 	case T3_REV_B:
313 	case T3_REV_B2:
314 		rev = 'b';
315 		break;
316 	case T3_REV_C:
317 		rev = 'c';
318 		break;
319 	}
320 	return rev;
321 }
322 
323 static struct cxgb_ident *
324 cxgb_get_ident(device_t dev)
325 {
326 	struct cxgb_ident *id;
327 
328 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
329 		if ((id->vendor == pci_get_vendor(dev)) &&
330 		    (id->device == pci_get_device(dev))) {
331 			return (id);
332 		}
333 	}
334 	return (NULL);
335 }
336 
337 static const struct adapter_info *
338 cxgb_get_adapter_info(device_t dev)
339 {
340 	struct cxgb_ident *id;
341 	const struct adapter_info *ai;
342 
343 	id = cxgb_get_ident(dev);
344 	if (id == NULL)
345 		return (NULL);
346 
347 	ai = t3_get_adapter_info(id->index);
348 
349 	return (ai);
350 }
351 
352 static int
353 cxgb_controller_probe(device_t dev)
354 {
355 	const struct adapter_info *ai;
356 	char *ports, buf[80];
357 	int nports;
358 
359 	ai = cxgb_get_adapter_info(dev);
360 	if (ai == NULL)
361 		return (ENXIO);
362 
363 	nports = ai->nports0 + ai->nports1;
364 	if (nports == 1)
365 		ports = "port";
366 	else
367 		ports = "ports";
368 
369 	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
370 	device_set_desc_copy(dev, buf);
371 	return (BUS_PROBE_DEFAULT);
372 }
373 
374 #define FW_FNAME "cxgb_t3fw"
375 #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
376 #define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
377 
378 static int
379 upgrade_fw(adapter_t *sc)
380 {
381 #ifdef FIRMWARE_LATEST
382 	const struct firmware *fw;
383 #else
384 	struct firmware *fw;
385 #endif
386 	int status;
387 
388 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
389 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
390 		return (ENOENT);
391 	} else
392 		device_printf(sc->dev, "updating firmware on card\n");
393 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
394 
395 	device_printf(sc->dev, "firmware update returned %s %d\n", (status == 0) ? "success" : "fail", status);
396 
397 	firmware_put(fw, FIRMWARE_UNLOAD);
398 
399 	return (status);
400 }
401 
402 /*
403  * The cxgb_controller_attach function is responsible for the initial
404  * bringup of the device.  Its responsibilities include:
405  *
406  *  1. Determine if the device supports MSI or MSI-X.
407  *  2. Allocate bus resources so that we can access the Base Address Register
408  *  3. Create and initialize mutexes for the controller and its control
409  *     logic such as SGE and MDIO.
410  *  4. Call hardware specific setup routine for the adapter as a whole.
411  *  5. Allocate the BAR for doing MSI-X.
412  *  6. Set up the line interrupt iff MSI-X is not supported.
413  *  7. Create the driver's taskq.
414  *  8. Start one task queue service thread.
415  *  9. Check if the firmware and SRAM are up-to-date.  They will be
416  *     auto-updated later (before FULL_INIT_DONE), if required.
417  * 10. Create a child device for each MAC (port)
418  * 11. Initialize T3 private state.
419  * 12. Trigger the LED
420  * 13. Set up offload iff supported.
421  * 14. Reset/restart the tick callout.
422  * 15. Attach sysctls
423  *
424  * NOTE: Any modification or deviation from this list MUST be reflected in
425  * the above comment.  Failure to do so will result in problems on various
426  * error conditions including link flapping.
427  */
428 static int
429 cxgb_controller_attach(device_t dev)
430 {
431 	device_t child;
432 	const struct adapter_info *ai;
433 	struct adapter *sc;
434 	int i, error = 0;
435 	uint32_t vers;
436 	int port_qsets = 1;
437 #ifdef MSI_SUPPORTED
438 	int msi_needed, reg;
439 #endif
440 	char buf[80];
441 
442 	sc = device_get_softc(dev);
443 	sc->dev = dev;
444 	sc->msi_count = 0;
445 	ai = cxgb_get_adapter_info(dev);
446 
447 	/*
448 	 * XXX not really related but a recent addition
449 	 */
450 #ifdef MSI_SUPPORTED
451 	/* find the PCIe link width and set max read request to 4KB */
452 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
453 		uint16_t lnk, pectl;
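		/*
		 * Standard PCIe capability layout: Device Control is at
		 * reg + 0x8 and Link Status at reg + 0x12.
		 */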
454 		lnk = pci_read_config(dev, reg + 0x12, 2);
455 		sc->link_width = (lnk >> 4) & 0x3f;
456 
457 		pectl = pci_read_config(dev, reg + 0x8, 2);
458 		pectl = (pectl & ~0x7000) | (5 << 12);
459 		pci_write_config(dev, reg + 0x8, pectl, 2);
460 	}
461 
462 	if (sc->link_width != 0 && sc->link_width <= 4 &&
463 	    (ai->nports0 + ai->nports1) <= 2) {
464 		device_printf(sc->dev,
465 		    "PCIe x%d Link, expect reduced performance\n",
466 		    sc->link_width);
467 	}
468 #endif
469 	touch_bars(dev);
470 	pci_enable_busmaster(dev);
471 	/*
472 	 * Allocate the registers and make them available to the driver.
473 	 * The registers that we care about for NIC mode are in BAR 0
474 	 */
475 	sc->regs_rid = PCIR_BAR(0);
476 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
477 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
478 		device_printf(dev, "Cannot allocate BAR region 0\n");
479 		return (ENXIO);
480 	}
481 	sc->udbs_rid = PCIR_BAR(2);
482 	sc->udbs_res = NULL;
483 	if (is_offload(sc) &&
484 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
485 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
486 		device_printf(dev, "Cannot allocate BAR region 1\n");
487 		error = ENXIO;
488 		goto out;
489 	}
490 
491 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
492 	    device_get_unit(dev));
493 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
494 
495 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
496 	    device_get_unit(dev));
497 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
498 	    device_get_unit(dev));
499 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
500 	    device_get_unit(dev));
501 
502 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
503 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
504 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
505 
506 	sc->bt = rman_get_bustag(sc->regs_res);
507 	sc->bh = rman_get_bushandle(sc->regs_res);
508 	sc->mmio_len = rman_get_size(sc->regs_res);
509 
510 	for (i = 0; i < MAX_NPORTS; i++)
511 		sc->port[i].adapter = sc;
512 
513 	if (t3_prep_adapter(sc, ai, 1) < 0) {
514 		printf("prep adapter failed\n");
515 		error = ENODEV;
516 		goto out;
517 	}
518         /* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
519 	 * enough messages for the queue sets.  If that fails, try falling
520 	 * back to MSI.  If that fails, then try falling back to the legacy
521 	 * interrupt pin model.
522 	 */
523 #ifdef MSI_SUPPORTED
524 
525 	sc->msix_regs_rid = 0x20;
526 	if ((msi_allowed >= 2) &&
527 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
528 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
529 
530 		if (multiq)
531 			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
532 		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
533 
534 		if (pci_msix_count(dev) == 0 ||
535 		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
536 		    sc->msi_count != msi_needed) {
537 			device_printf(dev, "alloc msix failed - "
538 				      "msi_count=%d, msi_needed=%d, err=%d; "
539 				      "will try MSI\n", sc->msi_count,
540 				      msi_needed, error);
541 			sc->msi_count = 0;
542 			port_qsets = 1;
543 			pci_release_msi(dev);
544 			bus_release_resource(dev, SYS_RES_MEMORY,
545 			    sc->msix_regs_rid, sc->msix_regs_res);
546 			sc->msix_regs_res = NULL;
547 		} else {
548 			sc->flags |= USING_MSIX;
549 			sc->cxgb_intr = cxgb_async_intr;
550 			device_printf(dev,
551 				      "using MSI-X interrupts (%u vectors)\n",
552 				      sc->msi_count);
553 		}
554 	}
555 
556 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
557 		sc->msi_count = 1;
558 		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
559 			device_printf(dev, "alloc msi failed - "
560 				      "err=%d; will try INTx\n", error);
561 			sc->msi_count = 0;
562 			port_qsets = 1;
563 			pci_release_msi(dev);
564 		} else {
565 			sc->flags |= USING_MSI;
566 			sc->cxgb_intr = t3_intr_msi;
567 			device_printf(dev, "using MSI interrupts\n");
568 		}
569 	}
570 #endif
571 	if (sc->msi_count == 0) {
572 		device_printf(dev, "using line interrupts\n");
573 		sc->cxgb_intr = t3b_intr;
574 	}
575 
576 	/* Create a private taskqueue thread for handling driver events */
577 #ifdef TASKQUEUE_CURRENT
578 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
579 	    taskqueue_thread_enqueue, &sc->tq);
580 #else
581 	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
582 	    taskqueue_thread_enqueue, &sc->tq);
583 #endif
584 	if (sc->tq == NULL) {
585 		device_printf(dev, "failed to allocate controller task queue\n");
586 		goto out;
587 	}
588 
589 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
590 	    device_get_nameunit(dev));
591 	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
592 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
593 
594 
595 	/* Create a periodic callout for checking adapter status */
596 	callout_init(&sc->cxgb_tick_ch, TRUE);
597 
598 	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
599 		/*
600 		 * Warn user that a firmware update will be attempted in init.
601 		 */
602 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
603 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
604 		sc->flags &= ~FW_UPTODATE;
605 	} else {
606 		sc->flags |= FW_UPTODATE;
607 	}
608 
609 	if (t3_check_tpsram_version(sc) < 0) {
610 		/*
611 		 * Warn user that a firmware update will be attempted in init.
612 		 */
613 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
614 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
615 		sc->flags &= ~TPS_UPTODATE;
616 	} else {
617 		sc->flags |= TPS_UPTODATE;
618 	}
619 
620 	/*
621 	 * Create a child device for each MAC.  The ethernet attachment
622 	 * will be done in these children.
623 	 */
624 	for (i = 0; i < (sc)->params.nports; i++) {
625 		struct port_info *pi;
626 
627 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
628 			device_printf(dev, "failed to add child port\n");
629 			error = EINVAL;
630 			goto out;
631 		}
632 		pi = &sc->port[i];
633 		pi->adapter = sc;
634 		pi->nqsets = port_qsets;
635 		pi->first_qset = i*port_qsets;
636 		pi->port_id = i;
637 		pi->tx_chan = i >= ai->nports0;
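		/* Ports on channel 0 get even txpkt interfaces, channel 1 ports get odd. */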
638 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
639 		sc->rxpkt_map[pi->txpkt_intf] = i;
640 		sc->port[i].tx_chan = i >= ai->nports0;
641 		sc->portdev[i] = child;
642 		device_set_softc(child, pi);
643 	}
644 	if ((error = bus_generic_attach(dev)) != 0)
645 		goto out;
646 
647 	/* initialize sge private state */
648 	t3_sge_init_adapter(sc);
649 
650 	t3_led_ready(sc);
651 
652 	cxgb_offload_init();
653 	if (is_offload(sc)) {
654 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
655 		cxgb_adapter_ofld(sc);
656         }
657 	error = t3_get_fw_version(sc, &vers);
658 	if (error)
659 		goto out;
660 
661 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
662 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
663 	    G_FW_VERSION_MICRO(vers));
664 
665 	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
666 		 ai->desc, is_offload(sc) ? "R" : "",
667 		 sc->params.vpd.ec, sc->params.vpd.sn);
668 	device_set_desc_copy(dev, buf);
669 
670 	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
671 		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
672 		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
673 
674 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
675 	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
676 	t3_add_attach_sysctls(sc);
677 out:
678 	if (error)
679 		cxgb_free(sc);
680 
681 	return (error);
682 }
683 
684 /*
685  * The cxgb_controller_detach routine is called when the device is
686  * unloaded from the system.
687  */
688 
689 static int
690 cxgb_controller_detach(device_t dev)
691 {
692 	struct adapter *sc;
693 
694 	sc = device_get_softc(dev);
695 
696 	cxgb_free(sc);
697 
698 	return (0);
699 }
700 
701 /*
702  * cxgb_free() is called by the cxgb_controller_detach() routine
703  * to tear down the structures that were built up in
704  * cxgb_controller_attach(), and should be the final piece of work
705  * done when fully unloading the driver.
706  *
707  *
708  *  1. Shutting down the threads started by the cxgb_controller_attach()
709  *     routine.
710  *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
711  *  3. Detaching all of the port devices created during the
712  *     cxgb_controller_attach() routine.
713  *  4. Removing the device children created via cxgb_controller_attach().
714  *  5. Releasing PCI resources associated with the device.
715  *  6. Turning off the offload support, iff it was turned on.
716  *  7. Destroying the mutexes created in cxgb_controller_attach().
717  *
718  */
719 static void
720 cxgb_free(struct adapter *sc)
721 {
722 	int i;
723 
724 	ADAPTER_LOCK(sc);
725 	sc->flags |= CXGB_SHUTDOWN;
726 	ADAPTER_UNLOCK(sc);
727 
728 	/*
729 	 * Make sure all child devices are gone.
730 	 */
731 	bus_generic_detach(sc->dev);
732 	for (i = 0; i < (sc)->params.nports; i++) {
733 		if (sc->portdev[i] &&
734 		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
735 			device_printf(sc->dev, "failed to delete child port\n");
736 	}
737 
738 	/*
739 	 * At this point, it is as if cxgb_port_detach has run on all ports, and
740 	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
741 	 * all open devices have been closed.
742 	 */
743 	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
744 					   __func__, sc->open_device_map));
745 	for (i = 0; i < sc->params.nports; i++) {
746 		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
747 						  __func__, i));
748 	}
749 
750 	/*
751 	 * Finish off the adapter's callouts.
752 	 */
753 	callout_drain(&sc->cxgb_tick_ch);
754 	callout_drain(&sc->sge_timer_ch);
755 
756 	/*
757 	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
758 	 * sysctls are cleaned up by the kernel linker.
759 	 */
760 	if (sc->flags & FULL_INIT_DONE) {
761  		t3_free_sge_resources(sc);
762  		sc->flags &= ~FULL_INIT_DONE;
763  	}
764 
765 	/*
766 	 * Release all interrupt resources.
767 	 */
768 	cxgb_teardown_interrupts(sc);
769 #ifdef MSI_SUPPORTED
770 	if (sc->flags & (USING_MSI | USING_MSIX)) {
771 		device_printf(sc->dev, "releasing msi message(s)\n");
772 		pci_release_msi(sc->dev);
773 	} else {
774 		device_printf(sc->dev, "no msi message to release\n");
775 	}
776 
777 	if (sc->msix_regs_res != NULL) {
778 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
779 		    sc->msix_regs_res);
780 	}
781 #endif
782 
783 	/*
784 	 * Free the adapter's taskqueue.
785 	 */
786 	if (sc->tq != NULL) {
787 		taskqueue_free(sc->tq);
788 		sc->tq = NULL;
789 	}
790 
791 	if (is_offload(sc)) {
792 		clrbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
793 		cxgb_adapter_unofld(sc);
794 	}
795 
796 #ifdef notyet
797 	if (sc->flags & CXGB_OFLD_INIT)
798 		cxgb_offload_deactivate(sc);
799 #endif
800 	free(sc->filters, M_DEVBUF);
801 	t3_sge_free(sc);
802 
803 	cxgb_offload_exit();
804 
805 	if (sc->udbs_res != NULL)
806 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
807 		    sc->udbs_res);
808 
809 	if (sc->regs_res != NULL)
810 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
811 		    sc->regs_res);
812 
813 	MTX_DESTROY(&sc->mdio_lock);
814 	MTX_DESTROY(&sc->sge.reg_lock);
815 	MTX_DESTROY(&sc->elmer_lock);
816 	ADAPTER_LOCK_DEINIT(sc);
817 }
818 
819 /**
820  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
821  *	@sc: the controller softc
822  *
823  *	Determines how many sets of SGE queues to use and initializes them.
824  *	We support multiple queue sets per port if we have MSI-X, otherwise
825  *	just one queue set per port.
826  */
827 static int
828 setup_sge_qsets(adapter_t *sc)
829 {
830 	int i, j, err, irq_idx = 0, qset_idx = 0;
831 	u_int ntxq = SGE_TXQ_PER_SET;
832 
833 	if ((err = t3_sge_alloc(sc)) != 0) {
834 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
835 		return (err);
836 	}
837 
838 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
839 		irq_idx = -1;
840 
841 	for (i = 0; i < (sc)->params.nports; i++) {
842 		struct port_info *pi = &sc->port[i];
843 
844 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
845 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
846 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
847 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
848 			if (err) {
849 				t3_free_sge_resources(sc);
850 				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
851 				    err);
852 				return (err);
853 			}
854 		}
855 	}
856 
857 	return (0);
858 }
859 
860 static void
861 cxgb_teardown_interrupts(adapter_t *sc)
862 {
863 	int i;
864 
865 	for (i = 0; i < SGE_QSETS; i++) {
866 		if (sc->msix_intr_tag[i] == NULL) {
867 
868 			/* Should have been set up fully or not at all */
869 			KASSERT(sc->msix_irq_res[i] == NULL &&
870 				sc->msix_irq_rid[i] == 0,
871 				("%s: half-done interrupt (%d).", __func__, i));
872 
873 			continue;
874 		}
875 
876 		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
877 				  sc->msix_intr_tag[i]);
878 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
879 				     sc->msix_irq_res[i]);
880 
881 		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
882 		sc->msix_irq_rid[i] = 0;
883 	}
884 
885 	if (sc->intr_tag) {
886 		KASSERT(sc->irq_res != NULL,
887 			("%s: half-done interrupt.", __func__));
888 
889 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
890 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
891 				     sc->irq_res);
892 
893 		sc->irq_res = sc->intr_tag = NULL;
894 		sc->irq_rid = 0;
895 	}
896 }
897 
898 static int
899 cxgb_setup_interrupts(adapter_t *sc)
900 {
901 	struct resource *res;
902 	void *tag;
903 	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
904 
905 	sc->irq_rid = intr_flag ? 1 : 0;
906 	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
907 					     RF_SHAREABLE | RF_ACTIVE);
908 	if (sc->irq_res == NULL) {
909 		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
910 			      intr_flag, sc->irq_rid);
911 		err = EINVAL;
912 		sc->irq_rid = 0;
913 	} else {
914 		err = bus_setup_intr(sc->dev, sc->irq_res,
915 				     INTR_MPSAFE | INTR_TYPE_NET,
916 #ifdef INTR_FILTERS
917 				     NULL,
918 #endif
919 				     sc->cxgb_intr, sc, &sc->intr_tag);
920 
921 		if (err) {
922 			device_printf(sc->dev,
923 				      "Cannot set up interrupt (%x, %u, %d)\n",
924 				      intr_flag, sc->irq_rid, err);
925 			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
926 					     sc->irq_res);
927 			sc->irq_res = sc->intr_tag = NULL;
928 			sc->irq_rid = 0;
929 		}
930 	}
931 
932 	/* That's all for INTx or MSI */
933 	if (!(intr_flag & USING_MSIX) || err)
934 		return (err);
935 
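	/*
	 * With MSI-X, the vector set up above (rid 1) services the slow/async
	 * path; the per-queue-set vectors follow at rids 2..msi_count.
	 */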
936 	for (i = 0; i < sc->msi_count - 1; i++) {
937 		rid = i + 2;
938 		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
939 					     RF_SHAREABLE | RF_ACTIVE);
940 		if (res == NULL) {
941 			device_printf(sc->dev, "Cannot allocate interrupt "
942 				      "for message %d\n", rid);
943 			err = EINVAL;
944 			break;
945 		}
946 
947 		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
948 #ifdef INTR_FILTERS
949 				     NULL,
950 #endif
951 				     t3_intr_msix, &sc->sge.qs[i], &tag);
952 		if (err) {
953 			device_printf(sc->dev, "Cannot set up interrupt "
954 				      "for message %d (%d)\n", rid, err);
955 			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
956 			break;
957 		}
958 
959 		sc->msix_irq_rid[i] = rid;
960 		sc->msix_irq_res[i] = res;
961 		sc->msix_intr_tag[i] = tag;
962 	}
963 
964 	if (err)
965 		cxgb_teardown_interrupts(sc);
966 
967 	return (err);
968 }
969 
970 
971 static int
972 cxgb_port_probe(device_t dev)
973 {
974 	struct port_info *p;
975 	char buf[80];
976 	const char *desc;
977 
978 	p = device_get_softc(dev);
979 	desc = p->phy.desc;
980 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
981 	device_set_desc_copy(dev, buf);
982 	return (0);
983 }
984 
985 
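/*
 * Create the per-port character device that exposes the driver's extension
 * ioctls (used by userland tools such as cxgbtool).
 */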
986 static int
987 cxgb_makedev(struct port_info *pi)
988 {
989 
990 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
991 	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
992 
993 	if (pi->port_cdev == NULL)
994 		return (ENOMEM);
995 
996 	pi->port_cdev->si_drv1 = (void *)pi;
997 
998 	return (0);
999 }
1000 
1001 #ifndef LRO_SUPPORTED
1002 #ifdef IFCAP_LRO
1003 #undef IFCAP_LRO
1004 #endif
1005 #define IFCAP_LRO 0x0
1006 #endif
1007 
1008 #ifdef TSO_SUPPORTED
1009 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO)
1010 /* Don't enable TSO6 yet */
1011 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU | IFCAP_LRO)
1012 #else
1013 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
1014 /* Don't enable TSO6 yet */
1015 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM |  IFCAP_JUMBO_MTU)
1016 #define IFCAP_TSO4 0x0
1017 #define IFCAP_TSO6 0x0
1018 #define CSUM_TSO   0x0
1019 #endif
1020 
1021 
1022 static int
1023 cxgb_port_attach(device_t dev)
1024 {
1025 	struct port_info *p;
1026 	struct ifnet *ifp;
1027 	int err;
1028 	struct adapter *sc;
1029 
1030 
1031 	p = device_get_softc(dev);
1032 	sc = p->adapter;
1033 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1034 	    device_get_unit(device_get_parent(dev)), p->port_id);
1035 	PORT_LOCK_INIT(p, p->lockbuf);
1036 
1037 	/* Allocate an ifnet object and set it up */
1038 	ifp = p->ifp = if_alloc(IFT_ETHER);
1039 	if (ifp == NULL) {
1040 		device_printf(dev, "Cannot allocate ifnet\n");
1041 		return (ENOMEM);
1042 	}
1043 
1044 	/*
1045 	 * Note that there is currently no watchdog timer.
1046 	 */
1047 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1048 	ifp->if_init = cxgb_init;
1049 	ifp->if_softc = p;
1050 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1051 	ifp->if_ioctl = cxgb_ioctl;
1052 	ifp->if_start = cxgb_start;
1053 
1054 	ifp->if_snd.ifq_drv_maxlen = cxgb_snd_queue_len;
1055 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
1056 	IFQ_SET_READY(&ifp->if_snd);
1057 
1058 	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
1059 	ifp->if_capabilities |= CXGB_CAP;
1060 	ifp->if_capenable |= CXGB_CAP_ENABLE;
1061 	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
1062 	/*
1063 	 * disable TSO on 4-port - it isn't supported by the firmware yet
1064 	 */
1065 	if (p->adapter->params.nports > 2) {
1066 		ifp->if_capabilities &= ~(IFCAP_TSO4 | IFCAP_TSO6);
1067 		ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TSO6);
1068 		ifp->if_hwassist &= ~CSUM_TSO;
1069 	}
1070 
1071 	ether_ifattach(ifp, p->hw_addr);
1072 	ifp->if_transmit = cxgb_transmit;
1073 	ifp->if_qflush = cxgb_qflush;
1074 
1075 	/*
1076 	 * Only default to jumbo frames on 10GigE
1077 	 */
1078 	if (p->adapter->params.nports <= 2)
1079 		ifp->if_mtu = ETHERMTU_JUMBO;
1080 	if ((err = cxgb_makedev(p)) != 0) {
1081 		printf("makedev failed %d\n", err);
1082 		return (err);
1083 	}
1084 
1085 	/* Create a list of media supported by this port */
1086 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1087 	    cxgb_media_status);
1088 	cxgb_build_medialist(p);
1089 
1090 	t3_sge_init_port(p);
1091 
1092 	return (err);
1093 }
1094 
1095 /*
1096  * cxgb_port_detach() is called via the device_detach methods when
1097  * cxgb_free() calls the bus_generic_detach.  It is responsible for
1098  * removing the device from the view of the kernel, i.e. from all
1099  * interface lists, etc.  This routine is only called when the driver is
1100  * being unloaded, not when the link goes down.
1101  */
1102 static int
1103 cxgb_port_detach(device_t dev)
1104 {
1105 	struct port_info *p;
1106 	struct adapter *sc;
1107 	int i;
1108 
1109 	p = device_get_softc(dev);
1110 	sc = p->adapter;
1111 
1112 	cxgb_begin_detach(p);
1113 
1114 	if (p->port_cdev != NULL)
1115 		destroy_dev(p->port_cdev);
1116 
1117 	cxgb_uninit_synchronized(p);
1118 	ether_ifdetach(p->ifp);
1119 
1120 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1121 		struct sge_qset *qs = &sc->sge.qs[i];
1122 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1123 
1124 		callout_drain(&txq->txq_watchdog);
1125 		callout_drain(&txq->txq_timer);
1126 	}
1127 
1128 	PORT_LOCK_DEINIT(p);
1129 	if_free(p->ifp);
1130 	p->ifp = NULL;
1131 
1132 	cxgb_end_op(p);
1133 	return (0);
1134 }
1135 
1136 void
1137 t3_fatal_err(struct adapter *sc)
1138 {
1139 	u_int fw_status[4];
1140 
1141 	if (sc->flags & FULL_INIT_DONE) {
1142 		t3_sge_stop(sc);
1143 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1144 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1145 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1146 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1147 		t3_intr_disable(sc);
1148 	}
1149 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1150 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1151 		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1152 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1153 }
1154 
1155 int
1156 t3_os_find_pci_capability(adapter_t *sc, int cap)
1157 {
1158 	device_t dev;
1159 	struct pci_devinfo *dinfo;
1160 	pcicfgregs *cfg;
1161 	uint32_t status;
1162 	uint8_t ptr;
1163 
1164 	dev = sc->dev;
1165 	dinfo = device_get_ivars(dev);
1166 	cfg = &dinfo->cfg;
1167 
1168 	status = pci_read_config(dev, PCIR_STATUS, 2);
1169 	if (!(status & PCIM_STATUS_CAPPRESENT))
1170 		return (0);
1171 
1172 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1173 	case 0:
1174 	case 1:
1175 		ptr = PCIR_CAP_PTR;
1176 		break;
1177 	case 2:
1178 		ptr = PCIR_CAP_PTR_2;
1179 		break;
1180 	default:
1181 		return (0);
1182 		break;
1183 	}
1184 	ptr = pci_read_config(dev, ptr, 1);
1185 
1186 	while (ptr != 0) {
1187 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1188 			return (ptr);
1189 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1190 	}
1191 
1192 	return (0);
1193 }
1194 
1195 int
1196 t3_os_pci_save_state(struct adapter *sc)
1197 {
1198 	device_t dev;
1199 	struct pci_devinfo *dinfo;
1200 
1201 	dev = sc->dev;
1202 	dinfo = device_get_ivars(dev);
1203 
1204 	pci_cfg_save(dev, dinfo, 0);
1205 	return (0);
1206 }
1207 
1208 int
1209 t3_os_pci_restore_state(struct adapter *sc)
1210 {
1211 	device_t dev;
1212 	struct pci_devinfo *dinfo;
1213 
1214 	dev = sc->dev;
1215 	dinfo = device_get_ivars(dev);
1216 
1217 	pci_cfg_restore(dev, dinfo);
1218 	return (0);
1219 }
1220 
1221 /**
1222  *	t3_os_link_changed - handle link status changes
1223  *	@adapter: the adapter associated with the link change
1224  *	@port_id: the port index whose link status has changed
1225  *	@link_status: the new status of the link
1226  *	@speed: the new speed setting
1227  *	@duplex: the new duplex setting
1228  *	@fc: the new flow-control setting
 *	@mac_was_reset: non-zero if the MAC was reset as part of the link change
1229  *
1230  *	This is the OS-dependent handler for link status changes.  The OS
1231  *	neutral handler takes care of most of the processing for these events,
1232  *	then calls this handler for any OS-specific processing.
1233  */
1234 void
1235 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1236      int duplex, int fc, int mac_was_reset)
1237 {
1238 	struct port_info *pi = &adapter->port[port_id];
1239 	struct ifnet *ifp = pi->ifp;
1240 
1241 	/* no race with detach, so ifp should always be good */
1242 	KASSERT(ifp, ("%s: if detached.", __func__));
1243 
1244 	/* Reapply mac settings if they were lost due to a reset */
1245 	if (mac_was_reset) {
1246 		PORT_LOCK(pi);
1247 		cxgb_update_mac_settings(pi);
1248 		PORT_UNLOCK(pi);
1249 	}
1250 
1251 	if (link_status) {
1252 		ifp->if_baudrate = IF_Mbps(speed);
1253 		if_link_state_change(ifp, LINK_STATE_UP);
1254 	} else
1255 		if_link_state_change(ifp, LINK_STATE_DOWN);
1256 }
1257 
1258 /**
1259  *	t3_os_phymod_changed - handle PHY module changes
1260  *	@adap: the adapter whose PHY reported the module change
1261  *	@port_id: the index of the port whose PHY module changed
1262  *
1263  *	This is the OS-dependent handler for PHY module changes.  It is
1264  *	invoked when a PHY module is removed or inserted for any OS-specific
1265  *	processing.
1266  */
1267 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1268 {
1269 	static const char *mod_str[] = {
1270 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX", "unknown"
1271 	};
1272 	struct port_info *pi = &adap->port[port_id];
1273 	int mod = pi->phy.modtype;
1274 
1275 	if (mod != pi->media.ifm_cur->ifm_data)
1276 		cxgb_build_medialist(pi);
1277 
1278 	if (mod == phy_modtype_none)
1279 		if_printf(pi->ifp, "PHY module unplugged\n");
1280 	else {
1281 		KASSERT(mod < ARRAY_SIZE(mod_str),
1282 			("invalid PHY module type %d", mod));
1283 		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1284 	}
1285 }
1286 
1287 /*
1288  * Interrupt-context handler for external (PHY) interrupts.
1289  */
1290 void
1291 t3_os_ext_intr_handler(adapter_t *sc)
1292 {
1293 	if (cxgb_debug)
1294 		printf("t3_os_ext_intr_handler\n");
1295 	/*
1296 	 * Schedule a task to handle external interrupts as they may be slow
1297 	 * and we use a mutex to protect MDIO registers.  We disable PHY
1298 	 * interrupts in the meantime and let the task reenable them when
1299 	 * it's done.
1300 	 */
1301 	if (sc->slow_intr_mask) {
1302 		ADAPTER_LOCK(sc);
1303 		sc->slow_intr_mask &= ~F_T3DBG;
1304 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1305 		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1306 		ADAPTER_UNLOCK(sc);
1307 	}
1308 }
1309 
1310 void
1311 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1312 {
1313 
1314 	/*
1315 	 * The ifnet might not be allocated before this gets called,
1316 	 * as this is called early on in attach by t3_prep_adapter, so
1317 	 * save the address off in the port structure.
1318 	 */
1319 	if (cxgb_debug)
1320 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1321 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1322 }
1323 
1324 /*
1325  * Programs the XGMAC based on the settings in the ifnet.  These settings
1326  * include MTU, MAC address, mcast addresses, etc.
1327  */
1328 static void
1329 cxgb_update_mac_settings(struct port_info *p)
1330 {
1331 	struct ifnet *ifp = p->ifp;
1332 	struct t3_rx_mode rm;
1333 	struct cmac *mac = &p->mac;
1334 	int mtu, hwtagging;
1335 
1336 	PORT_LOCK_ASSERT_OWNED(p);
1337 
1338 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1339 
1340 	mtu = ifp->if_mtu;
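	/* Leave room for the 802.1Q tag when VLAN MTU is enabled. */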
1341 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1342 		mtu += ETHER_VLAN_ENCAP_LEN;
1343 
1344 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1345 
1346 	t3_mac_set_mtu(mac, mtu);
1347 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1348 	t3_mac_set_address(mac, 0, p->hw_addr);
1349 	t3_init_rx_mode(&rm, p);
1350 	t3_mac_set_rx_mode(mac, &rm);
1351 }
1352 
1353 
1354 static int
1355 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1356 			      unsigned long n)
1357 {
1358 	int attempts = 5;
1359 
1360 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1361 		if (!--attempts)
1362 			return (ETIMEDOUT);
1363 		t3_os_sleep(10);
1364 	}
1365 	return 0;
1366 }
1367 
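/*
 * Push zeroed SMT, L2T, and routing-table writes (plus one TCB write) through
 * the management queue so that the TP's memories end up with valid parity.
 */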
1368 static int
1369 init_tp_parity(struct adapter *adap)
1370 {
1371 	int i;
1372 	struct mbuf *m;
1373 	struct cpl_set_tcb_field *greq;
1374 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1375 
1376 	t3_tp_set_offload_mode(adap, 1);
1377 
1378 	for (i = 0; i < 16; i++) {
1379 		struct cpl_smt_write_req *req;
1380 
1381 		m = m_gethdr(M_WAITOK, MT_DATA);
1382 		req = mtod(m, struct cpl_smt_write_req *);
1383 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1384 		memset(req, 0, sizeof(*req));
1385 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1386 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1387 		req->iff = i;
1388 		t3_mgmt_tx(adap, m);
1389 	}
1390 
1391 	for (i = 0; i < 2048; i++) {
1392 		struct cpl_l2t_write_req *req;
1393 
1394 		m = m_gethdr(M_WAITOK, MT_DATA);
1395 		req = mtod(m, struct cpl_l2t_write_req *);
1396 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1397 		memset(req, 0, sizeof(*req));
1398 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1399 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1400 		req->params = htonl(V_L2T_W_IDX(i));
1401 		t3_mgmt_tx(adap, m);
1402 	}
1403 
1404 	for (i = 0; i < 2048; i++) {
1405 		struct cpl_rte_write_req *req;
1406 
1407 		m = m_gethdr(M_WAITOK, MT_DATA);
1408 		req = mtod(m, struct cpl_rte_write_req *);
1409 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1410 		memset(req, 0, sizeof(*req));
1411 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1412 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1413 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1414 		t3_mgmt_tx(adap, m);
1415 	}
1416 
1417 	m = m_gethdr(M_WAITOK, MT_DATA);
1418 	greq = mtod(m, struct cpl_set_tcb_field *);
1419 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1420 	memset(greq, 0, sizeof(*greq));
1421 	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1422 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1423 	greq->mask = htobe64(1);
1424 	t3_mgmt_tx(adap, m);
1425 
1426 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1427 	t3_tp_set_offload_mode(adap, 0);
1428 	return (i);
1429 }
1430 
1431 /**
1432  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1433  *	@adap: the adapter
1434  *
1435  *	Sets up RSS to distribute packets to multiple receive queues.  We
1436  *	configure the RSS CPU lookup table to distribute to the number of HW
1437  *	receive queues, and the response queue lookup table to narrow that
1438  *	down to the response queues actually configured for each port.
1439  *	We always configure the RSS mapping for two ports since the mapping
1440  *	table has plenty of entries.
1441  */
1442 static void
1443 setup_rss(adapter_t *adap)
1444 {
1445 	int i;
1446 	u_int nq[2];
1447 	uint8_t cpus[SGE_QSETS + 1];
1448 	uint16_t rspq_map[RSS_TABLE_SIZE];
1449 
1450 	for (i = 0; i < SGE_QSETS; ++i)
1451 		cpus[i] = i;
1452 	cpus[SGE_QSETS] = 0xff;
1453 
1454 	nq[0] = nq[1] = 0;
1455 	for_each_port(adap, i) {
1456 		const struct port_info *pi = adap2pinfo(adap, i);
1457 
1458 		nq[pi->tx_chan] += pi->nqsets;
1459 	}
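	/*
	 * The first half of the RSS table steers to port 0's queue sets, the
	 * second half to port 1's (offset by nq[0]).
	 */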
1460 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1461 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1462 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1463 	}
1464 
1465 	/* Calculate the reverse RSS map table */
1466 	for (i = 0; i < SGE_QSETS; ++i)
1467 		adap->rrss_map[i] = 0xff;
1468 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1469 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1470 			adap->rrss_map[rspq_map[i]] = i;
1471 
1472 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1473 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1474 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1475 	              cpus, rspq_map);
1476 
1477 }
1478 
1479 /*
1480  * Sends an mbuf to an offload queue driver
1481  * after dealing with any active network taps.
1482  */
1483 static inline int
1484 offload_tx(struct t3cdev *tdev, struct mbuf *m)
1485 {
1486 	int ret;
1487 
1488 	ret = t3_offload_tx(tdev, m);
1489 	return (ret);
1490 }
1491 
1492 static int
1493 write_smt_entry(struct adapter *adapter, int idx)
1494 {
1495 	struct port_info *pi = &adapter->port[idx];
1496 	struct cpl_smt_write_req *req;
1497 	struct mbuf *m;
1498 
1499 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1500 		return (ENOMEM);
1501 
1502 	req = mtod(m, struct cpl_smt_write_req *);
1503 	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1504 
1505 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1506 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1507 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1508 	req->iff = idx;
1509 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1510 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1511 
1512 	m_set_priority(m, 1);
1513 
1514 	offload_tx(&adapter->tdev, m);
1515 
1516 	return (0);
1517 }
1518 
1519 static int
1520 init_smt(struct adapter *adapter)
1521 {
1522 	int i;
1523 
1524 	for_each_port(adapter, i)
1525 		write_smt_entry(adapter, i);
1526 	return 0;
1527 }
1528 
1529 static void
1530 init_port_mtus(adapter_t *adapter)
1531 {
1532 	unsigned int mtus = ETHERMTU | (ETHERMTU << 16);
1533 
1534 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1535 }
1536 
1537 static void
1538 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1539 			      int hi, int port)
1540 {
1541 	struct mbuf *m;
1542 	struct mngt_pktsched_wr *req;
1543 
1544 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1545 	if (m) {
1546 		req = mtod(m, struct mngt_pktsched_wr *);
1547 		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1548 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1549 		req->sched = sched;
1550 		req->idx = qidx;
1551 		req->min = lo;
1552 		req->max = hi;
1553 		req->binding = port;
1554 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1555 		t3_mgmt_tx(adap, m);
1556 	}
1557 }
1558 
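/* Bind each port's queue sets to that port's TX channel via pktsched commands. */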
1559 static void
1560 bind_qsets(adapter_t *sc)
1561 {
1562 	int i, j;
1563 
1564 	for (i = 0; i < (sc)->params.nports; ++i) {
1565 		const struct port_info *pi = adap2pinfo(sc, i);
1566 
1567 		for (j = 0; j < pi->nqsets; ++j) {
1568 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1569 					  -1, pi->tx_chan);
1570 
1571 		}
1572 	}
1573 }
1574 
1575 static void
1576 update_tpeeprom(struct adapter *adap)
1577 {
1578 #ifdef FIRMWARE_LATEST
1579 	const struct firmware *tpeeprom;
1580 #else
1581 	struct firmware *tpeeprom;
1582 #endif
1583 
1584 	uint32_t version;
1585 	unsigned int major, minor;
1586 	int ret, len;
1587 	char rev, name[32];
1588 
1589 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1590 
1591 	major = G_TP_VERSION_MAJOR(version);
1592 	minor = G_TP_VERSION_MINOR(version);
1593 	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1594 		return;
1595 
1596 	rev = t3rev2char(adap);
1597 	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1598 
1599 	tpeeprom = firmware_get(name);
1600 	if (tpeeprom == NULL) {
1601 		device_printf(adap->dev,
1602 			      "could not load TP EEPROM: unable to load %s\n",
1603 			      name);
1604 		return;
1605 	}
1606 
1607 	len = tpeeprom->datasize - 4;
1608 
1609 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1610 	if (ret)
1611 		goto release_tpeeprom;
1612 
1613 	if (len != TP_SRAM_LEN) {
1614 		device_printf(adap->dev,
1615 			      "%s length is wrong len=%d expected=%d\n", name,
1616 			      len, TP_SRAM_LEN);
1617 		goto release_tpeeprom;
1618 	}
1619 
1620 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1621 	    TP_SRAM_OFFSET);
1622 
1623 	if (!ret) {
1624 		device_printf(adap->dev,
1625 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1626 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1627 	} else
1628 		device_printf(adap->dev,
1629 			      "Protocol SRAM image update in EEPROM failed\n");
1630 
1631 release_tpeeprom:
1632 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1633 
1634 	return;
1635 }
1636 
1637 static int
1638 update_tpsram(struct adapter *adap)
1639 {
1640 #ifdef FIRMWARE_LATEST
1641 	const struct firmware *tpsram;
1642 #else
1643 	struct firmware *tpsram;
1644 #endif
1645 	int ret;
1646 	char rev, name[32];
1647 
1648 	rev = t3rev2char(adap);
1649 	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1650 
1651 	update_tpeeprom(adap);
1652 
1653 	tpsram = firmware_get(name);
1654 	if (tpsram == NULL){
1655 		device_printf(adap->dev, "could not load TP SRAM\n");
1656 		return (EINVAL);
1657 	} else
1658 		device_printf(adap->dev, "updating TP SRAM\n");
1659 
1660 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1661 	if (ret)
1662 		goto release_tpsram;
1663 
1664 	ret = t3_set_proto_sram(adap, tpsram->data);
1665 	if (ret)
1666 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1667 
1668 release_tpsram:
1669 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1670 
1671 	return ret;
1672 }
1673 
1674 /**
1675  *	cxgb_up - enable the adapter
1676  *	@adap: adapter being enabled
1677  *
1678  *	Called when the first port is enabled, this function performs the
1679  *	actions necessary to make an adapter operational, such as completing
1680  *	the initialization of HW modules, and enabling interrupts.
1681  */
1682 static int
1683 cxgb_up(struct adapter *sc)
1684 {
1685 	int err = 0;
1686 
1687 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1688 	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1689 					   __func__, sc->open_device_map));
1690 
1691 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1692 
1693 		if ((sc->flags & FW_UPTODATE) == 0)
1694 			if ((err = upgrade_fw(sc)))
1695 				goto out;
1696 
1697 		if ((sc->flags & TPS_UPTODATE) == 0)
1698 			if ((err = update_tpsram(sc)))
1699 				goto out;
1700 
1701 		err = t3_init_hw(sc, 0);
1702 		if (err)
1703 			goto out;
1704 
1705 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1706 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1707 
1708 		err = setup_sge_qsets(sc);
1709 		if (err)
1710 			goto out;
1711 
1712 		setup_rss(sc);
1713 
1714 		t3_intr_clear(sc);
1715 		err = cxgb_setup_interrupts(sc);
1716 		if (err)
1717 			goto out;
1718 
1719 		t3_add_configured_sysctls(sc);
1720 		sc->flags |= FULL_INIT_DONE;
1721 	}
1722 
1723 	t3_intr_clear(sc);
1724 	t3_sge_start(sc);
1725 	t3_intr_enable(sc);
1726 
1727 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1728 	    is_offload(sc) && init_tp_parity(sc) == 0)
1729 		sc->flags |= TP_PARITY_INIT;
1730 
1731 	if (sc->flags & TP_PARITY_INIT) {
1732 		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1733 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1734 	}
1735 
1736 	if (!(sc->flags & QUEUES_BOUND)) {
1737 		bind_qsets(sc);
1738 		sc->flags |= QUEUES_BOUND;
1739 	}
1740 
1741 	t3_sge_reset_adapter(sc);
1742 out:
1743 	return (err);
1744 }
1745 
1746 /*
1747  * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1748  * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1749  * during controller_detach, not here.
1750  */
1751 static void
1752 cxgb_down(struct adapter *sc)
1753 {
1754 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1755 
1756 	t3_sge_stop(sc);
1757 	t3_intr_disable(sc);
1758 }
1759 
1760 static int
1761 offload_open(struct port_info *pi)
1762 {
1763 	struct adapter *sc = pi->adapter;
1764 	struct t3cdev *tdev = &sc->tdev;
1765 
1766 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1767 
1768 	setbit(&sc->open_device_map, OFFLOAD_DEVMAP_BIT);
1769 
1770 	t3_tp_set_offload_mode(sc, 1);
1771 	tdev->lldev = pi->ifp;
1772 	init_port_mtus(sc);
1773 	t3_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd,
1774 		     sc->params.rev == 0 ?  sc->port[0].ifp->if_mtu : 0xffff);
1775 	init_smt(sc);
1776 	cxgb_add_clients(tdev);
1777 
1778 	return (0);
1779 }
1780 
1781 static int
1782 offload_close(struct t3cdev *tdev)
1783 {
1784 	struct adapter *adapter = tdev2adap(tdev);
1785 
1786 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1787 		return (0);
1788 
1789 	/* Call back all registered clients */
1790 	cxgb_remove_clients(tdev);
1791 
1792 	tdev->lldev = NULL;
1793 	cxgb_set_dummy_ops(tdev);
1794 	t3_tp_set_offload_mode(adapter, 0);
1795 
1796 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1797 
1798 	return (0);
1799 }
1800 
1801 /*
1802  * Begin a synchronized operation.  If this call succeeds, it is guaranteed that
1803  * no one will remove the port or its ifp from underneath the caller.  Caller is
1804  * also granted exclusive access to open_device_map.
1805  *
1806  * operation here means init, uninit, detach, and ioctl service.
1807  *
1808  * May fail.
1809  * EINTR (ctrl-c pressed during ifconfig for example).
1810  * ENXIO (port is about to detach - due to kldunload for example).
1811  */
1812 int
1813 cxgb_begin_op(struct port_info *p, const char *wmsg)
1814 {
1815 	int rc = 0;
1816 	struct adapter *sc = p->adapter;
1817 
1818 	ADAPTER_LOCK(sc);
1819 
1820 	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1821 		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, wmsg, 0)) {
1822 			rc = EINTR;
1823 			goto done;
1824 		}
1825 	}
1826 
1827 	if (IS_DOOMED(p))
1828 		rc = ENXIO;
1829 	else if (!IS_BUSY(sc))
1830 		SET_BUSY(sc);
1831 	else {
1832 		KASSERT(0, ("%s: port %d, p->flags = %x , sc->flags = %x",
1833 			    __func__, p->port_id, p->flags, sc->flags));
1834 		rc = EDOOFUS;
1835 	}
1836 
1837 done:
1838 	ADAPTER_UNLOCK(sc);
1839 	return (rc);
1840 }
1841 
1842 /*
1843  * End a synchronized operation.  Read comment block above cxgb_begin_op.
1844  */
1845 int
1846 cxgb_end_op(struct port_info *p)
1847 {
1848 	struct adapter *sc = p->adapter;
1849 
1850 	ADAPTER_LOCK(sc);
1851 	KASSERT(IS_BUSY(sc), ("%s: not busy.", __func__));
1852 	CLR_BUSY(sc);
1853 	wakeup_one(&sc->flags);
1854 	ADAPTER_UNLOCK(sc);
1855 
1856 	return (0);
1857 }
1858 
1859 /*
1860  * Prepare for port detachment.  Detach is a special kind of synchronized
1861  * operation.  Also read comment before cxgb_begin_op.
1862  */
1863 static int
1864 cxgb_begin_detach(struct port_info *p)
1865 {
1866 	struct adapter *sc = p->adapter;
1867 
1868 	/*
1869 	 * Inform those waiting for this port that it is going to be destroyed
1870 	 * and they should not continue further.  (They'll return with ENXIO).
1871 	 */
1872 	ADAPTER_LOCK(sc);
1873 	SET_DOOMED(p);
1874 	wakeup(&sc->flags);
1875 	ADAPTER_UNLOCK(sc);
1876 
1877 	/*
1878 	 * Wait for in-progress operations.
1879 	 */
1880 	ADAPTER_LOCK(sc);
1881 	while (IS_BUSY(sc)) {
1882 		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1883 	}
1884 	SET_BUSY(sc);
1885 	ADAPTER_UNLOCK(sc);
1886 
1887 	return (0);
1888 }
1889 
1890 /*
1891  * if_init for cxgb ports.
1892  */
1893 static void
1894 cxgb_init(void *arg)
1895 {
1896 	struct port_info *p = arg;
1897 
1898 	if (cxgb_begin_op(p, "cxgbinit"))
1899 		return;
1900 
1901 	cxgb_init_synchronized(p);
1902 	cxgb_end_op(p);
1903 }
1904 
1905 static int
1906 cxgb_init_synchronized(struct port_info *p)
1907 {
1908 	struct adapter *sc = p->adapter;
1909 	struct ifnet *ifp = p->ifp;
1910 	struct cmac *mac = &p->mac;
1911 	int i, rc;
1912 
1913 	if (sc->open_device_map == 0) {
1914 		if ((rc = cxgb_up(sc)) != 0)
1915 			return (rc);
1916 
1917 		if (is_offload(sc) && !ofld_disable && offload_open(p))
1918 			log(LOG_WARNING,
1919 			    "Could not initialize offload capabilities\n");
1920 	}
1921 
1922 	PORT_LOCK(p);
1923 	t3_port_intr_enable(sc, p->port_id);
1924 	if (!mac->multiport)
1925 		t3_mac_init(mac);
1926 	cxgb_update_mac_settings(p);
1927 	t3_link_start(&p->phy, mac, &p->link_config);
1928 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1929 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1930 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1931 	PORT_UNLOCK(p);
1932 
1933 	t3_link_changed(sc, p->port_id);
1934 
1935 	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1936 		struct sge_qset *qs = &sc->sge.qs[i];
1937 		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1938 
1939 		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1940 				 txq->txq_watchdog.c_cpu);
1941 	}
1942 
1943 	/* all ok */
1944 	setbit(&sc->open_device_map, p->port_id);
1945 
1946 	return (0);
1947 }
1948 
1949 /*
1950  * Called on "ifconfig down" and from port_detach.
1951  */
1952 static int
1953 cxgb_uninit_synchronized(struct port_info *pi)
1954 {
1955 	struct adapter *sc = pi->adapter;
1956 	struct ifnet *ifp = pi->ifp;
1957 
1958 	/*
1959 	 * Clear this port's bit from the open device map, and then drain all
1960 	 * the tasks that can access/manipulate this port's port_info or ifp.
1961 	 * We disable this port's interrupts here so that the slow/ext
1962 	 * interrupt tasks won't be enqueued.  The tick task will continue to
1963 	 * be enqueued every second but the runs after this drain will not see
1964 	 * this port in the open device map.
1965 	 *
1966 	 * A well behaved task must take open_device_map into account and ignore
1967 	 * ports that are not open.
1968 	 */
1969 	clrbit(&sc->open_device_map, pi->port_id);
1970 	t3_port_intr_disable(sc, pi->port_id);
1971 	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1972 	taskqueue_drain(sc->tq, &sc->ext_intr_task);
1973 	taskqueue_drain(sc->tq, &sc->tick_task);
1974 
1975 	PORT_LOCK(pi);
1976 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1977 
1978 	/* disable pause frames */
1979 	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1980 
1981 	/* Reset RX FIFO HWM */
1982 	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1983 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1984 
1985 	DELAY(100 * 1000);
1986 
1987 	/* Wait for TXFIFO empty */
1988 	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1989 			F_TXFIFO_EMPTY, 1, 20, 5);
1990 
1991 	DELAY(100 * 1000);
1992 	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1993 
1994 
1995 	pi->phy.ops->power_down(&pi->phy, 1);
1996 
1997 	PORT_UNLOCK(pi);
1998 
1999 	pi->link_config.link_ok = 0;
2000 	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
2001 
2002 	if ((sc->open_device_map & PORT_MASK) == 0)
2003 		offload_close(&sc->tdev);
2004 
2005 	if (sc->open_device_map == 0)
2006 		cxgb_down(pi->adapter);
2007 
2008 	return (0);
2009 }
2010 
2011 #ifdef LRO_SUPPORTED
2012 /*
2013  * Mark lro enabled or disabled in all qsets for this port
2014  */
2015 static int
2016 cxgb_set_lro(struct port_info *p, int enabled)
2017 {
2018 	int i;
2019 	struct adapter *adp = p->adapter;
2020 	struct sge_qset *q;
2021 
2022 	PORT_LOCK_ASSERT_OWNED(p);
2023 	for (i = 0; i < p->nqsets; i++) {
2024 		q = &adp->sge.qs[p->first_qset + i];
2025 		q->lro.enabled = (enabled != 0);
2026 	}
2027 	return (0);
2028 }
2029 #endif
2030 
2031 static int
2032 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
2033 {
2034 	struct port_info *p = ifp->if_softc;
2035 	struct ifreq *ifr = (struct ifreq *)data;
2036 	int flags, error = 0, mtu, handle_unsynchronized = 0;
2037 	uint32_t mask;
2038 
2039 	if ((error = cxgb_begin_op(p, "cxgbioct")) != 0)
2040 		return (error);
2041 
2042 	/*
2043 	 * Only commands that should be handled within begin-op/end-op are
2044 	 * serviced in this switch statement.  See handle_unsynchronized.
2045 	 */
2046 	switch (command) {
2047 	case SIOCSIFMTU:
2048 		mtu = ifr->ifr_mtu;
2049 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
2050 			error = EINVAL;
2051 		} else {
2052 			ifp->if_mtu = mtu;
2053 			PORT_LOCK(p);
2054 			cxgb_update_mac_settings(p);
2055 			PORT_UNLOCK(p);
2056 		}
2057 
2058 		break;
2059 	case SIOCSIFFLAGS:
2060 		if (ifp->if_flags & IFF_UP) {
2061 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2062 				flags = p->if_flags;
2063 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
2064 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
2065 					PORT_LOCK(p);
2066 					cxgb_update_mac_settings(p);
2067 					PORT_UNLOCK(p);
2068 				}
2069 			} else
2070 				error = cxgb_init_synchronized(p);
2071 			p->if_flags = ifp->if_flags;
2072 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2073 			error = cxgb_uninit_synchronized(p);
2074 
2075 		break;
2076 	case SIOCADDMULTI:
2077 	case SIOCDELMULTI:
2078 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2079 			PORT_LOCK(p);
2080 			cxgb_update_mac_settings(p);
2081 			PORT_UNLOCK(p);
2082 		}
2083 
2084 		break;
2085 	case SIOCSIFCAP:
2086 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2087 		if (mask & IFCAP_TXCSUM) {
2088 			if (IFCAP_TXCSUM & ifp->if_capenable) {
2089 				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
2090 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
2091 				    | CSUM_IP | CSUM_TSO);
2092 			} else {
2093 				ifp->if_capenable |= IFCAP_TXCSUM;
2094 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
2095 				    | CSUM_IP);
2096 			}
2097 		}
2098 		if (mask & IFCAP_RXCSUM) {
2099 			ifp->if_capenable ^= IFCAP_RXCSUM;
2100 		}
2101 		if (mask & IFCAP_TSO4) {
2102 			if (IFCAP_TSO4 & ifp->if_capenable) {
2103 				ifp->if_capenable &= ~IFCAP_TSO4;
2104 				ifp->if_hwassist &= ~CSUM_TSO;
2105 			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
2106 				ifp->if_capenable |= IFCAP_TSO4;
2107 				ifp->if_hwassist |= CSUM_TSO;
2108 			} else
2109 				error = EINVAL;
2110 		}
2111 #ifdef LRO_SUPPORTED
2112 		if (mask & IFCAP_LRO) {
2113 			ifp->if_capenable ^= IFCAP_LRO;
2114 
2115 			/* Safe to do this even if cxgb_up not called yet */
2116 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2117 		}
2118 #endif
2119 		if (mask & IFCAP_VLAN_HWTAGGING) {
2120 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2121 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2122 				PORT_LOCK(p);
2123 				cxgb_update_mac_settings(p);
2124 				PORT_UNLOCK(p);
2125 			}
2126 		}
2127 		if (mask & IFCAP_VLAN_MTU) {
2128 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2129 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2130 				PORT_LOCK(p);
2131 				cxgb_update_mac_settings(p);
2132 				PORT_UNLOCK(p);
2133 			}
2134 		}
2135 		if (mask & IFCAP_VLAN_HWCSUM) {
2136 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2137 		}
2138 
2139 #ifdef VLAN_CAPABILITIES
2140 		VLAN_CAPABILITIES(ifp);
2141 #endif
2142 		break;
2143 	default:
2144 		handle_unsynchronized = 1;
2145 		break;
2146 	}
2147 
2148 	/*
2149 	 * We don't want to call anything outside the driver while inside a
2150 	 * begin-op/end-op block.  If it calls us back (e.g. ether_ioctl may
2151 	 * call cxgb_init), we may deadlock if the state is already marked busy.
2152 	 *
2153 	 * XXX: this probably opens a small race window with kldunload...
2154 	 */
2155 	cxgb_end_op(p);
2156 
2157 	/* The IS_DOOMED check is racy, we're clutching at straws here */
2158 	if (handle_unsynchronized && !IS_DOOMED(p)) {
2159 		if (command == SIOCSIFMEDIA || command == SIOCGIFMEDIA)
2160 			error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2161 		else
2162 			error = ether_ioctl(ifp, command, data);
2163 	}
2164 
2165 	return (error);
2166 }
2167 
2168 static int
2169 cxgb_media_change(struct ifnet *ifp)
2170 {
2171 	return (EOPNOTSUPP);
2172 }
2173 
2174 /*
2175  * Translates phy->modtype to the correct Ethernet media subtype.
2176  */
2177 static int
2178 cxgb_ifm_type(int mod)
2179 {
2180 	switch (mod) {
2181 	case phy_modtype_sr:
2182 		return (IFM_10G_SR);
2183 	case phy_modtype_lr:
2184 		return (IFM_10G_LR);
2185 	case phy_modtype_lrm:
2186 		return (IFM_10G_LRM);
2187 	case phy_modtype_twinax:
2188 		return (IFM_10G_TWINAX);
2189 	case phy_modtype_twinax_long:
2190 		return (IFM_10G_TWINAX_LONG);
2191 	case phy_modtype_none:
2192 		return (IFM_NONE);
2193 	case phy_modtype_unknown:
2194 		return (IFM_UNKNOWN);
2195 	}
2196 
2197 	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2198 	return (IFM_UNKNOWN);
2199 }
2200 
2201 /*
2202  * Rebuilds the ifmedia list for this port, and sets the current media.
2203  */
2204 static void
2205 cxgb_build_medialist(struct port_info *p)
2206 {
2207 	struct cphy *phy = &p->phy;
2208 	struct ifmedia *media = &p->media;
2209 	int mod = phy->modtype;
2210 	int m = IFM_ETHER | IFM_FDX;
2211 
2212 	PORT_LOCK(p);
2213 
2214 	ifmedia_removeall(media);
2215 	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2216 		/* Copper (RJ45) */
2217 
2218 		if (phy->caps & SUPPORTED_10000baseT_Full)
2219 			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2220 
2221 		if (phy->caps & SUPPORTED_1000baseT_Full)
2222 			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2223 
2224 		if (phy->caps & SUPPORTED_100baseT_Full)
2225 			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2226 
2227 		if (phy->caps & SUPPORTED_10baseT_Full)
2228 			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2229 
2230 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2231 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2232 
2233 	} else if (phy->caps & SUPPORTED_TP) {
2234 		/* Copper (CX4) */
2235 
2236 		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2237 			("%s: unexpected cap 0x%x", __func__, phy->caps));
2238 
2239 		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2240 		ifmedia_set(media, m | IFM_10G_CX4);
2241 
2242 	} else if (phy->caps & SUPPORTED_FIBRE &&
2243 		   phy->caps & SUPPORTED_10000baseT_Full) {
2244 		/* 10G optical (but includes SFP+ twinax) */
2245 
2246 		m |= cxgb_ifm_type(mod);
2247 		if (IFM_SUBTYPE(m) == IFM_NONE)
2248 			m &= ~IFM_FDX;
2249 
2250 		ifmedia_add(media, m, mod, NULL);
2251 		ifmedia_set(media, m);
2252 
2253 	} else if (phy->caps & SUPPORTED_FIBRE &&
2254 		   phy->caps & SUPPORTED_1000baseT_Full) {
2255 		/* 1G optical */
2256 
2257 		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2258 		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2259 		ifmedia_set(media, m | IFM_1000_SX);
2260 
2261 	} else {
2262 		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2263 			    phy->caps));
2264 	}
2265 
2266 	PORT_UNLOCK(p);
2267 }
2268 
2269 static void
2270 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2271 {
2272 	struct port_info *p = ifp->if_softc;
2273 	struct ifmedia_entry *cur = p->media.ifm_cur;
2274 	int speed = p->link_config.speed;
2275 
2276 	if (cur->ifm_data != p->phy.modtype) {
2277 		cxgb_build_medialist(p);
2278 		cur = p->media.ifm_cur;
2279 	}
2280 
2281 	ifmr->ifm_status = IFM_AVALID;
2282 	if (!p->link_config.link_ok)
2283 		return;
2284 
2285 	ifmr->ifm_status |= IFM_ACTIVE;
2286 
2287 	/*
2288 	 * active and current will differ iff current media is autoselect.  That
2289 	 * can happen only for copper RJ45.
2290 	 */
2291 	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2292 		return;
2293 	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2294 		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2295 
2296 	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2297 	if (speed == SPEED_10000)
2298 		ifmr->ifm_active |= IFM_10G_T;
2299 	else if (speed == SPEED_1000)
2300 		ifmr->ifm_active |= IFM_1000_T;
2301 	else if (speed == SPEED_100)
2302 		ifmr->ifm_active |= IFM_100_TX;
2303 	else if (speed == SPEED_10)
2304 		ifmr->ifm_active |= IFM_10_T;
2305 	else
2306 		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2307 			    speed));
2308 }
2309 
2310 static void
2311 cxgb_async_intr(void *data)
2312 {
2313 	adapter_t *sc = data;
2314 
2315 	if (cxgb_debug)
2316 		device_printf(sc->dev, "cxgb_async_intr\n");
2317 	/*
2318 	 * May need to sleep - defer to taskqueue
2319 	 */
2320 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2321 }
2322 
2323 static void
2324 cxgb_ext_intr_handler(void *arg, int count)
2325 {
2326 	adapter_t *sc = (adapter_t *)arg;
2327 
2328 	if (cxgb_debug)
2329 		printf("cxgb_ext_intr_handler\n");
2330 
2331 	t3_phy_intr_handler(sc);
2332 
2333 	/* Now reenable external interrupts */
2334 	ADAPTER_LOCK(sc);
2335 	if (sc->slow_intr_mask) {
2336 		sc->slow_intr_mask |= F_T3DBG;
2337 		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2338 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2339 	}
2340 	ADAPTER_UNLOCK(sc);
2341 }
2342 
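/*
 * The link must be polled (rather than waiting for an interrupt) the first
 * time it is checked after the port comes up, while a link fault is pending,
 * and always on PHYs that cannot raise link interrupts.
 */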
2343 static inline int
2344 link_poll_needed(struct port_info *p)
2345 {
2346 	struct cphy *phy = &p->phy;
2347 
2348 	if (phy->caps & POLL_LINK_1ST_TIME) {
2349 		p->phy.caps &= ~POLL_LINK_1ST_TIME;
2350 		return (1);
2351 	}
2352 
2353 	return (p->link_fault || !(phy->caps & SUPPORTED_LINK_IRQ));
2354 }
2355 
2356 static void
2357 check_link_status(adapter_t *sc)
2358 {
2359 	int i;
2360 
2361 	for (i = 0; i < sc->params.nports; ++i) {
2362 		struct port_info *p = &sc->port[i];
2363 
2364 		if (!isset(&sc->open_device_map, p->port_id))
2365 			continue;
2366 
2367 		if (link_poll_needed(p))
2368 			t3_link_changed(sc, i);
2369 	}
2370 }
2371 
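/*
 * MAC health check for T3 rev B2 parts, run from the tick handler.  Every
 * open port whose link is up goes through the MAC watchdog; depending on its
 * verdict the MAC has either been toggled (status 1) or needs to be
 * reconfigured and restarted (status 2, handled below).
 */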
2372 static void
2373 check_t3b2_mac(struct adapter *sc)
2374 {
2375 	int i;
2376 
2377 	if (sc->flags & CXGB_SHUTDOWN)
2378 		return;
2379 
2380 	for_each_port(sc, i) {
2381 		struct port_info *p = &sc->port[i];
2382 		int status;
2383 #ifdef INVARIANTS
2384 		struct ifnet *ifp = p->ifp;
2385 #endif
2386 
2387 		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2388 		    !p->link_config.link_ok)
2389 			continue;
2390 
2391 		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2392 			("%s: state mismatch (drv_flags %x, device_map %x)",
2393 			 __func__, ifp->if_drv_flags, sc->open_device_map));
2394 
2395 		PORT_LOCK(p);
2396 		status = t3b2_mac_watchdog_task(&p->mac);
2397 		if (status == 1)
2398 			p->mac.stats.num_toggled++;
2399 		else if (status == 2) {
2400 			struct cmac *mac = &p->mac;
2401 
2402 			cxgb_update_mac_settings(p);
2403 			t3_link_start(&p->phy, mac, &p->link_config);
2404 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2405 			t3_port_intr_enable(sc, p->port_id);
2406 			p->mac.stats.num_resets++;
2407 		}
2408 		PORT_UNLOCK(p);
2409 	}
2410 }
2411 
2412 static void
2413 cxgb_tick(void *arg)
2414 {
2415 	adapter_t *sc = (adapter_t *)arg;
2416 
2417 	if (sc->flags & CXGB_SHUTDOWN)
2418 		return;
2419 
2420 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2421 	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
2422 }
2423 
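/*
 * Periodic work, scheduled once a second by cxgb_tick: poll link state where
 * needed, run the T3B2 MAC watchdog, account for free-list empty events, fold
 * the hardware MAC statistics into the ifnet counters, and count RX FIFO
 * overflows.
 */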
2424 static void
2425 cxgb_tick_handler(void *arg, int count)
2426 {
2427 	adapter_t *sc = (adapter_t *)arg;
2428 	const struct adapter_params *p = &sc->params;
2429 	int i;
2430 	uint32_t cause, reset;
2431 
2432 	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2433 		return;
2434 
2435 	check_link_status(sc);
2436 
2437 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2438 		check_t3b2_mac(sc);
2439 
2440 	cause = t3_read_reg(sc, A_SG_INT_CAUSE);
2441 	reset = 0;
2442 	if (cause & F_FLEMPTY) {
2443 		struct sge_qset *qs = &sc->sge.qs[0];
2444 
2445 		i = 0;
2446 		reset |= F_FLEMPTY;
2447 
2448 		cause = (t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) >>
2449 			 S_FL0EMPTY) & 0xffff;
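		/*
		 * One FL-empty status bit per free list, two free lists per
		 * qset: even bits belong to fl[0], odd bits to fl[1], and the
		 * qset pointer advances after every odd bit.
		 */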
2450 		while (cause) {
2451 			qs->fl[i].empty += (cause & 1);
2452 			if (i)
2453 				qs++;
2454 			i ^= 1;
2455 			cause >>= 1;
2456 		}
2457 	}
2458 	t3_write_reg(sc, A_SG_INT_CAUSE, reset);
2459 
2460 	for (i = 0; i < sc->params.nports; i++) {
2461 		struct port_info *pi = &sc->port[i];
2462 		struct ifnet *ifp = pi->ifp;
2463 		struct cmac *mac = &pi->mac;
2464 		struct mac_stats *mstats = &mac->stats;
2465 
2466 		if (!isset(&sc->open_device_map, pi->port_id))
2467 			continue;
2468 
2469 		PORT_LOCK(pi);
2470 		t3_mac_update_stats(mac);
2471 		PORT_UNLOCK(pi);
2472 
2473 		ifp->if_opackets =
2474 		    mstats->tx_frames_64 +
2475 		    mstats->tx_frames_65_127 +
2476 		    mstats->tx_frames_128_255 +
2477 		    mstats->tx_frames_256_511 +
2478 		    mstats->tx_frames_512_1023 +
2479 		    mstats->tx_frames_1024_1518 +
2480 		    mstats->tx_frames_1519_max;
2481 
2482 		ifp->if_ipackets =
2483 		    mstats->rx_frames_64 +
2484 		    mstats->rx_frames_65_127 +
2485 		    mstats->rx_frames_128_255 +
2486 		    mstats->rx_frames_256_511 +
2487 		    mstats->rx_frames_512_1023 +
2488 		    mstats->rx_frames_1024_1518 +
2489 		    mstats->rx_frames_1519_max;
2490 
2491 		ifp->if_obytes = mstats->tx_octets;
2492 		ifp->if_ibytes = mstats->rx_octets;
2493 		ifp->if_omcasts = mstats->tx_mcast_frames;
2494 		ifp->if_imcasts = mstats->rx_mcast_frames;
2495 
2496 		ifp->if_collisions =
2497 		    mstats->tx_total_collisions;
2498 
2499 		ifp->if_iqdrops = mstats->rx_cong_drops;
2500 
2501 		ifp->if_oerrors =
2502 		    mstats->tx_excess_collisions +
2503 		    mstats->tx_underrun +
2504 		    mstats->tx_len_errs +
2505 		    mstats->tx_mac_internal_errs +
2506 		    mstats->tx_excess_deferral +
2507 		    mstats->tx_fcs_errs;
2508 		ifp->if_ierrors =
2509 		    mstats->rx_jabber +
2510 		    mstats->rx_data_errs +
2511 		    mstats->rx_sequence_errs +
2512 		    mstats->rx_runt +
2513 		    mstats->rx_too_long +
2514 		    mstats->rx_mac_internal_errs +
2515 		    mstats->rx_short +
2516 		    mstats->rx_fcs_errs;
2517 
2518 		if (mac->multiport)
2519 			continue;
2520 
2521 		/* Count rx fifo overflows, once per second */
2522 		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2523 		reset = 0;
2524 		if (cause & F_RXFIFO_OVERFLOW) {
2525 			mac->stats.rx_fifo_ovfl++;
2526 			reset |= F_RXFIFO_OVERFLOW;
2527 		}
2528 		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2529 	}
2530 }
2531 
2532 static void
2533 touch_bars(device_t dev)
2534 {
2535 	/*
2536 	 * Don't enable yet
2537 	 */
2538 #if !defined(__LP64__) && 0
2539 	u32 v;
2540 
2541 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2542 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2543 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2544 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2545 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2546 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2547 #endif
2548 }
2549 
2550 static int
2551 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2552 {
2553 	uint8_t *buf;
2554 	int err = 0;
2555 	u32 aligned_offset, aligned_len, *p;
2556 	struct adapter *adapter = pi->adapter;
2557 
2558 
2559 	aligned_offset = offset & ~3;
2560 	aligned_len = (len + (offset & 3) + 3) & ~3;
2561 
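	/*
	 * The EEPROM is written one 32-bit word at a time.  For requests that
	 * are not word aligned, read back the first and last words that will
	 * be touched and splice the new bytes into that copy before writing.
	 */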
2562 	if (aligned_offset != offset || aligned_len != len) {
2563 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2564 		if (!buf)
2565 			return (ENOMEM);
2566 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2567 		if (!err && aligned_len > 4)
2568 			err = t3_seeprom_read(adapter,
2569 					      aligned_offset + aligned_len - 4,
2570 					      (u32 *)&buf[aligned_len - 4]);
2571 		if (err)
2572 			goto out;
2573 		memcpy(buf + (offset & 3), data, len);
2574 	} else
2575 		buf = (uint8_t *)(uintptr_t)data;
2576 
2577 	err = t3_seeprom_wp(adapter, 0);
2578 	if (err)
2579 		goto out;
2580 
2581 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2582 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2583 		aligned_offset += 4;
2584 	}
2585 
2586 	if (!err)
2587 		err = t3_seeprom_wp(adapter, 1);
2588 out:
2589 	if (buf != data)
2590 		free(buf, M_DEVBUF);
2591 	return (err);
2592 }
2593 
2594 
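/*
 * Range check for user-supplied scheduler parameters.  A negative value means
 * "not specified" and always passes; CHELSIO_SET_HW_SCHED only applies
 * parameters that are >= 0.
 */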
2595 static int
2596 in_range(int val, int lo, int hi)
2597 {
2598 	return (val < 0 || (val <= hi && val >= lo));
2599 }
2600 
2601 static int
2602 cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td)
2603 {
2604 	return (0);
2605 }
2606 
2607 static int
2608 cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2609 {
2610 	return (0);
2611 }
2612 
2613 static int
2614 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2615     int fflag, struct thread *td)
2616 {
2617 	int mmd, error = 0;
2618 	struct port_info *pi = dev->si_drv1;
2619 	adapter_t *sc = pi->adapter;
2620 
2621 #ifdef PRIV_SUPPORTED
2622 	if (priv_check(td, PRIV_DRIVER)) {
2623 		if (cxgb_debug)
2624 			printf("user does not have access to privileged ioctls\n");
2625 		return (EPERM);
2626 	}
2627 #else
2628 	if (suser(td)) {
2629 		if (cxgb_debug)
2630 			printf("user does not have access to privileged ioctls\n");
2631 		return (EPERM);
2632 	}
2633 #endif
2634 
2635 	switch (cmd) {
2636 	case CHELSIO_GET_MIIREG: {
2637 		uint32_t val;
2638 		struct cphy *phy = &pi->phy;
2639 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2640 
2641 		if (!phy->mdio_read)
2642 			return (EOPNOTSUPP);
2643 		if (is_10G(sc)) {
2644 			mmd = mid->phy_id >> 8;
2645 			if (!mmd)
2646 				mmd = MDIO_DEV_PCS;
2647 			else if (mmd > MDIO_DEV_VEND2)
2648 				return (EINVAL);
2649 
2650 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2651 					     mid->reg_num, &val);
2652 		} else
2653 		        error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2654 					     mid->reg_num & 0x1f, &val);
2655 		if (error == 0)
2656 			mid->val_out = val;
2657 		break;
2658 	}
2659 	case CHELSIO_SET_MIIREG: {
2660 		struct cphy *phy = &pi->phy;
2661 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2662 
2663 		if (!phy->mdio_write)
2664 			return (EOPNOTSUPP);
2665 		if (is_10G(sc)) {
2666 			mmd = mid->phy_id >> 8;
2667 			if (!mmd)
2668 				mmd = MDIO_DEV_PCS;
2669 			else if (mmd > MDIO_DEV_VEND2)
2670 				return (EINVAL);
2671 
2672 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2673 					      mmd, mid->reg_num, mid->val_in);
2674 		} else
2675 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2676 					      mid->reg_num & 0x1f,
2677 					      mid->val_in);
2678 		break;
2679 	}
2680 	case CHELSIO_SETREG: {
2681 		struct ch_reg *edata = (struct ch_reg *)data;
2682 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2683 			return (EFAULT);
2684 		t3_write_reg(sc, edata->addr, edata->val);
2685 		break;
2686 	}
2687 	case CHELSIO_GETREG: {
2688 		struct ch_reg *edata = (struct ch_reg *)data;
2689 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2690 			return (EFAULT);
2691 		edata->val = t3_read_reg(sc, edata->addr);
2692 		break;
2693 	}
2694 	case CHELSIO_GET_SGE_CONTEXT: {
2695 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2696 		mtx_lock_spin(&sc->sge.reg_lock);
2697 		switch (ecntxt->cntxt_type) {
2698 		case CNTXT_TYPE_EGRESS:
2699 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2700 			    ecntxt->data);
2701 			break;
2702 		case CNTXT_TYPE_FL:
2703 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2704 			    ecntxt->data);
2705 			break;
2706 		case CNTXT_TYPE_RSP:
2707 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2708 			    ecntxt->data);
2709 			break;
2710 		case CNTXT_TYPE_CQ:
2711 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2712 			    ecntxt->data);
2713 			break;
2714 		default:
2715 			error = EINVAL;
2716 			break;
2717 		}
2718 		mtx_unlock_spin(&sc->sge.reg_lock);
2719 		break;
2720 	}
2721 	case CHELSIO_GET_SGE_DESC: {
2722 		struct ch_desc *edesc = (struct ch_desc *)data;
2723 		int ret;
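		/*
		 * Each qset exposes 6 descriptor rings: 3 tx queues, 2 free
		 * lists, and 1 response queue.
		 */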
2724 		if (edesc->queue_num >= SGE_QSETS * 6)
2725 			return (EINVAL);
2726 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2727 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2728 		if (ret < 0)
2729 			return (EINVAL);
2730 		edesc->size = ret;
2731 		break;
2732 	}
2733 	case CHELSIO_GET_QSET_PARAMS: {
2734 		struct qset_params *q;
2735 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2736 		int q1 = pi->first_qset;
2737 		int nqsets = pi->nqsets;
2738 		int i;
2739 
2740 		if (t->qset_idx >= nqsets)
2741 			return (EINVAL);
2742 
2743 		i = q1 + t->qset_idx;
2744 		q = &sc->params.sge.qset[i];
2745 		t->rspq_size   = q->rspq_size;
2746 		t->txq_size[0] = q->txq_size[0];
2747 		t->txq_size[1] = q->txq_size[1];
2748 		t->txq_size[2] = q->txq_size[2];
2749 		t->fl_size[0]  = q->fl_size;
2750 		t->fl_size[1]  = q->jumbo_size;
2751 		t->polling     = q->polling;
2752 		t->lro         = q->lro;
2753 		t->intr_lat    = q->coalesce_usecs;
2754 		t->cong_thres  = q->cong_thres;
2755 		t->qnum        = i;
2756 
2757 		if (sc->flags & USING_MSIX)
2758 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2759 		else
2760 			t->vector = rman_get_start(sc->irq_res);
2761 
2762 		break;
2763 	}
2764 	case CHELSIO_GET_QSET_NUM: {
2765 		struct ch_reg *edata = (struct ch_reg *)data;
2766 		edata->val = pi->nqsets;
2767 		break;
2768 	}
2769 	case CHELSIO_LOAD_FW: {
2770 		uint8_t *fw_data;
2771 		uint32_t vers;
2772 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2773 
2774 		/*
2775 		 * You're allowed to load firmware only before FULL_INIT_DONE.
2776 		 *
2777 		 * FW_UPTODATE is also set so the rest of the initialization
2778 		 * will not overwrite what was loaded here.  This gives you the
2779 		 * flexibility to load any firmware (and maybe shoot yourself in
2780 		 * the foot).
2781 		 */
2782 
2783 		ADAPTER_LOCK(sc);
2784 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2785 			ADAPTER_UNLOCK(sc);
2786 			return (EBUSY);
2787 		}
2788 
2789 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2790 		if (!fw_data)
2791 			error = ENOMEM;
2792 		else
2793 			error = copyin(t->buf, fw_data, t->len);
2794 
2795 		if (!error)
2796 			error = -t3_load_fw(sc, fw_data, t->len);
2797 
2798 		if (t3_get_fw_version(sc, &vers) == 0) {
2799 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2800 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2801 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2802 		}
2803 
2804 		if (!error)
2805 			sc->flags |= FW_UPTODATE;
2806 
2807 		free(fw_data, M_DEVBUF);
2808 		ADAPTER_UNLOCK(sc);
2809 		break;
2810 	}
2811 	case CHELSIO_LOAD_BOOT: {
2812 		uint8_t *boot_data;
2813 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2814 
2815 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2816 		if (!boot_data)
2817 			return (ENOMEM);
2818 
2819 		error = copyin(t->buf, boot_data, t->len);
2820 		if (!error)
2821 			error = -t3_load_boot(sc, boot_data, t->len);
2822 
2823 		free(boot_data, M_DEVBUF);
2824 		break;
2825 	}
2826 	case CHELSIO_GET_PM: {
2827 		struct ch_pm *m = (struct ch_pm *)data;
2828 		struct tp_params *p = &sc->params.tp;
2829 
2830 		if (!is_offload(sc))
2831 			return (EOPNOTSUPP);
2832 
2833 		m->tx_pg_sz = p->tx_pg_size;
2834 		m->tx_num_pg = p->tx_num_pgs;
2835 		m->rx_pg_sz  = p->rx_pg_size;
2836 		m->rx_num_pg = p->rx_num_pgs;
2837 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2838 
2839 		break;
2840 	}
2841 	case CHELSIO_SET_PM: {
2842 		struct ch_pm *m = (struct ch_pm *)data;
2843 		struct tp_params *p = &sc->params.tp;
2844 
2845 		if (!is_offload(sc))
2846 			return (EOPNOTSUPP);
2847 		if (sc->flags & FULL_INIT_DONE)
2848 			return (EBUSY);
2849 
2850 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2851 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2852 			return (EINVAL);	/* not power of 2 */
2853 		if (!(m->rx_pg_sz & 0x14000))
2854 			return (EINVAL);	/* not 16KB or 64KB */
2855 		if (!(m->tx_pg_sz & 0x1554000))
2856 			return (EINVAL);	/* not 16KB, 64KB, 256KB, 1MB, 4MB or 16MB */
2857 		if (m->tx_num_pg == -1)
2858 			m->tx_num_pg = p->tx_num_pgs;
2859 		if (m->rx_num_pg == -1)
2860 			m->rx_num_pg = p->rx_num_pgs;
2861 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2862 			return (EINVAL);
2863 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2864 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2865 			return (EINVAL);
2866 
2867 		p->rx_pg_size = m->rx_pg_sz;
2868 		p->tx_pg_size = m->tx_pg_sz;
2869 		p->rx_num_pgs = m->rx_num_pg;
2870 		p->tx_num_pgs = m->tx_num_pg;
2871 		break;
2872 	}
2873 	case CHELSIO_SETMTUTAB: {
2874 		struct ch_mtus *m = (struct ch_mtus *)data;
2875 		int i;
2876 
2877 		if (!is_offload(sc))
2878 			return (EOPNOTSUPP);
2879 		if (offload_running(sc))
2880 			return (EBUSY);
2881 		if (m->nmtus != NMTUS)
2882 			return (EINVAL);
2883 		if (m->mtus[0] < 81)         /* accommodate SACK */
2884 			return (EINVAL);
2885 
2886 		/*
2887 		 * MTUs must be in ascending order
2888 		 */
2889 		for (i = 1; i < NMTUS; ++i)
2890 			if (m->mtus[i] < m->mtus[i - 1])
2891 				return (EINVAL);
2892 
2893 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2894 		break;
2895 	}
2896 	case CHELSIO_GETMTUTAB: {
2897 		struct ch_mtus *m = (struct ch_mtus *)data;
2898 
2899 		if (!is_offload(sc))
2900 			return (EOPNOTSUPP);
2901 
2902 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2903 		m->nmtus = NMTUS;
2904 		break;
2905 	}
2906 	case CHELSIO_GET_MEM: {
2907 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2908 		struct mc7 *mem;
2909 		uint8_t *useraddr;
2910 		u64 buf[32];
2911 
2912 		/*
2913 		 * Use these to avoid modifying len/addr in the return
2914 		 * struct.
2915 		 */
2916 		uint32_t len = t->len, addr = t->addr;
2917 
2918 		if (!is_offload(sc))
2919 			return (EOPNOTSUPP);
2920 		if (!(sc->flags & FULL_INIT_DONE))
2921 			return (EIO);         /* need the memory controllers */
2922 		if ((addr & 0x7) || (len & 0x7))
2923 			return (EINVAL);
2924 		if (t->mem_id == MEM_CM)
2925 			mem = &sc->cm;
2926 		else if (t->mem_id == MEM_PMRX)
2927 			mem = &sc->pmrx;
2928 		else if (t->mem_id == MEM_PMTX)
2929 			mem = &sc->pmtx;
2930 		else
2931 			return (EINVAL);
2932 
2933 		/*
2934 		 * Version scheme:
2935 		 * bits 0..9: chip version
2936 		 * bits 10..15: chip revision
2937 		 */
2938 		t->version = 3 | (sc->params.rev << 10);
2939 
2940 		/*
2941 		 * Read 256 bytes at a time as len can be large and we don't
2942 		 * want to use huge intermediate buffers.
2943 		 */
2944 		useraddr = (uint8_t *)t->buf;
2945 		while (len) {
2946 			unsigned int chunk = min(len, sizeof(buf));
2947 
2948 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2949 			if (error)
2950 				return (-error);
2951 			if (copyout(buf, useraddr, chunk))
2952 				return (EFAULT);
2953 			useraddr += chunk;
2954 			addr += chunk;
2955 			len -= chunk;
2956 		}
2957 		break;
2958 	}
2959 	case CHELSIO_READ_TCAM_WORD: {
2960 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2961 
2962 		if (!is_offload(sc))
2963 			return (EOPNOTSUPP);
2964 		if (!(sc->flags & FULL_INIT_DONE))
2965 			return (EIO);         /* need MC5 */
2966 		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2967 		break;
2968 	}
2969 	case CHELSIO_SET_TRACE_FILTER: {
2970 		struct ch_trace *t = (struct ch_trace *)data;
2971 		const struct trace_params *tp;
2972 
2973 		tp = (const struct trace_params *)&t->sip;
2974 		if (t->config_tx)
2975 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2976 					       t->trace_tx);
2977 		if (t->config_rx)
2978 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2979 					       t->trace_rx);
2980 		break;
2981 	}
2982 	case CHELSIO_SET_PKTSCHED: {
2983 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2984 		if (sc->open_device_map == 0)
2985 			return (EAGAIN);
2986 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2987 		    p->binding);
2988 		break;
2989 	}
2990 	case CHELSIO_IFCONF_GETREGS: {
2991 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2992 		int reglen = cxgb_get_regs_len();
2993 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2994 		if (buf == NULL) {
2995 			return (ENOMEM);
2996 		}
2997 		if (regs->len > reglen)
2998 			regs->len = reglen;
2999 		else if (regs->len < reglen)
3000 			error = ENOBUFS;
3001 
3002 		if (!error) {
3003 			cxgb_get_regs(sc, regs, buf);
3004 			error = copyout(buf, regs->data, reglen);
3005 		}
3006 		free(buf, M_DEVBUF);
3007 
3008 		break;
3009 	}
3010 	case CHELSIO_SET_HW_SCHED: {
3011 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
3012 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
3013 
3014 		if ((sc->flags & FULL_INIT_DONE) == 0)
3015 			return (EAGAIN);       /* need TP to be initialized */
3016 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
3017 		    !in_range(t->channel, 0, 1) ||
3018 		    !in_range(t->kbps, 0, 10000000) ||
3019 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
3020 		    !in_range(t->flow_ipg, 0,
3021 			      dack_ticks_to_usec(sc, 0x7ff)))
3022 			return (EINVAL);
3023 
3024 		if (t->kbps >= 0) {
3025 			error = t3_config_sched(sc, t->kbps, t->sched);
3026 			if (error < 0)
3027 				return (-error);
3028 		}
3029 		if (t->class_ipg >= 0)
3030 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
3031 		if (t->flow_ipg >= 0) {
3032 			t->flow_ipg *= 1000;     /* us -> ns */
3033 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
3034 		}
3035 		if (t->mode >= 0) {
3036 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
3037 
3038 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3039 					 bit, t->mode ? bit : 0);
3040 		}
3041 		if (t->channel >= 0)
3042 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
3043 					 1 << t->sched, t->channel << t->sched);
3044 		break;
3045 	}
3046 	case CHELSIO_GET_EEPROM: {
3047 		int i;
3048 		struct ch_eeprom *e = (struct ch_eeprom *)data;
3049 		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
3050 
3051 		if (buf == NULL) {
3052 			return (ENOMEM);
3053 		}
3054 		e->magic = EEPROM_MAGIC;
3055 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
3056 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
3057 
3058 		if (!error)
3059 			error = copyout(buf + e->offset, e->data, e->len);
3060 
3061 		free(buf, M_DEVBUF);
3062 		break;
3063 	}
3064 	case CHELSIO_CLEAR_STATS: {
3065 		if (!(sc->flags & FULL_INIT_DONE))
3066 			return (EAGAIN);
3067 
3068 		PORT_LOCK(pi);
3069 		t3_mac_update_stats(&pi->mac);
3070 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3071 		PORT_UNLOCK(pi);
3072 		break;
3073 	}
3074 	case CHELSIO_GET_UP_LA: {
3075 		struct ch_up_la *la = (struct ch_up_la *)data;
3076 		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3077 		if (buf == NULL) {
3078 			return (ENOMEM);
3079 		}
3080 		if (la->bufsize < LA_BUFSIZE)
3081 			error = ENOBUFS;
3082 
3083 		if (!error)
3084 			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3085 					      &la->bufsize, buf);
3086 		if (!error)
3087 			error = copyout(buf, la->data, la->bufsize);
3088 
3089 		free(buf, M_DEVBUF);
3090 		break;
3091 	}
3092 	case CHELSIO_GET_UP_IOQS: {
3093 		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3094 		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3095 		uint32_t *v;
3096 
3097 		if (buf == NULL) {
3098 			return (ENOMEM);
3099 		}
3100 		if (ioqs->bufsize < IOQS_BUFSIZE)
3101 			error = ENOBUFS;
3102 
3103 		if (!error)
3104 			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3105 
3106 		if (!error) {
3107 			v = (uint32_t *)buf;
3108 
3109 			ioqs->bufsize -= 4 * sizeof(uint32_t);
3110 			ioqs->ioq_rx_enable = *v++;
3111 			ioqs->ioq_tx_enable = *v++;
3112 			ioqs->ioq_rx_status = *v++;
3113 			ioqs->ioq_tx_status = *v++;
3114 
3115 			error = copyout(v, ioqs->data, ioqs->bufsize);
3116 		}
3117 
3118 		free(buf, M_DEVBUF);
3119 		break;
3120 	}
3121 	default:
3122 		return (EOPNOTSUPP);
3123 		break;
3124 	}
3125 
3126 	return (error);
3127 }
3128 
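/*
 * Copy the registers in [start, end] into the dump buffer at the same offsets
 * they occupy in the register address map.
 */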
3129 static __inline void
3130 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3131     unsigned int end)
3132 {
3133 	uint32_t *p = (uint32_t *)(buf + start);
3134 
3135 	for ( ; start <= end; start += sizeof(uint32_t))
3136 		*p++ = t3_read_reg(ap, start);
3137 }
3138 
3139 #define T3_REGMAP_SIZE (3 * 1024)
3140 static int
3141 cxgb_get_regs_len(void)
3142 {
3143 	return (T3_REGMAP_SIZE);
3144 }
3145 
3146 static void
3147 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3148 {
3149 
3150 	/*
3151 	 * Version scheme:
3152 	 * bits 0..9: chip version
3153 	 * bits 10..15: chip revision
3154 	 * bit 31: set for PCIe cards
3155 	 */
3156 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3157 
3158 	/*
3159 	 * We skip the MAC statistics registers because they are clear-on-read.
3160 	 * Also reading multi-register stats would need to synchronize with the
3161 	 * periodic mac stats accumulation.  Hard to justify the complexity.
3162 	 */
3163 	memset(buf, 0, cxgb_get_regs_len());
3164 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3165 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3166 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3167 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3168 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3169 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3170 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3171 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3172 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3173 }
3174 
3175 
3176 MODULE_DEPEND(if_cxgb, cxgb_t3fw, 1, 1, 1);
3177