xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision a9148abd9da5db2f1c682fb17bed791845fc41c9)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2008, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57 
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66 
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78 
79 #include <cxgb_include.h>
80 
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84 
85 #ifdef IFNET_MULTIQUEUE
86 #include <machine/intr_machdep.h>
87 #endif
88 
89 static int cxgb_setup_msix(adapter_t *, int);
90 static void cxgb_teardown_msix(adapter_t *);
91 static void cxgb_init(void *);
92 static void cxgb_init_locked(struct port_info *);
93 static void cxgb_stop_locked(struct port_info *);
94 static void cxgb_set_rxmode(struct port_info *);
95 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
96 static int cxgb_media_change(struct ifnet *);
97 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
98 static int setup_sge_qsets(adapter_t *);
99 static void cxgb_async_intr(void *);
100 static void cxgb_ext_intr_handler(void *, int);
101 static void cxgb_tick_handler(void *, int);
102 static void cxgb_down_locked(struct adapter *sc);
103 static void cxgb_tick(void *);
104 static void setup_rss(adapter_t *sc);
105 
106 /* Attachment glue for the PCI controller end of the device.  Each port of
107  * the device is attached separately, as defined later.
108  */
109 static int cxgb_controller_probe(device_t);
110 static int cxgb_controller_attach(device_t);
111 static int cxgb_controller_detach(device_t);
112 static void cxgb_free(struct adapter *);
113 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
114     unsigned int end);
115 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
116 static int cxgb_get_regs_len(void);
117 static int offload_open(struct port_info *pi);
118 static void touch_bars(device_t dev);
119 static int offload_close(struct t3cdev *tdev);
120 static void cxgb_link_start(struct port_info *p);
121 
122 static device_method_t cxgb_controller_methods[] = {
123 	DEVMETHOD(device_probe,		cxgb_controller_probe),
124 	DEVMETHOD(device_attach,	cxgb_controller_attach),
125 	DEVMETHOD(device_detach,	cxgb_controller_detach),
126 
127 	/* bus interface */
128 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
129 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
130 
131 	{ 0, 0 }
132 };
133 
134 static driver_t cxgb_controller_driver = {
135 	"cxgbc",
136 	cxgb_controller_methods,
137 	sizeof(struct adapter)
138 };
139 
140 static devclass_t	cxgb_controller_devclass;
141 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
142 
143 /*
144  * Attachment glue for the ports.  Attachment is done directly to the
145  * controller device.
146  */
147 static int cxgb_port_probe(device_t);
148 static int cxgb_port_attach(device_t);
149 static int cxgb_port_detach(device_t);
150 
151 static device_method_t cxgb_port_methods[] = {
152 	DEVMETHOD(device_probe,		cxgb_port_probe),
153 	DEVMETHOD(device_attach,	cxgb_port_attach),
154 	DEVMETHOD(device_detach,	cxgb_port_detach),
155 	{ 0, 0 }
156 };
157 
158 static driver_t cxgb_port_driver = {
159 	"cxgb",
160 	cxgb_port_methods,
161 	0
162 };
163 
164 static d_ioctl_t cxgb_extension_ioctl;
165 static d_open_t cxgb_extension_open;
166 static d_close_t cxgb_extension_close;
167 
168 static struct cdevsw cxgb_cdevsw = {
169        .d_version =    D_VERSION,
170        .d_flags =      0,
171        .d_open =       cxgb_extension_open,
172        .d_close =      cxgb_extension_close,
173        .d_ioctl =      cxgb_extension_ioctl,
174        .d_name =       "cxgb",
175 };
176 
177 static devclass_t	cxgb_port_devclass;
178 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
179 
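/* One MSI-X vector per queue set, plus one for async link/error events. */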
180 #define SGE_MSIX_COUNT (SGE_QSETS + 1)
181 
182 /*
183  * The driver uses the best interrupt scheme available on a platform in the
184  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
185  * of these schemes the driver may consider as follows:
186  *
187  * msi_allowed = 2: choose from among all three options
188  * msi_allowed = 1: only consider MSI and pin interrupts
189  * msi_allowed = 0: force pin interrupts
190  */
191 static int msi_allowed = 2;
192 
193 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
194 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
195 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
196     "MSI-X, MSI, INTx selector");
197 
198 /*
199  * The driver enables offload by default.
200  * To disable it, use ofld_disable = 1.
201  */
202 static int ofld_disable = 0;
203 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
204 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
205     "disable ULP offload");
206 
207 /*
208  * The driver uses an auto-queue algorithm by default.
209  * To disable it and force a single queue-set per port, use singleq = 1.
210  */
211 static int singleq = 0;
212 TUNABLE_INT("hw.cxgb.singleq", &singleq);
213 SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0,
214     "use a single queue-set per port");
215 
216 
217 /*
218  * The driver skips the firmware update if the firmware on the card is
219  * already up to date.  To force an update anyway, use force_fw_update = 1.
220  */
221 static int force_fw_update = 0;
222 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
223 SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
224     "update firmware even if up to date");
225 
226 int cxgb_use_16k_clusters = 1;
227 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
228 SYSCTL_UINT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
229     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
230 
231 enum {
232 	MAX_TXQ_ENTRIES      = 16384,
233 	MAX_CTRL_TXQ_ENTRIES = 1024,
234 	MAX_RSPQ_ENTRIES     = 16384,
235 	MAX_RX_BUFFERS       = 16384,
236 	MAX_RX_JUMBO_BUFFERS = 16384,
237 	MIN_TXQ_ENTRIES      = 4,
238 	MIN_CTRL_TXQ_ENTRIES = 4,
239 	MIN_RSPQ_ENTRIES     = 32,
240 	MIN_FL_ENTRIES       = 32,
241 	MIN_FL_JUMBO_ENTRIES = 32
242 };
243 
244 struct filter_info {
245 	u32 sip;
246 	u32 sip_mask;
247 	u32 dip;
248 	u16 sport;
249 	u16 dport;
250 	u32 vlan:12;
251 	u32 vlan_prio:3;
252 	u32 mac_hit:1;
253 	u32 mac_idx:4;
254 	u32 mac_vld:1;
255 	u32 pkt_type:2;
256 	u32 report_filter_id:1;
257 	u32 pass:1;
258 	u32 rss:1;
259 	u32 qset:3;
260 	u32 locked:1;
261 	u32 valid:1;
262 };
263 
264 enum { FILTER_NO_VLAN_PRI = 7 };
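
#if 0
/*
 * Illustrative sketch only (not part of the driver): a filter_info entry
 * matching TCP traffic to destination port 80 and steering it to queue
 * set 0.  The pkt_type encoding is an assumption based on the field name;
 * consult the T3 documentation for the real values.
 */
static const struct filter_info example_filter = {
	.dport = 80,
	.pkt_type = 1,				/* assumed: TCP */
	.vlan_prio = FILTER_NO_VLAN_PRI,	/* wildcard VLAN priority */
	.pass = 1,				/* pass rather than drop */
	.qset = 0,
	.valid = 1,
};
#endif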
265 
266 #define EEPROM_MAGIC 0x38E2F10C
267 
268 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
269 
270 /* Table for probing the cards.  The desc field isn't actually used */
271 struct cxgb_ident {
272 	uint16_t	vendor;
273 	uint16_t	device;
274 	int		index;
275 	char		*desc;
276 } cxgb_identifiers[] = {
277 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
278 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
279 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
280 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
281 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
282 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
283 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
284 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
285 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
286 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
287 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
288 	{0, 0, 0, NULL}
289 };
290 
291 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
292 
293 
294 static __inline char
295 t3rev2char(struct adapter *adapter)
296 {
297 	char rev = 'z';
298 
299 	switch(adapter->params.rev) {
300 	case T3_REV_A:
301 		rev = 'a';
302 		break;
303 	case T3_REV_B:
304 	case T3_REV_B2:
305 		rev = 'b';
306 		break;
307 	case T3_REV_C:
308 		rev = 'c';
309 		break;
310 	}
311 	return rev;
312 }
313 
314 static struct cxgb_ident *
315 cxgb_get_ident(device_t dev)
316 {
317 	struct cxgb_ident *id;
318 
319 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
320 		if ((id->vendor == pci_get_vendor(dev)) &&
321 		    (id->device == pci_get_device(dev))) {
322 			return (id);
323 		}
324 	}
325 	return (NULL);
326 }
327 
328 static const struct adapter_info *
329 cxgb_get_adapter_info(device_t dev)
330 {
331 	struct cxgb_ident *id;
332 	const struct adapter_info *ai;
333 
334 	id = cxgb_get_ident(dev);
335 	if (id == NULL)
336 		return (NULL);
337 
338 	ai = t3_get_adapter_info(id->index);
339 
340 	return (ai);
341 }
342 
343 static int
344 cxgb_controller_probe(device_t dev)
345 {
346 	const struct adapter_info *ai;
347 	char *ports, buf[80];
348 	int nports;
349 	struct adapter *sc = device_get_softc(dev);
350 
351 	ai = cxgb_get_adapter_info(dev);
352 	if (ai == NULL)
353 		return (ENXIO);
354 
355 	nports = ai->nports0 + ai->nports1;
356 	if (nports == 1)
357 		ports = "port";
358 	else
359 		ports = "ports";
360 
361 	snprintf(buf, sizeof(buf), "%s %sNIC, rev: %d nports: %d %s",
362 	    ai->desc, is_offload(sc) ? "R" : "",
363 	    sc->params.rev, nports, ports);
364 	device_set_desc_copy(dev, buf);
365 	return (BUS_PROBE_DEFAULT);
366 }
367 
368 #define FW_FNAME "cxgb_t3fw"
369 #define TPEEPROM_NAME "t3b_tp_eeprom"
370 #define TPSRAM_NAME "t3b_protocol_sram"
371 
372 static int
373 upgrade_fw(adapter_t *sc)
374 {
375 #ifdef FIRMWARE_LATEST
376 	const struct firmware *fw;
377 #else
378 	struct firmware *fw;
379 #endif
380 	int status;
381 
382 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
383 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
384 		return (ENOENT);
385 	} else
386 		device_printf(sc->dev, "updating firmware on card\n");
387 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
388 
389 	device_printf(sc->dev, "firmware update returned %s %d\n", (status == 0) ? "success" : "fail", status);
390 
391 	firmware_put(fw, FIRMWARE_UNLOAD);
392 
393 	return (status);
394 }
395 
396 static int
397 cxgb_controller_attach(device_t dev)
398 {
399 	device_t child;
400 	const struct adapter_info *ai;
401 	struct adapter *sc;
402 	int i, error = 0;
403 	uint32_t vers;
404 	int port_qsets = 1;
405 #ifdef MSI_SUPPORTED
406 	int msi_needed, reg;
407 #endif
408 	int must_load = 0;
409 	sc = device_get_softc(dev);
410 	sc->dev = dev;
411 	sc->msi_count = 0;
412 	ai = cxgb_get_adapter_info(dev);
413 
414 	/*
415 	 * XXX not really related but a recent addition
416 	 */
417 #ifdef MSI_SUPPORTED
418 	/* Find the PCIe link width and set the max read request size to 4KB. */
419 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
420 		uint16_t lnk, pectl;
421 		lnk = pci_read_config(dev, reg + 0x12, 2);
422 		sc->link_width = (lnk >> 4) & 0x3f;
423 
424 		pectl = pci_read_config(dev, reg + 0x8, 2);
425 		pectl = (pectl & ~0x7000) | (5 << 12);
426 		pci_write_config(dev, reg + 0x8, pectl, 2);
427 	}
428 
429 	if (sc->link_width != 0 && sc->link_width <= 4 &&
430 	    (ai->nports0 + ai->nports1) <= 2) {
431 		device_printf(sc->dev,
432 		    "PCIe x%d Link, expect reduced performance\n",
433 		    sc->link_width);
434 	}
435 #endif
436 	touch_bars(dev);
437 	pci_enable_busmaster(dev);
438 	/*
439 	 * Allocate the registers and make them available to the driver.
440 	 * The registers that we care about for NIC mode are in BAR 0
441 	 */
442 	sc->regs_rid = PCIR_BAR(0);
443 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
444 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
445 		device_printf(dev, "Cannot allocate BAR region 0\n");
446 		return (ENXIO);
447 	}
448 	sc->udbs_rid = PCIR_BAR(2);
449 	if ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
450 	    &sc->udbs_rid, RF_ACTIVE)) == NULL) {
451 		device_printf(dev, "Cannot allocate BAR region 2\n");
452 		error = ENXIO;
453 		goto out;
454 	}
455 
456 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
457 	    device_get_unit(dev));
458 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
459 
460 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
461 	    device_get_unit(dev));
462 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
463 	    device_get_unit(dev));
464 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
465 	    device_get_unit(dev));
466 
467 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
468 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
469 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
470 
471 	sc->bt = rman_get_bustag(sc->regs_res);
472 	sc->bh = rman_get_bushandle(sc->regs_res);
473 	sc->mmio_len = rman_get_size(sc->regs_res);
474 
475 	if (t3_prep_adapter(sc, ai, 1) < 0) {
476 		printf("prep adapter failed\n");
477 		error = ENODEV;
478 		goto out;
479 	}
480 	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
481 	 * enough messages for the queue sets.  If that fails, try falling
482 	 * back to MSI.  If that fails, then try falling back to the legacy
483 	 * interrupt pin model.
484 	 */
485 #ifdef MSI_SUPPORTED
486 
487 	sc->msix_regs_rid = 0x20;
488 	if ((msi_allowed >= 2) &&
489 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
490 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
491 
492 		msi_needed = sc->msi_count = SGE_MSIX_COUNT;
493 
494 		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
495 		    (sc->msi_count != msi_needed)) {
496 			device_printf(dev, "msix allocation failed - msi_count = %d"
497 			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
498 			    msi_needed, error);
499 			sc->msi_count = 0;
500 			pci_release_msi(dev);
501 			bus_release_resource(dev, SYS_RES_MEMORY,
502 			    sc->msix_regs_rid, sc->msix_regs_res);
503 			sc->msix_regs_res = NULL;
504 		} else {
505 			sc->flags |= USING_MSIX;
506 			sc->cxgb_intr = t3_intr_msix;
507 		}
508 	}
509 
510 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
511 		sc->msi_count = 1;
512 		if (pci_alloc_msi(dev, &sc->msi_count)) {
513 			device_printf(dev, "alloc msi failed - will try INTx\n");
514 			sc->msi_count = 0;
515 			pci_release_msi(dev);
516 		} else {
517 			sc->flags |= USING_MSI;
518 			sc->irq_rid = 1;
519 			sc->cxgb_intr = t3_intr_msi;
520 		}
521 	}
522 #endif
523 	if (sc->msi_count == 0) {
524 		device_printf(dev, "using line interrupts\n");
525 		sc->irq_rid = 0;
526 		sc->cxgb_intr = t3b_intr;
527 	}
528 
529 	if ((sc->flags & USING_MSIX) && !singleq)
530 		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);
531 
532 	/* Create a private taskqueue thread for handling driver events */
533 #ifdef TASKQUEUE_CURRENT
534 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
535 	    taskqueue_thread_enqueue, &sc->tq);
536 #else
537 	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
538 	    taskqueue_thread_enqueue, &sc->tq);
539 #endif
540 	if (sc->tq == NULL) {
541 		device_printf(dev, "failed to allocate controller task queue\n");
		error = ENOMEM;
542 		goto out;
543 	}
544 
545 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
546 	    device_get_nameunit(dev));
547 	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
548 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
549 
550 
551 	/* Create a periodic callout for checking adapter status */
552 	callout_init(&sc->cxgb_tick_ch, TRUE);
553 
554 	if ((t3_check_fw_version(sc, &must_load) != 0 && must_load) || force_fw_update) {
555 		/*
556 		 * Warn user that a firmware update will be attempted in init.
557 		 */
558 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
559 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
560 		sc->flags &= ~FW_UPTODATE;
561 	} else {
562 		sc->flags |= FW_UPTODATE;
563 	}
564 
565 	if (t3_check_tpsram_version(sc, &must_load) != 0 && must_load) {
566 		/*
567 		 * Warn user that a firmware update will be attempted in init.
568 		 */
569 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
570 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
571 		sc->flags &= ~TPS_UPTODATE;
572 	} else {
573 		sc->flags |= TPS_UPTODATE;
574 	}
575 
576 	/*
577 	 * Create a child device for each MAC.  The ethernet attachment
578 	 * will be done in these children.
579 	 */
580 	for (i = 0; i < (sc)->params.nports; i++) {
581 		struct port_info *pi;
582 
583 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
584 			device_printf(dev, "failed to add child port\n");
585 			error = EINVAL;
586 			goto out;
587 		}
588 		pi = &sc->port[i];
589 		pi->adapter = sc;
590 		pi->nqsets = port_qsets;
591 		pi->first_qset = i*port_qsets;
592 		pi->port_id = i;
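		/*
		 * Worked example (assumed 2+2-port card, ai->nports0 = 2):
		 * ports 0,1,2,3 land on tx channels 0,0,1,1 and get
		 * txpkt_intf 0,2,1,3; rxpkt_map[] then inverts that
		 * mapping back to the port index.
		 */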
593 		pi->tx_chan = i >= ai->nports0;
594 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
595 		sc->rxpkt_map[pi->txpkt_intf] = i;
597 		sc->portdev[i] = child;
598 		device_set_softc(child, pi);
599 	}
600 	if ((error = bus_generic_attach(dev)) != 0)
601 		goto out;
602 
603 	/* initialize sge private state */
604 	t3_sge_init_adapter(sc);
605 
606 	t3_led_ready(sc);
607 
608 	cxgb_offload_init();
609 	if (is_offload(sc)) {
610 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
611 		cxgb_adapter_ofld(sc);
612 	}
613 	error = t3_get_fw_version(sc, &vers);
614 	if (error)
615 		goto out;
616 
617 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
618 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
619 	    G_FW_VERSION_MICRO(vers));
620 
621 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
622 	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
623 	t3_add_attach_sysctls(sc);
624 out:
625 	if (error)
626 		cxgb_free(sc);
627 
628 	return (error);
629 }
630 
631 static int
632 cxgb_controller_detach(device_t dev)
633 {
634 	struct adapter *sc;
635 
636 	sc = device_get_softc(dev);
637 
638 	cxgb_free(sc);
639 
640 	return (0);
641 }
642 
643 static void
644 cxgb_free(struct adapter *sc)
645 {
646 	int i;
647 
648 	ADAPTER_LOCK(sc);
649 	sc->flags |= CXGB_SHUTDOWN;
650 	ADAPTER_UNLOCK(sc);
651 	cxgb_pcpu_shutdown_threads(sc);
652 	ADAPTER_LOCK(sc);
653 
654 	/*
655 	 * Note: cxgb_down_locked() drops the adapter lock.
656 	 */
657 	cxgb_down_locked(sc);
658 
659 #ifdef MSI_SUPPORTED
660 	if (sc->flags & (USING_MSI | USING_MSIX)) {
661 		device_printf(sc->dev, "releasing msi message(s)\n");
662 		pci_release_msi(sc->dev);
663 	} else {
664 		device_printf(sc->dev, "no msi message to release\n");
665 	}
666 #endif
667 	if (sc->msix_regs_res != NULL) {
668 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
669 		    sc->msix_regs_res);
670 	}
671 
672 	t3_sge_deinit_sw(sc);
673 	/*
674 	 * Wait for last callout
675 	 */
676 
677 	DELAY(hz*100);
678 
679 	for (i = 0; i < (sc)->params.nports; ++i) {
680 		if (sc->portdev[i] != NULL)
681 			device_delete_child(sc->dev, sc->portdev[i]);
682 	}
683 
684 	bus_generic_detach(sc->dev);
685 	if (sc->tq != NULL) {
686 		taskqueue_free(sc->tq);
687 		sc->tq = NULL;
688 	}
689 
690 	if (is_offload(sc)) {
691 		cxgb_adapter_unofld(sc);
692 		if (isset(&sc->open_device_map, OFFLOAD_DEVMAP_BIT))
693 			offload_close(&sc->tdev);
694 		else
695 			printf("cxgb_free: DEVMAP_BIT not set\n");
696 	} else
697 		printf("not offloading set\n");
698 #ifdef notyet
699 	if (sc->flags & CXGB_OFLD_INIT)
700 		cxgb_offload_deactivate(sc);
701 #endif
702 	free(sc->filters, M_DEVBUF);
703 	t3_sge_free(sc);
704 
705 	cxgb_offload_exit();
706 
707 	if (sc->udbs_res != NULL)
708 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
709 		    sc->udbs_res);
710 
711 	if (sc->regs_res != NULL)
712 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
713 		    sc->regs_res);
714 
715 	MTX_DESTROY(&sc->mdio_lock);
716 	MTX_DESTROY(&sc->sge.reg_lock);
717 	MTX_DESTROY(&sc->elmer_lock);
718 	ADAPTER_LOCK_DEINIT(sc);
719 }
720 
721 /**
722  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
723  *	@sc: the controller softc
724  *
725  *	Determines how many sets of SGE queues to use and initializes them.
726  *	We support multiple queue sets per port if we have MSI-X, otherwise
727  *	just one queue set per port.
728  */
729 static int
730 setup_sge_qsets(adapter_t *sc)
731 {
732 	int i, j, err, irq_idx = 0, qset_idx = 0;
733 	u_int ntxq = SGE_TXQ_PER_SET;
734 
735 	if ((err = t3_sge_alloc(sc)) != 0) {
736 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
737 		return (err);
738 	}
739 
740 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
741 		irq_idx = -1;
742 
743 	for (i = 0; i < (sc)->params.nports; i++) {
744 		struct port_info *pi = &sc->port[i];
745 
746 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
747 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
748 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
749 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
750 			if (err) {
751 				t3_free_sge_resources(sc);
752 				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
753 				    err);
754 				return (err);
755 			}
756 		}
757 	}
758 
759 	return (0);
760 }
761 
762 static void
763 cxgb_teardown_msix(adapter_t *sc)
764 {
765 	int i, nqsets;
766 
767 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
768 		nqsets += sc->port[i].nqsets;
769 
770 	for (i = 0; i < nqsets; i++) {
771 		if (sc->msix_intr_tag[i] != NULL) {
772 			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
773 			    sc->msix_intr_tag[i]);
774 			sc->msix_intr_tag[i] = NULL;
775 		}
776 		if (sc->msix_irq_res[i] != NULL) {
777 			bus_release_resource(sc->dev, SYS_RES_IRQ,
778 			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
779 			sc->msix_irq_res[i] = NULL;
780 		}
781 	}
782 }
783 
784 static int
785 cxgb_setup_msix(adapter_t *sc, int msix_count)
786 {
787 	int i, j, k, nqsets, rid;
788 
789 	/* The first message indicates link changes and error conditions */
790 	sc->irq_rid = 1;
791 	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
792 	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
793 		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
794 		return (EINVAL);
795 	}
796 
797 	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
798 #ifdef INTR_FILTERS
799 		NULL,
800 #endif
801 		cxgb_async_intr, sc, &sc->intr_tag)) {
802 		device_printf(sc->dev, "Cannot set up interrupt\n");
803 		return (EINVAL);
804 	}
805 	for (i = k = 0; i < (sc)->params.nports; i++) {
806 		nqsets = sc->port[i].nqsets;
807 		for (j = 0; j < nqsets; j++, k++) {
808 			struct sge_qset *qs = &sc->sge.qs[k];
809 
810 			rid = k + 2;
811 			if (cxgb_debug)
812 				printf("rid=%d ", rid);
813 			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
814 			    sc->dev, SYS_RES_IRQ, &rid,
815 			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
816 				device_printf(sc->dev, "Cannot allocate "
817 				    "interrupt for message %d\n", rid);
818 				return (EINVAL);
819 			}
820 			sc->msix_irq_rid[k] = rid;
821 			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
822 				INTR_MPSAFE|INTR_TYPE_NET,
823 #ifdef INTR_FILTERS
824 				NULL,
825 #endif
826 				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
827 				device_printf(sc->dev, "Cannot set up "
828 				    "interrupt for message %d\n", rid);
829 				return (EINVAL);
830 			}
831 #ifdef IFNET_MULTIQUEUE
832 			if (singleq == 0) {
833 				int vector = rman_get_start(sc->msix_irq_res[k]);
834 				if (bootverbose)
835 					device_printf(sc->dev, "binding vector=%d to cpu=%d\n", vector, k % mp_ncpus);
836 				intr_bind(vector, k % mp_ncpus);
837 			}
838 #endif
839 		}
840 	}
841 
842 	return (0);
843 }
844 
845 static int
846 cxgb_port_probe(device_t dev)
847 {
848 	struct port_info *p;
849 	char buf[80];
850 	const char *desc;
851 
852 	p = device_get_softc(dev);
853 	desc = p->phy.desc;
854 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
855 	device_set_desc_copy(dev, buf);
856 	return (0);
857 }
858 
859 
860 static int
861 cxgb_makedev(struct port_info *pi)
862 {
863 
864 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
865 	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
866 
867 	if (pi->port_cdev == NULL)
868 		return (ENOMEM);
869 
870 	pi->port_cdev->si_drv1 = (void *)pi;
871 
872 	return (0);
873 }
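
/*
 * The node created above (e.g. /dev/cxgb0) backs the extension ioctls
 * registered in cxgb_cdevsw; userland management utilities are the
 * expected consumers.  (The ioctl definitions live elsewhere and are not
 * shown in this file.)
 */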
874 
875 #ifndef LRO_SUPPORTED
876 #ifdef IFCAP_LRO
877 #undef IFCAP_LRO
878 #endif
879 #define IFCAP_LRO 0x0
880 #endif
881 
882 #ifdef TSO_SUPPORTED
883 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO)
884 /* Don't enable TSO6 yet */
885 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU | IFCAP_LRO)
886 #else
887 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
888 /* Don't enable TSO6 yet */
889 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
890 #define IFCAP_TSO4 0x0
891 #define IFCAP_TSO6 0x0
892 #define CSUM_TSO   0x0
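/*
 * With TSO unsupported, the capability and checksum-assist bits are
 * defined to zero so that the TSO-specific code below compiles away.
 */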
893 #endif
894 
895 
896 static int
897 cxgb_port_attach(device_t dev)
898 {
899 	struct port_info *p;
900 	struct ifnet *ifp;
901 	int err, media_flags;
902 	struct adapter *sc;
903 
904 
905 	p = device_get_softc(dev);
906 	sc = p->adapter;
907 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
908 	    device_get_unit(device_get_parent(dev)), p->port_id);
909 	PORT_LOCK_INIT(p, p->lockbuf);
910 
911 	/* Allocate an ifnet object and set it up */
912 	ifp = p->ifp = if_alloc(IFT_ETHER);
913 	if (ifp == NULL) {
914 		device_printf(dev, "Cannot allocate ifnet\n");
915 		return (ENOMEM);
916 	}
917 
918 	/*
919 	 * Note that there is currently no watchdog timer.
920 	 */
921 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
922 	ifp->if_init = cxgb_init;
923 	ifp->if_softc = p;
924 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
925 	ifp->if_ioctl = cxgb_ioctl;
926 	ifp->if_start = cxgb_start;
927 
928 #if 0
929 #ifdef IFNET_MULTIQUEUE
930 	ifp->if_flags |= IFF_MULTIQ;
931 	ifp->if_mq_start = cxgb_pcpu_start;
932 #endif
933 #endif
934 	ifp->if_timer = 0;	/* Disable ifnet watchdog */
935 	ifp->if_watchdog = NULL;
936 
937 	ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN;
938 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
939 	IFQ_SET_READY(&ifp->if_snd);
940 
941 	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
942 	ifp->if_capabilities |= CXGB_CAP;
943 	ifp->if_capenable |= CXGB_CAP_ENABLE;
944 	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
945 	/*
946 	 * disable TSO on 4-port - it isn't supported by the firmware yet
947 	 */
948 	if (p->adapter->params.nports > 2) {
949 		ifp->if_capabilities &= ~(IFCAP_TSO4 | IFCAP_TSO6);
950 		ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TSO6);
951 		ifp->if_hwassist &= ~CSUM_TSO;
952 	}
953 
954 	ether_ifattach(ifp, p->hw_addr);
955 	/*
956 	 * Only default to jumbo frames on 10GigE
957 	 */
958 	if (p->adapter->params.nports <= 2)
959 		ifp->if_mtu = ETHERMTU_JUMBO;
960 	if ((err = cxgb_makedev(p)) != 0) {
961 		printf("makedev failed %d\n", err);
962 		return (err);
963 	}
964 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
965 	    cxgb_media_status);
966 
967 	if (!strcmp(p->phy.desc, "10GBASE-CX4")) {
968 		media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX;
969 	} else if (!strcmp(p->phy.desc, "10GBASE-SR")) {
970 		media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX;
971 	} else if (!strcmp(p->phy.desc, "10GBASE-R")) {
972 		media_flags = IFM_ETHER | IFM_10G_LR | IFM_FDX;
973 	} else if (!strcmp(p->phy.desc, "10/100/1000BASE-T")) {
974 		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL);
975 		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX,
976 			    0, NULL);
977 		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX,
978 			    0, NULL);
979 		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
980 			    0, NULL);
981 		ifmedia_add(&p->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
982 			    0, NULL);
983 		media_flags = 0;
984 	} else if (!strcmp(p->phy.desc, "1000BASE-X")) {
985 		/*
986 		 * XXX: This is not very accurate.  Fix when common code
987 		 * returns more specific value - eg 1000BASE-SX, LX, etc.
988 		 */
989 		media_flags = IFM_ETHER | IFM_1000_SX | IFM_FDX;
990 	} else {
991 	        printf("unsupported media type %s\n", p->phy.desc);
992 		return (ENXIO);
993 	}
994 	if (media_flags) {
995 		ifmedia_add(&p->media, media_flags, 0, NULL);
996 		ifmedia_set(&p->media, media_flags);
997 	} else {
998 		ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
999 		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
1000 	}
1001 
1002 	/* Get the latest MAC address; the user may have configured an LAA. */
1003 	bcopy(IF_LLADDR(p->ifp), p->hw_addr, ETHER_ADDR_LEN);
1004 	t3_sge_init_port(p);
1005 #if defined(LINK_ATTACH)
1006 	cxgb_link_start(p);
1007 	t3_link_changed(sc, p->port_id);
1008 #endif
1009 	return (0);
1010 }
1011 
1012 static int
1013 cxgb_port_detach(device_t dev)
1014 {
1015 	struct port_info *p;
1016 
1017 	p = device_get_softc(dev);
1018 
1019 	PORT_LOCK(p);
1020 	if (p->ifp->if_drv_flags & IFF_DRV_RUNNING)
1021 		cxgb_stop_locked(p);
1022 	PORT_UNLOCK(p);
1023 
1024 	ether_ifdetach(p->ifp);
1025 	printf("waiting for callout to stop ...");
1026 	DELAY(1000000);
1027 	printf("done\n");
1028 	/*
1029 	 * the lock may be acquired in ifdetach
1030 	 */
1031 	PORT_LOCK_DEINIT(p);
1032 	if_free(p->ifp);
1033 
1034 	if (p->port_cdev != NULL)
1035 		destroy_dev(p->port_cdev);
1036 
1037 	return (0);
1038 }
1039 
1040 void
1041 t3_fatal_err(struct adapter *sc)
1042 {
1043 	u_int fw_status[4];
1044 
1045 	if (sc->flags & FULL_INIT_DONE) {
1046 		t3_sge_stop(sc);
1047 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1048 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1049 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1050 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1051 		t3_intr_disable(sc);
1052 	}
1053 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1054 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1055 		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1056 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1057 }
1058 
1059 int
1060 t3_os_find_pci_capability(adapter_t *sc, int cap)
1061 {
1062 	device_t dev;
1063 	struct pci_devinfo *dinfo;
1064 	pcicfgregs *cfg;
1065 	uint32_t status;
1066 	uint8_t ptr;
1067 
1068 	dev = sc->dev;
1069 	dinfo = device_get_ivars(dev);
1070 	cfg = &dinfo->cfg;
1071 
1072 	status = pci_read_config(dev, PCIR_STATUS, 2);
1073 	if (!(status & PCIM_STATUS_CAPPRESENT))
1074 		return (0);
1075 
1076 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1077 	case 0:
1078 	case 1:
1079 		ptr = PCIR_CAP_PTR;
1080 		break;
1081 	case 2:
1082 		ptr = PCIR_CAP_PTR_2;
1083 		break;
1084 	default:
1085 		return (0);
1087 	}
1088 	ptr = pci_read_config(dev, ptr, 1);
1089 
1090 	while (ptr != 0) {
1091 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1092 			return (ptr);
1093 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1094 	}
1095 
1096 	return (0);
1097 }
1098 
1099 int
1100 t3_os_pci_save_state(struct adapter *sc)
1101 {
1102 	device_t dev;
1103 	struct pci_devinfo *dinfo;
1104 
1105 	dev = sc->dev;
1106 	dinfo = device_get_ivars(dev);
1107 
1108 	pci_cfg_save(dev, dinfo, 0);
1109 	return (0);
1110 }
1111 
1112 int
1113 t3_os_pci_restore_state(struct adapter *sc)
1114 {
1115 	device_t dev;
1116 	struct pci_devinfo *dinfo;
1117 
1118 	dev = sc->dev;
1119 	dinfo = device_get_ivars(dev);
1120 
1121 	pci_cfg_restore(dev, dinfo);
1122 	return (0);
1123 }
1124 
1125 /**
1126  *	t3_os_link_changed - handle link status changes
1127  *	@adapter: the adapter associated with the link change
1128  *	@port_id: the port index whose link status has changed
1129  *	@link_status: the new status of the link
1130  *	@speed: the new speed setting
1131  *	@duplex: the new duplex setting
1132  *	@fc: the new flow-control setting
1133  *
1134  *	This is the OS-dependent handler for link status changes.  The OS
1135  *	neutral handler takes care of most of the processing for these events,
1136  *	then calls this handler for any OS-specific processing.
1137  */
1138 void
1139 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1140      int duplex, int fc)
1141 {
1142 	struct port_info *pi = &adapter->port[port_id];
1143 	struct cmac *mac = &adapter->port[port_id].mac;
1144 
1145 	if (link_status) {
1146 		DELAY(10);
1147 		t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1148 		/* Clear errors created by MAC enable */
1149 		t3_set_reg_field(adapter,
1150 		    A_XGM_STAT_CTRL + pi->mac.offset,
1151 		    F_CLRSTATS, 1);
1152 		if_link_state_change(pi->ifp, LINK_STATE_UP);
1153 
1154 	} else {
1155 		pi->phy.ops->power_down(&pi->phy, 1);
1156 		t3_mac_disable(mac, MAC_DIRECTION_RX);
1157 		t3_link_start(&pi->phy, mac, &pi->link_config);
1158 		t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1159 		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
1160 	}
1161 }
1162 
1163 /**
1164  *	t3_os_phymod_changed - handle PHY module changes
1165  *	@adap: the adapter whose PHY reported the module change
1166  *	@port_id: the index of the port with that PHY
1167  *
1168  *	This is the OS-dependent handler for PHY module changes.  It is
1169  *	invoked when a PHY module is removed or inserted for any OS-specific
1170  *	processing.
1171  */
1172 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1173 {
1174 	static const char *mod_str[] = {
1175 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX", "unknown"
1176 	};
1177 
1178 	struct port_info *pi = &adap->port[port_id];
1179 
1180 	if (pi->phy.modtype == phy_modtype_none)
1181 		device_printf(adap->dev, "PHY module unplugged\n");
1182 	else {
1183 		KASSERT(pi->phy.modtype < ARRAY_SIZE(mod_str),
1184 		    ("invalid PHY module type %d", pi->phy.modtype));
1185 		device_printf(adap->dev, "%s PHY module inserted\n",
1186 		    mod_str[pi->phy.modtype]);
1187 	}
1188 }
1189 
1190 /*
1191  * Interrupt-context handler for external (PHY) interrupts.
1192  */
1193 void
1194 t3_os_ext_intr_handler(adapter_t *sc)
1195 {
1196 	if (cxgb_debug)
1197 		printf("t3_os_ext_intr_handler\n");
1198 	/*
1199 	 * Schedule a task to handle external interrupts as they may be slow
1200 	 * and we use a mutex to protect MDIO registers.  We disable PHY
1201 	 * interrupts in the meantime and let the task reenable them when
1202 	 * it's done.
1203 	 */
1204 	ADAPTER_LOCK(sc);
1205 	if (sc->slow_intr_mask) {
1206 		sc->slow_intr_mask &= ~F_T3DBG;
1207 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1208 		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1209 	}
1210 	ADAPTER_UNLOCK(sc);
1211 }
1212 
1213 void
1214 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1215 {
1216 
1217 	/*
1218 	 * The ifnet might not be allocated before this gets called,
1219 	 * as this is called early on in attach by t3_prep_adapter,
1220 	 * so save the address off in the port structure.
1221 	 */
1222 	if (cxgb_debug)
1223 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1224 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1225 }
1226 
1227 /**
1228  *	link_start - enable a port
1229  *	@p: the port to enable
1230  *
1231  *	Performs the MAC and PHY actions needed to enable a port.
1232  */
1233 static void
1234 cxgb_link_start(struct port_info *p)
1235 {
1236 	struct ifnet *ifp;
1237 	struct t3_rx_mode rm;
1238 	struct cmac *mac = &p->mac;
1239 	int mtu, hwtagging;
1240 
1241 	ifp = p->ifp;
1242 
1243 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1244 
1245 	mtu = ifp->if_mtu;
1246 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1247 		mtu += ETHER_VLAN_ENCAP_LEN;
1248 
1249 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1250 
1251 	t3_init_rx_mode(&rm, p);
1252 	if (!mac->multiport)
1253 		t3_mac_reset(mac);
1254 	t3_mac_set_mtu(mac, mtu);
1255 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1256 	t3_mac_set_address(mac, 0, p->hw_addr);
1257 	t3_mac_set_rx_mode(mac, &rm);
1258 	t3_link_start(&p->phy, mac, &p->link_config);
1259 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1260 }
1261 
1262 
1263 static int
1264 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1265 			      unsigned long n)
1266 {
1267 	int attempts = 5;
1268 
1269 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1270 		if (!--attempts)
1271 			return (ETIMEDOUT);
1272 		t3_os_sleep(10);
1273 	}
1274 	return 0;
1275 }
1276 
1277 static int
1278 init_tp_parity(struct adapter *adap)
1279 {
1280 	int i;
1281 	struct mbuf *m;
1282 	struct cpl_set_tcb_field *greq;
1283 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1284 
1285 	t3_tp_set_offload_mode(adap, 1);
1286 
1287 	for (i = 0; i < 16; i++) {
1288 		struct cpl_smt_write_req *req;
1289 
1290 		m = m_gethdr(M_WAITOK, MT_DATA);
1291 		req = mtod(m, struct cpl_smt_write_req *);
1292 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1293 		memset(req, 0, sizeof(*req));
1294 		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1295 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1296 		req->iff = i;
1297 		t3_mgmt_tx(adap, m);
1298 	}
1299 
1300 	for (i = 0; i < 2048; i++) {
1301 		struct cpl_l2t_write_req *req;
1302 
1303 		m = m_gethdr(M_WAITOK, MT_DATA);
1304 		req = mtod(m, struct cpl_l2t_write_req *);
1305 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1306 		memset(req, 0, sizeof(*req));
1307 		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1308 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1309 		req->params = htonl(V_L2T_W_IDX(i));
1310 		t3_mgmt_tx(adap, m);
1311 	}
1312 
1313 	for (i = 0; i < 2048; i++) {
1314 		struct cpl_rte_write_req *req;
1315 
1316 		m = m_gethdr(M_WAITOK, MT_DATA);
1317 		req = mtod(m, struct cpl_rte_write_req *);
1318 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1319 		memset(req, 0, sizeof(*req));
1320 		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1321 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1322 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1323 		t3_mgmt_tx(adap, m);
1324 	}
1325 
1326 	m = m_gethdr(M_WAITOK, MT_DATA);
1327 	greq = mtod(m, struct cpl_set_tcb_field *);
1328 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1329 	memset(greq, 0, sizeof(*greq));
1330 	greq->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1331 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1332 	greq->mask = htobe64(1);
1333 	t3_mgmt_tx(adap, m);
1334 
1335 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1336 	t3_tp_set_offload_mode(adap, 0);
1337 	return (i);
1338 }
1339 
1340 /**
1341  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1342  *	@adap: the adapter
1343  *
1344  *	Sets up RSS to distribute packets to multiple receive queues.  We
1345  *	configure the RSS CPU lookup table to distribute to the number of HW
1346  *	receive queues, and the response queue lookup table to narrow that
1347  *	down to the response queues actually configured for each port.
1348  *	We always configure the RSS mapping for two ports since the mapping
1349  *	table has plenty of entries.
1350  */
1351 static void
1352 setup_rss(adapter_t *adap)
1353 {
1354 	int i;
1355 	u_int nq[2];
1356 	uint8_t cpus[SGE_QSETS + 1];
1357 	uint16_t rspq_map[RSS_TABLE_SIZE];
1358 
1359 	for (i = 0; i < SGE_QSETS; ++i)
1360 		cpus[i] = i;
1361 	cpus[SGE_QSETS] = 0xff;
1362 
1363 	nq[0] = nq[1] = 0;
1364 	for_each_port(adap, i) {
1365 		const struct port_info *pi = adap2pinfo(adap, i);
1366 
1367 		nq[pi->tx_chan] += pi->nqsets;
1368 	}
1369 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1370 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1371 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1372 	}
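	/*
	 * Worked example (assumed configuration): with nq[0] = 2, nq[1] = 2,
	 * and RSS_TABLE_SIZE = 64, entries 0..31 become 0,1,0,1,... and
	 * entries 32..63 become 2,3,2,3,..., i.e. each channel's hash
	 * buckets round-robin over that channel's own queue sets.
	 */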
1373 	/* Calculate the reverse RSS map table */
1374 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1375 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1376 			adap->rrss_map[rspq_map[i]] = i;
1377 
1378 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1379 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1380 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1381 	              cpus, rspq_map);
1382 
1383 }
1384 
1385 /*
1386  * Sends an mbuf to an offload queue driver
1387  * after dealing with any active network taps.
1388  */
1389 static inline int
1390 offload_tx(struct t3cdev *tdev, struct mbuf *m)
1391 {
1392 	int ret;
1393 
1394 	ret = t3_offload_tx(tdev, m);
1395 	return (ret);
1396 }
1397 
1398 static int
1399 write_smt_entry(struct adapter *adapter, int idx)
1400 {
1401 	struct port_info *pi = &adapter->port[idx];
1402 	struct cpl_smt_write_req *req;
1403 	struct mbuf *m;
1404 
1405 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1406 		return (ENOMEM);
1407 
1408 	req = mtod(m, struct cpl_smt_write_req *);
1409 	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1410 
1411 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1412 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1413 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1414 	req->iff = idx;
1415 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1416 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1417 
1418 	m_set_priority(m, 1);
1419 
1420 	offload_tx(&adapter->tdev, m);
1421 
1422 	return (0);
1423 }
1424 
1425 static int
1426 init_smt(struct adapter *adapter)
1427 {
1428 	int i;
1429 
1430 	for_each_port(adapter, i)
1431 		write_smt_entry(adapter, i);
1432 	return 0;
1433 }
1434 
1435 static void
1436 init_port_mtus(adapter_t *adapter)
1437 {
1438 	unsigned int mtus = adapter->port[0].ifp->if_mtu;
1439 
1440 	if (adapter->port[1].ifp)
1441 		mtus |= adapter->port[1].ifp->if_mtu << 16;
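	/*
	 * Port 0's MTU occupies the low 16 bits and port 1's the high 16
	 * bits; e.g. both ports at 1500 (0x5dc) yield 0x05dc05dc.
	 */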
1442 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1443 }
1444 
1445 static void
1446 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1447 			      int hi, int port)
1448 {
1449 	struct mbuf *m;
1450 	struct mngt_pktsched_wr *req;
1451 
1452 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1453 	if (m) {
1454 		req = mtod(m, struct mngt_pktsched_wr *);
1455 		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1456 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1457 		req->sched = sched;
1458 		req->idx = qidx;
1459 		req->min = lo;
1460 		req->max = hi;
1461 		req->binding = port;
1462 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1463 		t3_mgmt_tx(adap, m);
1464 	}
1465 }
1466 
1467 static void
1468 bind_qsets(adapter_t *sc)
1469 {
1470 	int i, j;
1471 
1472 	cxgb_pcpu_startup_threads(sc);
1473 	for (i = 0; i < (sc)->params.nports; ++i) {
1474 		const struct port_info *pi = adap2pinfo(sc, i);
1475 
1476 		for (j = 0; j < pi->nqsets; ++j) {
1477 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1478 					  -1, pi->tx_chan);
1479 
1480 		}
1481 	}
1482 }
1483 
1484 static void
1485 update_tpeeprom(struct adapter *adap)
1486 {
1487 #ifdef FIRMWARE_LATEST
1488 	const struct firmware *tpeeprom;
1489 #else
1490 	struct firmware *tpeeprom;
1491 #endif
1492 
1493 	uint32_t version;
1494 	unsigned int major, minor;
1495 	int ret, len;
1496 	char rev;
1497 
1498 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1499 
1500 	major = G_TP_VERSION_MAJOR(version);
1501 	minor = G_TP_VERSION_MINOR(version);
1502 	if (major == TP_VERSION_MAJOR && minor == TP_VERSION_MINOR)
1503 		return;
1504 
1505 	rev = t3rev2char(adap);
1506 
1507 	tpeeprom = firmware_get(TPEEPROM_NAME);
1508 	if (tpeeprom == NULL) {
1509 		device_printf(adap->dev, "could not load TP EEPROM: unable to load %s\n",
1510 		    TPEEPROM_NAME);
1511 		return;
1512 	}
1513 
1514 	len = tpeeprom->datasize - 4;
1515 
1516 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1517 	if (ret)
1518 		goto release_tpeeprom;
1519 
1520 	if (len != TP_SRAM_LEN) {
1521 		device_printf(adap->dev, "%s length is wrong len=%d expected=%d\n", TPEEPROM_NAME, len, TP_SRAM_LEN);
1522 		return;
1523 	}
1524 
1525 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1526 	    TP_SRAM_OFFSET);
1527 
1528 	if (!ret) {
1529 		device_printf(adap->dev,
1530 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1531 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1532 	} else
1533 		device_printf(adap->dev, "Protocol SRAM image update in EEPROM failed\n");
1534 
1535 release_tpeeprom:
1536 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1537 
1538 	return;
1539 }
1540 
1541 static int
1542 update_tpsram(struct adapter *adap)
1543 {
1544 #ifdef FIRMWARE_LATEST
1545 	const struct firmware *tpsram;
1546 #else
1547 	struct firmware *tpsram;
1548 #endif
1549 	int ret;
1550 	char rev;
1551 
1552 	rev = t3rev2char(adap);
1553 	if (!rev)
1554 		return 0;
1555 
1556 	update_tpeeprom(adap);
1557 
1558 	tpsram = firmware_get(TPSRAM_NAME);
1559 	if (tpsram == NULL) {
1560 		device_printf(adap->dev, "could not load TP SRAM\n");
1561 		return (EINVAL);
1562 	} else
1563 		device_printf(adap->dev, "updating TP SRAM\n");
1564 
1565 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1566 	if (ret)
1567 		goto release_tpsram;
1568 
1569 	ret = t3_set_proto_sram(adap, tpsram->data);
1570 	if (ret)
1571 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1572 
1573 release_tpsram:
1574 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1575 
1576 	return ret;
1577 }
1578 
1579 /**
1580  *	cxgb_up - enable the adapter
1581  *	@adap: adapter being enabled
1582  *
1583  *	Called when the first port is enabled, this function performs the
1584  *	actions necessary to make an adapter operational, such as completing
1585  *	the initialization of HW modules, and enabling interrupts.
1586  *
1587  */
1588 static int
1589 cxgb_up(struct adapter *sc)
1590 {
1591 	int err = 0;
1592 
1593 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1594 
1595 		if ((sc->flags & FW_UPTODATE) == 0)
1596 			if ((err = upgrade_fw(sc)))
1597 				goto out;
1598 		if ((sc->flags & TPS_UPTODATE) == 0)
1599 			if ((err = update_tpsram(sc)))
1600 				goto out;
1601 		err = t3_init_hw(sc, 0);
1602 		if (err)
1603 			goto out;
1604 
1605 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1606 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1607 
1608 		err = setup_sge_qsets(sc);
1609 		if (err)
1610 			goto out;
1611 
1612 		setup_rss(sc);
1613 		t3_add_configured_sysctls(sc);
1614 		sc->flags |= FULL_INIT_DONE;
1615 	}
1616 
1617 	t3_intr_clear(sc);
1618 
1619 	/* If it's MSI or INTx, allocate a single interrupt for everything */
1620 	if ((sc->flags & USING_MSIX) == 0) {
1621 		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
1622 		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
1623 			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n",
1624 			    sc->irq_rid);
1625 			err = EINVAL;
1626 			goto out;
1627 		}
1628 		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);
1629 
1630 		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
1631 #ifdef INTR_FILTERS
1632 			NULL,
1633 #endif
1634 			sc->cxgb_intr, sc, &sc->intr_tag)) {
1635 			device_printf(sc->dev, "Cannot set up interrupt\n");
1636 			err = EINVAL;
1637 			goto irq_err;
1638 		}
1639 	} else {
1640 		cxgb_setup_msix(sc, sc->msi_count);
1641 	}
1642 
1643 	t3_sge_start(sc);
1644 	t3_intr_enable(sc);
1645 
1646 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1647 	    is_offload(sc) && init_tp_parity(sc) == 0)
1648 		sc->flags |= TP_PARITY_INIT;
1649 
1650 	if (sc->flags & TP_PARITY_INIT) {
1651 		t3_write_reg(sc, A_TP_INT_CAUSE,
1652 				F_CMCACHEPERR | F_ARPLUTPERR);
1653 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1654 	}
1655 
1656 
1657 	if (!(sc->flags & QUEUES_BOUND)) {
1658 		bind_qsets(sc);
1659 		sc->flags |= QUEUES_BOUND;
1660 	}
1661 out:
1662 	return (err);
1663 irq_err:
1664 	CH_ERR(sc, "request_irq failed, err %d\n", err);
1665 	goto out;
1666 }
1667 
1668 
1669 /*
1670  * Release resources when all the ports and offloading have been stopped.
1671  */
1672 static void
1673 cxgb_down_locked(struct adapter *sc)
1674 {
1675 
1676 	t3_sge_stop(sc);
1677 	t3_intr_disable(sc);
1678 
1679 	if (sc->intr_tag != NULL) {
1680 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
1681 		sc->intr_tag = NULL;
1682 	}
1683 	if (sc->irq_res != NULL) {
1684 		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
1685 		    sc->irq_rid, sc->irq_res);
1686 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
1687 		    sc->irq_res);
1688 		sc->irq_res = NULL;
1689 	}
1690 
1691 	if (sc->flags & USING_MSIX)
1692 		cxgb_teardown_msix(sc);
1693 
1694 	callout_stop(&sc->cxgb_tick_ch);
1695 	callout_stop(&sc->sge_timer_ch);
1696 	callout_drain(&sc->cxgb_tick_ch);
1697 	callout_drain(&sc->sge_timer_ch);
1698 
1699 	if (sc->tq != NULL) {
1700 		printf("draining slow intr\n");
1701 
1702 		taskqueue_drain(sc->tq, &sc->slow_intr_task);
1703 			printf("draining ext intr\n");
1704 		taskqueue_drain(sc->tq, &sc->ext_intr_task);
1705 		printf("draining tick task\n");
1706 		taskqueue_drain(sc->tq, &sc->tick_task);
1707 	}
1708 	ADAPTER_UNLOCK(sc);
1709 }
1710 
1711 static int
1712 offload_open(struct port_info *pi)
1713 {
1714 	struct adapter *adapter = pi->adapter;
1715 	struct t3cdev *tdev = &adapter->tdev;
1716 
1717 	int adap_up = adapter->open_device_map & PORT_MASK;
1718 	int err = 0;
1719 
1720 	if (atomic_cmpset_int(&adapter->open_device_map,
1721 		(adapter->open_device_map & ~(1<<OFFLOAD_DEVMAP_BIT)),
1722 		(adapter->open_device_map | (1<<OFFLOAD_DEVMAP_BIT))) == 0)
1723 		return (0);
1724 
1725 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1726 		printf("offload_open: DEVMAP_BIT did not get set 0x%x\n",
1727 		    adapter->open_device_map);
1728 	ADAPTER_LOCK(pi->adapter);
1729 	if (!adap_up)
1730 		err = cxgb_up(adapter);
1731 	ADAPTER_UNLOCK(pi->adapter);
1732 	if (err)
1733 		return (err);
1734 
1735 	t3_tp_set_offload_mode(adapter, 1);
1736 	tdev->lldev = pi->ifp;
1737 
1738 	init_port_mtus(adapter);
1739 	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
1740 		     adapter->params.b_wnd,
1741 		     adapter->params.rev == 0 ?
1742 		       adapter->port[0].ifp->if_mtu : 0xffff);
1743 	init_smt(adapter);
1744 	/* Call back all registered clients */
1745 	cxgb_add_clients(tdev);
1746 
1747 	/* restore them in case the offload module has changed them */
1748 	if (err) {
1749 		t3_tp_set_offload_mode(adapter, 0);
1750 		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1751 		cxgb_set_dummy_ops(tdev);
1752 	}
1753 	return (err);
1754 }
1755 
1756 static int
1757 offload_close(struct t3cdev *tdev)
1758 {
1759 	struct adapter *adapter = tdev2adap(tdev);
1760 
1761 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1762 		return (0);
1763 
1764 	/* Call back all registered clients */
1765 	cxgb_remove_clients(tdev);
1766 
1767 	tdev->lldev = NULL;
1768 	cxgb_set_dummy_ops(tdev);
1769 	t3_tp_set_offload_mode(adapter, 0);
1770 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1771 
1772 	ADAPTER_LOCK(adapter);
1773 	if (!adapter->open_device_map)
1774 		cxgb_down_locked(adapter);
1775 	else
1776 		ADAPTER_UNLOCK(adapter);
1777 	return (0);
1778 }
1779 
1780 
1781 static void
1782 cxgb_init(void *arg)
1783 {
1784 	struct port_info *p = arg;
1785 
1786 	PORT_LOCK(p);
1787 	cxgb_init_locked(p);
1788 	PORT_UNLOCK(p);
1789 }
1790 
1791 static void
1792 cxgb_init_locked(struct port_info *p)
1793 {
1794 	struct ifnet *ifp;
1795 	adapter_t *sc = p->adapter;
1796 	int err;
1797 
1798 	PORT_LOCK_ASSERT_OWNED(p);
1799 	ifp = p->ifp;
1800 
1801 	ADAPTER_LOCK(p->adapter);
1802 	if ((sc->open_device_map == 0) && (err = cxgb_up(sc))) {
1803 		ADAPTER_UNLOCK(p->adapter);
1804 		cxgb_stop_locked(p);
1805 		return;
1806 	}
1807 	if (p->adapter->open_device_map == 0) {
1808 		t3_intr_clear(sc);
1809 	}
1810 	setbit(&p->adapter->open_device_map, p->port_id);
1811 	ADAPTER_UNLOCK(p->adapter);
1812 
1813 	if (is_offload(sc) && !ofld_disable) {
1814 		err = offload_open(p);
1815 		if (err)
1816 			log(LOG_WARNING,
1817 			    "Could not initialize offload capabilities\n");
1818 	}
1819 #if !defined(LINK_ATTACH)
1820 	cxgb_link_start(p);
1821 	t3_link_changed(sc, p->port_id);
1822 #endif
1823 	ifp->if_baudrate = p->link_config.speed * 1000000;
1824 
1825 	device_printf(sc->dev, "enabling interrupts on port=%d\n", p->port_id);
1826 	t3_port_intr_enable(sc, p->port_id);
1827 
1828 	t3_sge_reset_adapter(sc);
1829 
1830 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1831 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1832 }
1833 
1834 static void
1835 cxgb_set_rxmode(struct port_info *p)
1836 {
1837 	struct t3_rx_mode rm;
1838 	struct cmac *mac = &p->mac;
1839 
1840 	t3_init_rx_mode(&rm, p);
1841 	mtx_lock(&p->adapter->mdio_lock);
1842 	t3_mac_set_rx_mode(mac, &rm);
1843 	mtx_unlock(&p->adapter->mdio_lock);
1844 }
1845 
1846 static void
1847 cxgb_stop_locked(struct port_info *pi)
1848 {
1849 	struct ifnet *ifp;
1850 
1851 	PORT_LOCK_ASSERT_OWNED(pi);
1852 	ADAPTER_LOCK_ASSERT_NOTOWNED(pi->adapter);
1853 
1854 	ifp = pi->ifp;
1855 	t3_port_intr_disable(pi->adapter, pi->port_id);
1856 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1857 
1858 	/* disable pause frames */
1859 	t3_set_reg_field(pi->adapter, A_XGM_TX_CFG + pi->mac.offset,
1860 			 F_TXPAUSEEN, 0);
1861 
1862 	/* Reset RX FIFO HWM */
1863 	t3_set_reg_field(pi->adapter, A_XGM_RXFIFO_CFG + pi->mac.offset,
1864 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1865 
1866 
1867 	ADAPTER_LOCK(pi->adapter);
1868 	clrbit(&pi->adapter->open_device_map, pi->port_id);
1869 
1870 	if (pi->adapter->open_device_map == 0) {
1871 		cxgb_down_locked(pi->adapter);
1872 	} else
1873 		ADAPTER_UNLOCK(pi->adapter);
1874 
1875 #if !defined(LINK_ATTACH)
1876 	DELAY(100);
1877 
1878 	/* Wait for TXFIFO empty */
1879 	t3_wait_op_done(pi->adapter, A_XGM_TXFIFO_CFG + pi->mac.offset,
1880 			F_TXFIFO_EMPTY, 1, 20, 5);
1881 
1882 	DELAY(100);
1883 	t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
1884 
1885 	pi->phy.ops->power_down(&pi->phy, 1);
1886 #endif
1887 
1888 }
1889 
1890 static int
1891 cxgb_set_mtu(struct port_info *p, int mtu)
1892 {
1893 	struct ifnet *ifp = p->ifp;
1894 	int error = 0;
1895 
1896 	if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO))
1897 		error = EINVAL;
1898 	else if (ifp->if_mtu != mtu) {
1899 		PORT_LOCK(p);
1900 		ifp->if_mtu = mtu;
1901 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1902 			cxgb_stop_locked(p);
1903 			cxgb_init_locked(p);
1904 		}
1905 		PORT_UNLOCK(p);
1906 	}
1907 	return (error);
1908 }
1909 
1910 #ifdef LRO_SUPPORTED
1911 /*
1912  * Mark lro enabled or disabled in all qsets for this port
1913  */
1914 static int
1915 cxgb_set_lro(struct port_info *p, int enabled)
1916 {
1917 	int i;
1918 	struct adapter *adp = p->adapter;
1919 	struct sge_qset *q;
1920 
1921 	PORT_LOCK_ASSERT_OWNED(p);
1922 	for (i = 0; i < p->nqsets; i++) {
1923 		q = &adp->sge.qs[p->first_qset + i];
1924 		q->lro.enabled = (enabled != 0);
1925 	}
1926 	return (0);
1927 }
1928 #endif
1929 
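/*
 * ifnet ioctl entry point: MTU, address, flags, multicast, media and
 * capability requests are handled here; anything else falls through
 * to ether_ioctl().
 */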
1930 static int
1931 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1932 {
1933 	struct port_info *p = ifp->if_softc;
1934 	struct ifaddr *ifa = (struct ifaddr *)data;
1935 	struct ifreq *ifr = (struct ifreq *)data;
1936 	int flags, error = 0, reinit = 0;
1937 	uint32_t mask;
1938 
1939 	/*
1940 	 * XXX need to check that we aren't in the middle of an unload
1941 	 */
1942 	switch (command) {
1943 	case SIOCSIFMTU:
1944 		error = cxgb_set_mtu(p, ifr->ifr_mtu);
1945 		break;
1946 	case SIOCSIFADDR:
1947 		if (ifa->ifa_addr->sa_family == AF_INET) {
1948 			ifp->if_flags |= IFF_UP;
1949 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1950 				PORT_LOCK(p);
1951 				cxgb_init_locked(p);
1952 				PORT_UNLOCK(p);
1953 			}
1954 			arp_ifinit(ifp, ifa);
1955 		} else
1956 			error = ether_ioctl(ifp, command, data);
1957 		break;
1958 	case SIOCSIFFLAGS:
1959 		PORT_LOCK(p);
1960 		if (ifp->if_flags & IFF_UP) {
1961 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1962 				flags = p->if_flags;
1963 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1964 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
1965 					cxgb_set_rxmode(p);
1966 			} else
1967 				cxgb_init_locked(p);
1968 			p->if_flags = ifp->if_flags;
1969 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1970 			cxgb_stop_locked(p);
1971 
1972 		PORT_UNLOCK(p);
1973 		break;
1974 	case SIOCADDMULTI:
1975 	case SIOCDELMULTI:
1976 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1977 			cxgb_set_rxmode(p);
1978 		}
1979 		break;
1980 	case SIOCSIFMEDIA:
1981 	case SIOCGIFMEDIA:
1982 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
1983 		break;
1984 	case SIOCSIFCAP:
1985 		PORT_LOCK(p);
1986 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1987 		if (mask & IFCAP_TXCSUM) {
1988 			if (IFCAP_TXCSUM & ifp->if_capenable) {
1989 				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
1990 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
1991 				    | CSUM_IP | CSUM_TSO);
1992 			} else {
1993 				ifp->if_capenable |= IFCAP_TXCSUM;
1994 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
1995 				    | CSUM_IP);
1996 			}
1997 		}
1998 		if (mask & IFCAP_RXCSUM) {
1999 			ifp->if_capenable ^= IFCAP_RXCSUM;
2000 		}
2001 		if (mask & IFCAP_TSO4) {
2002 			if (IFCAP_TSO4 & ifp->if_capenable) {
2003 				ifp->if_capenable &= ~IFCAP_TSO4;
2004 				ifp->if_hwassist &= ~CSUM_TSO;
2005 			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
2006 				ifp->if_capenable |= IFCAP_TSO4;
2007 				ifp->if_hwassist |= CSUM_TSO;
2008 			} else {
2009 				if (cxgb_debug)
2010 					printf("cxgb requires tx checksum offload"
2011 					    " be enabled to use TSO\n");
2012 				error = EINVAL;
2013 			}
2014 		}
2015 #ifdef LRO_SUPPORTED
2016 		if (mask & IFCAP_LRO) {
2017 			ifp->if_capenable ^= IFCAP_LRO;
2018 
2019 			/* Safe to do this even if cxgb_up not called yet */
2020 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2021 		}
2022 #endif
2023 		if (mask & IFCAP_VLAN_HWTAGGING) {
2024 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2025 			reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
2026 		}
2027 		if (mask & IFCAP_VLAN_MTU) {
2028 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2029 			reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
2030 		}
2031 		if (mask & IFCAP_VLAN_HWCSUM) {
2032 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2033 		}
2034 		if (reinit) {
2035 			cxgb_stop_locked(p);
2036 			cxgb_init_locked(p);
2037 		}
2038 		PORT_UNLOCK(p);
2039 
2040 #ifdef VLAN_CAPABILITIES
2041 		VLAN_CAPABILITIES(ifp);
2042 #endif
2043 		break;
2044 	default:
2045 		error = ether_ioctl(ifp, command, data);
2046 		break;
2047 	}
2048 	return (error);
2049 }
2050 
2051 static int
2052 cxgb_media_change(struct ifnet *ifp)
2053 {
2054 	if_printf(ifp, "media change not supported\n");
2055 	return (ENXIO);
2056 }
2057 
2058 static void
2059 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2060 {
2061 	struct port_info *p = ifp->if_softc;
2062 
2063 	ifmr->ifm_status = IFM_AVALID;
2064 	ifmr->ifm_active = IFM_ETHER;
2065 
2066 	if (!p->link_config.link_ok)
2067 		return;
2068 
2069 	ifmr->ifm_status |= IFM_ACTIVE;
2070 
2071 	switch (p->link_config.speed) {
2072 	case 10:
2073 		ifmr->ifm_active |= IFM_10_T;
2074 		break;
2075 	case 100:
2076 		ifmr->ifm_active |= IFM_100_TX;
2077 		break;
2078 	case 1000:
2079 		ifmr->ifm_active |= IFM_1000_T;
2080 		break;
2081 	}
2082 
2083 	if (p->link_config.duplex)
2084 		ifmr->ifm_active |= IFM_FDX;
2085 	else
2086 		ifmr->ifm_active |= IFM_HDX;
2087 }
2088 
2089 static void
2090 cxgb_async_intr(void *data)
2091 {
2092 	adapter_t *sc = data;
2093 
2094 	if (cxgb_debug)
2095 		device_printf(sc->dev, "cxgb_async_intr\n");
2096 	/*
2097 	 * May need to sleep - defer to taskqueue
2098 	 */
2099 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2100 }
2101 
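/*
 * Taskqueue handler for external/PHY interrupts.  It runs in a
 * sleepable context (PHY access can sleep), then rearms F_T3DBG in
 * the slow interrupt mask once the condition has been serviced.
 */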
2102 static void
2103 cxgb_ext_intr_handler(void *arg, int count)
2104 {
2105 	adapter_t *sc = (adapter_t *)arg;
2106 
2107 	if (cxgb_debug)
2108 		printf("cxgb_ext_intr_handler\n");
2109 
2110 	t3_phy_intr_handler(sc);
2111 
2112 	/* Now reenable external interrupts */
2113 	ADAPTER_LOCK(sc);
2114 	if (sc->slow_intr_mask) {
2115 		sc->slow_intr_mask |= F_T3DBG;
2116 		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2117 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2118 	}
2119 	ADAPTER_UNLOCK(sc);
2120 }
2121 
2122 static void
2123 check_link_status(adapter_t *sc)
2124 {
2125 	int i;
2126 
2127 	for (i = 0; i < sc->params.nports; ++i) {
2128 		struct port_info *p = &sc->port[i];
2129 
2130 		if (!(p->phy.caps & SUPPORTED_IRQ))
2131 			t3_link_changed(sc, i);
2132 		p->ifp->if_baudrate = p->link_config.speed * 1000000;
2133 	}
2134 }
2135 
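/*
 * Periodic watchdog for T3B2 MACs.  t3b2_mac_watchdog_task() reports
 * what it had to do: the code below counts a return of 1 as a MAC
 * toggle and treats 2 as a full reset, reprogramming MTU, address,
 * rx mode and link state from scratch.
 */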
2136 static void
2137 check_t3b2_mac(struct adapter *adapter)
2138 {
2139 	int i;
2140 
2141 	if (adapter->flags & CXGB_SHUTDOWN)
2142 		return;
2143 
2144 	for_each_port(adapter, i) {
2145 		struct port_info *p = &adapter->port[i];
2146 		struct ifnet *ifp = p->ifp;
2147 		int status;
2148 
2149 		if (adapter->flags & CXGB_SHUTDOWN)
2150 			return;
2151 
2152 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2153 			continue;
2154 
2155 		status = 0;
2156 		PORT_LOCK(p);
2157 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
2158 			status = t3b2_mac_watchdog_task(&p->mac);
2159 		if (status == 1)
2160 			p->mac.stats.num_toggled++;
2161 		else if (status == 2) {
2162 			struct cmac *mac = &p->mac;
2163 			int mtu = ifp->if_mtu;
2164 
2165 			if (ifp->if_capenable & IFCAP_VLAN_MTU)
2166 				mtu += ETHER_VLAN_ENCAP_LEN;
2167 			t3_mac_set_mtu(mac, mtu);
2168 			t3_mac_set_address(mac, 0, p->hw_addr);
2169 			cxgb_set_rxmode(p);
2170 			t3_link_start(&p->phy, mac, &p->link_config);
2171 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2172 			t3_port_intr_enable(adapter, p->port_id);
2173 			p->mac.stats.num_resets++;
2174 		}
2175 		PORT_UNLOCK(p);
2176 	}
2177 }
2178 
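/*
 * Periodic timer.  The callout runs in softclock context, so it only
 * reschedules itself and defers the real work (link polling, the B2
 * MAC watchdog, stats accumulation) to cxgb_tick_handler() on the
 * adapter taskqueue.
 */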
2179 static void
2180 cxgb_tick(void *arg)
2181 {
2182 	adapter_t *sc = (adapter_t *)arg;
2183 
2184 	if (sc->flags & CXGB_SHUTDOWN)
2185 		return;
2186 
2187 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2188 	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
2189 }
2190 
2191 static void
2192 cxgb_tick_handler(void *arg, int count)
2193 {
2194 	adapter_t *sc = (adapter_t *)arg;
2195 	const struct adapter_params *p = &sc->params;
2196 	int i;
2197 
2198 	if (sc->flags & CXGB_SHUTDOWN)
2199 		return;
2200 
2201 	ADAPTER_LOCK(sc);
2202 	if (p->linkpoll_period)
2203 		check_link_status(sc);
2204 
2205 	sc->check_task_cnt++;
2206 
2207 	/*
2208 	 * adapter lock can currently only be acquired after the
2209 	 * port lock
2210 	 */
2211 	ADAPTER_UNLOCK(sc);
2212 
2213 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2214 		check_t3b2_mac(sc);
2215 
2216 	/* Update MAC stats if it's time to do so */
2217 	if (!p->linkpoll_period ||
2218 	    (sc->check_task_cnt * p->linkpoll_period) / 10 >=
2219 	    p->stats_update_period) {
2220 		for_each_port(sc, i) {
2221 			struct port_info *port = &sc->port[i];
2222 			PORT_LOCK(port);
2223 			t3_mac_update_stats(&port->mac);
2224 			PORT_UNLOCK(port);
2225 		}
2226 		sc->check_task_cnt = 0;
2227 	}
2228 }
2229 
2230 static void
2231 touch_bars(device_t dev)
2232 {
2233 	/*
	 * Don't enable yet: this block is a placeholder carried over from
	 * the Linux driver (pdev and the pci_*_config_dword calls are not
	 * FreeBSD KPIs) and is kept compiled out by the "&& 0" below.
	 */
2236 #if !defined(__LP64__) && 0
2237 	u32 v;
2238 
2239 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2240 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2241 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2242 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2243 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2244 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2245 #endif
2246 }
2247 
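
/*
 * Write a byte range to the serial EEPROM.  The SEEPROM is word
 * addressed, so an unaligned offset or length is handled by reading
 * the first and last words of the aligned span, merging in the
 * caller's bytes, and writing the whole span back with write
 * protection temporarily lifted.
 */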
2248 static int
2249 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2250 {
2251 	uint8_t *buf;
2252 	int err = 0;
2253 	u32 aligned_offset, aligned_len, *p;
2254 	struct adapter *adapter = pi->adapter;
2255 
2256 
2257 	aligned_offset = offset & ~3;
2258 	aligned_len = (len + (offset & 3) + 3) & ~3;
2259 
2260 	if (aligned_offset != offset || aligned_len != len) {
		/* M_WAITOK allocations sleep rather than fail; no NULL check needed. */
2261 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2264 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2265 		if (!err && aligned_len > 4)
2266 			err = t3_seeprom_read(adapter,
2267 					      aligned_offset + aligned_len - 4,
2268 					      (u32 *)&buf[aligned_len - 4]);
2269 		if (err)
2270 			goto out;
2271 		memcpy(buf + (offset & 3), data, len);
2272 	} else
2273 		buf = (uint8_t *)(uintptr_t)data;
2274 
2275 	err = t3_seeprom_wp(adapter, 0);
2276 	if (err)
2277 		goto out;
2278 
2279 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2280 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2281 		aligned_offset += 4;
2282 	}
2283 
2284 	if (!err)
2285 		err = t3_seeprom_wp(adapter, 1);
2286 out:
2287 	if (buf != data)
2288 		free(buf, M_DEVBUF);
2289 	return (err);
2290 }
2291 
2292 
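/*
 * Range check used by the ioctls below: a negative value means the
 * parameter was not supplied and should be left unchanged, so it is
 * deliberately treated as in range.
 */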
2293 static int
2294 in_range(int val, int lo, int hi)
2295 {
2296 	return (val < 0 || (val <= hi && val >= lo));
2297 }
2298 
2299 static int
2300 cxgb_extension_open(struct cdev *dev, int flags, int fmt, d_thread_t *td)
2301 {
2302 	return (0);
2303 }
2304 
2305 static int
2306 cxgb_extension_close(struct cdev *dev, int flags, int fmt, d_thread_t *td)
2307 {
2308 	return (0);
2309 }
2310 
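/*
 * Handler for the Chelsio-private ioctls issued against the port's
 * character device (e.g. by cxgbtool).  A minimal userland sketch of
 * reading one register, assuming an illustrative /dev/cxgb0 node and
 * the ch_reg layout defined in the ioctl header:
 *
 *	struct ch_reg r = { .addr = A_PL_REV };
 *	int fd = open("/dev/cxgb0", O_RDWR);
 *	if (fd >= 0 && ioctl(fd, CHELSIO_GETREG, &r) == 0)
 *		printf("0x%x = 0x%x\n", r.addr, r.val);
 *
 * Every command requires driver privilege, checked below before the
 * switch.
 */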
2311 static int
2312 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2313     int fflag, struct thread *td)
2314 {
2315 	int mmd, error = 0;
2316 	struct port_info *pi = dev->si_drv1;
2317 	adapter_t *sc = pi->adapter;
2318 
2319 #ifdef PRIV_SUPPORTED
2320 	if (priv_check(td, PRIV_DRIVER)) {
2321 		if (cxgb_debug)
2322 			printf("user does not have access to privileged ioctls\n");
2323 		return (EPERM);
2324 	}
2325 #else
2326 	if (suser(td)) {
2327 		if (cxgb_debug)
2328 			printf("user does not have access to privileged ioctls\n");
2329 		return (EPERM);
2330 	}
2331 #endif
2332 
2333 	switch (cmd) {
2334 	case CHELSIO_GET_MIIREG: {
2335 		uint32_t val;
2336 		struct cphy *phy = &pi->phy;
2337 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2338 
2339 		if (!phy->mdio_read)
2340 			return (EOPNOTSUPP);
2341 		if (is_10G(sc)) {
2342 			mmd = mid->phy_id >> 8;
2343 			if (!mmd)
2344 				mmd = MDIO_DEV_PCS;
2345 			else if (mmd > MDIO_DEV_XGXS)
2346 				return (EINVAL);
2347 
2348 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2349 					     mid->reg_num, &val);
2350 		} else
2351 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2352 					     mid->reg_num & 0x1f, &val);
2353 		if (error == 0)
2354 			mid->val_out = val;
2355 		break;
2356 	}
2357 	case CHELSIO_SET_MIIREG: {
2358 		struct cphy *phy = &pi->phy;
2359 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2360 
2361 		if (!phy->mdio_write)
2362 			return (EOPNOTSUPP);
2363 		if (is_10G(sc)) {
2364 			mmd = mid->phy_id >> 8;
2365 			if (!mmd)
2366 				mmd = MDIO_DEV_PCS;
2367 			else if (mmd > MDIO_DEV_XGXS)
2368 				return (EINVAL);
2369 
2370 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2371 					      mmd, mid->reg_num, mid->val_in);
2372 		} else
2373 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2374 					      mid->reg_num & 0x1f,
2375 					      mid->val_in);
2376 		break;
2377 	}
2378 	case CHELSIO_SETREG: {
2379 		struct ch_reg *edata = (struct ch_reg *)data;
2380 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2381 			return (EFAULT);
2382 		t3_write_reg(sc, edata->addr, edata->val);
2383 		break;
2384 	}
2385 	case CHELSIO_GETREG: {
2386 		struct ch_reg *edata = (struct ch_reg *)data;
2387 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2388 			return (EFAULT);
2389 		edata->val = t3_read_reg(sc, edata->addr);
2390 		break;
2391 	}
2392 	case CHELSIO_GET_SGE_CONTEXT: {
2393 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2394 		mtx_lock_spin(&sc->sge.reg_lock);
2395 		switch (ecntxt->cntxt_type) {
2396 		case CNTXT_TYPE_EGRESS:
2397 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2398 			    ecntxt->data);
2399 			break;
2400 		case CNTXT_TYPE_FL:
2401 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2402 			    ecntxt->data);
2403 			break;
2404 		case CNTXT_TYPE_RSP:
2405 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2406 			    ecntxt->data);
2407 			break;
2408 		case CNTXT_TYPE_CQ:
2409 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2410 			    ecntxt->data);
2411 			break;
2412 		default:
2413 			error = EINVAL;
2414 			break;
2415 		}
2416 		mtx_unlock_spin(&sc->sge.reg_lock);
2417 		break;
2418 	}
2419 	case CHELSIO_GET_SGE_DESC: {
2420 		struct ch_desc *edesc = (struct ch_desc *)data;
2421 		int ret;
2422 		if (edesc->queue_num >= SGE_QSETS * 6)
2423 			return (EINVAL);
2424 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2425 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2426 		if (ret < 0)
2427 			return (EINVAL);
2428 		edesc->size = ret;
2429 		break;
2430 	}
2431 	case CHELSIO_GET_QSET_PARAMS: {
2432 		struct qset_params *q;
2433 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2434 		int q1 = pi->first_qset;
2435 		int nqsets = pi->nqsets;
2436 		int i;
2437 
2438 		if (t->qset_idx >= nqsets)
2439 			return (EINVAL);
2440 
2441 		i = q1 + t->qset_idx;
2442 		q = &sc->params.sge.qset[i];
2443 		t->rspq_size   = q->rspq_size;
2444 		t->txq_size[0] = q->txq_size[0];
2445 		t->txq_size[1] = q->txq_size[1];
2446 		t->txq_size[2] = q->txq_size[2];
2447 		t->fl_size[0]  = q->fl_size;
2448 		t->fl_size[1]  = q->jumbo_size;
2449 		t->polling     = q->polling;
2450 		t->lro         = q->lro;
2451 		t->intr_lat    = q->coalesce_usecs;
2452 		t->cong_thres  = q->cong_thres;
2453 		t->qnum        = i;
2454 
2455 		if (sc->flags & USING_MSIX)
2456 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2457 		else
2458 			t->vector = rman_get_start(sc->irq_res);
2459 
2460 		break;
2461 	}
2462 	case CHELSIO_GET_QSET_NUM: {
2463 		struct ch_reg *edata = (struct ch_reg *)data;
2464 		edata->val = pi->nqsets;
2465 		break;
2466 	}
2467 	case CHELSIO_LOAD_FW: {
2468 		uint8_t *fw_data;
2469 		uint32_t vers;
2470 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2471 
2472 		/*
2473 		 * You're allowed to load a firmware only before FULL_INIT_DONE
2474 		 *
2475 		 * FW_UPTODATE is also set so the rest of the initialization
2476 		 * will not overwrite what was loaded here.  This gives you the
2477 		 * flexibility to load any firmware (and maybe shoot yourself in
2478 		 * the foot).
2479 		 */
2480 
2481 		ADAPTER_LOCK(sc);
2482 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2483 			ADAPTER_UNLOCK(sc);
2484 			return (EBUSY);
2485 		}
2486 
2487 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2488 		if (!fw_data)
2489 			error = ENOMEM;
2490 		else
2491 			error = copyin(t->buf, fw_data, t->len);
2492 
2493 		if (!error)
2494 			error = -t3_load_fw(sc, fw_data, t->len);
2495 
2496 		if (t3_get_fw_version(sc, &vers) == 0) {
2497 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2498 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2499 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2500 		}
2501 
2502 		if (!error)
2503 			sc->flags |= FW_UPTODATE;
2504 
2505 		free(fw_data, M_DEVBUF);
2506 		ADAPTER_UNLOCK(sc);
2507 		break;
2508 	}
2509 	case CHELSIO_LOAD_BOOT: {
2510 		uint8_t *boot_data;
2511 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2512 
2513 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2514 		if (!boot_data)
2515 			return (ENOMEM);
2516 
2517 		error = copyin(t->buf, boot_data, t->len);
2518 		if (!error)
2519 			error = -t3_load_boot(sc, boot_data, t->len);
2520 
2521 		free(boot_data, M_DEVBUF);
2522 		break;
2523 	}
2524 	case CHELSIO_GET_PM: {
2525 		struct ch_pm *m = (struct ch_pm *)data;
2526 		struct tp_params *p = &sc->params.tp;
2527 
2528 		if (!is_offload(sc))
2529 			return (EOPNOTSUPP);
2530 
2531 		m->tx_pg_sz = p->tx_pg_size;
2532 		m->tx_num_pg = p->tx_num_pgs;
2533 		m->rx_pg_sz  = p->rx_pg_size;
2534 		m->rx_num_pg = p->rx_num_pgs;
2535 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2536 
2537 		break;
2538 	}
2539 	case CHELSIO_SET_PM: {
2540 		struct ch_pm *m = (struct ch_pm *)data;
2541 		struct tp_params *p = &sc->params.tp;
2542 
2543 		if (!is_offload(sc))
2544 			return (EOPNOTSUPP);
2545 		if (sc->flags & FULL_INIT_DONE)
2546 			return (EBUSY);
2547 
2548 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2549 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2550 			return (EINVAL);	/* not power of 2 */
2551 		if (!(m->rx_pg_sz & 0x14000))
2552 			return (EINVAL);	/* not 16KB or 64KB */
2553 		if (!(m->tx_pg_sz & 0x1554000))
2554 			return (EINVAL);	/* not a power of 4 in 16KB..16MB */
2555 		if (m->tx_num_pg == -1)
2556 			m->tx_num_pg = p->tx_num_pgs;
2557 		if (m->rx_num_pg == -1)
2558 			m->rx_num_pg = p->rx_num_pgs;
2559 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2560 			return (EINVAL);
2561 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2562 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2563 			return (EINVAL);
2564 
2565 		p->rx_pg_size = m->rx_pg_sz;
2566 		p->tx_pg_size = m->tx_pg_sz;
2567 		p->rx_num_pgs = m->rx_num_pg;
2568 		p->tx_num_pgs = m->tx_num_pg;
2569 		break;
2570 	}
2571 	case CHELSIO_SETMTUTAB: {
2572 		struct ch_mtus *m = (struct ch_mtus *)data;
2573 		int i;
2574 
2575 		if (!is_offload(sc))
2576 			return (EOPNOTSUPP);
2577 		if (offload_running(sc))
2578 			return (EBUSY);
2579 		if (m->nmtus != NMTUS)
2580 			return (EINVAL);
2581 		if (m->mtus[0] < 81)         /* accommodate SACK */
2582 			return (EINVAL);
2583 
2584 		/*
2585 		 * MTUs must be in ascending order
2586 		 */
2587 		for (i = 1; i < NMTUS; ++i)
2588 			if (m->mtus[i] < m->mtus[i - 1])
2589 				return (EINVAL);
2590 
2591 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2592 		break;
2593 	}
2594 	case CHELSIO_GETMTUTAB: {
2595 		struct ch_mtus *m = (struct ch_mtus *)data;
2596 
2597 		if (!is_offload(sc))
2598 			return (EOPNOTSUPP);
2599 
2600 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2601 		m->nmtus = NMTUS;
2602 		break;
2603 	}
2604 	case CHELSIO_GET_MEM: {
2605 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2606 		struct mc7 *mem;
2607 		uint8_t *useraddr;
2608 		u64 buf[32];
2609 
2610 		/*
2611 		 * Use these to avoid modifying len/addr in the return
2612 		 * struct
2613 		 */
2614 		uint32_t len = t->len, addr = t->addr;
2615 
2616 		if (!is_offload(sc))
2617 			return (EOPNOTSUPP);
2618 		if (!(sc->flags & FULL_INIT_DONE))
2619 			return (EIO);         /* need the memory controllers */
2620 		if ((addr & 0x7) || (len & 0x7))
2621 			return (EINVAL);
2622 		if (t->mem_id == MEM_CM)
2623 			mem = &sc->cm;
2624 		else if (t->mem_id == MEM_PMRX)
2625 			mem = &sc->pmrx;
2626 		else if (t->mem_id == MEM_PMTX)
2627 			mem = &sc->pmtx;
2628 		else
2629 			return (EINVAL);
2630 
2631 		/*
2632 		 * Version scheme:
2633 		 * bits 0..9: chip version
2634 		 * bits 10..15: chip revision
2635 		 */
2636 		t->version = 3 | (sc->params.rev << 10);
2637 
2638 		/*
2639 		 * Read 256 bytes at a time as len can be large and we don't
2640 		 * want to use huge intermediate buffers.
2641 		 */
2642 		useraddr = (uint8_t *)t->buf;
2643 		while (len) {
2644 			unsigned int chunk = min(len, sizeof(buf));
2645 
2646 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2647 			if (error)
2648 				return (-error);
2649 			if (copyout(buf, useraddr, chunk))
2650 				return (EFAULT);
2651 			useraddr += chunk;
2652 			addr += chunk;
2653 			len -= chunk;
2654 		}
2655 		break;
2656 	}
2657 	case CHELSIO_READ_TCAM_WORD: {
2658 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2659 
2660 		if (!is_offload(sc))
2661 			return (EOPNOTSUPP);
2662 		if (!(sc->flags & FULL_INIT_DONE))
2663 			return (EIO);         /* need MC5 */
2664 		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2665 		break;
2666 	}
2667 	case CHELSIO_SET_TRACE_FILTER: {
2668 		struct ch_trace *t = (struct ch_trace *)data;
2669 		const struct trace_params *tp;
2670 
2671 		tp = (const struct trace_params *)&t->sip;
2672 		if (t->config_tx)
2673 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2674 					       t->trace_tx);
2675 		if (t->config_rx)
2676 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2677 					       t->trace_rx);
2678 		break;
2679 	}
2680 	case CHELSIO_SET_PKTSCHED: {
2681 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2682 		if (sc->open_device_map == 0)
2683 			return (EAGAIN);
2684 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2685 		    p->binding);
2686 		break;
2687 	}
2688 	case CHELSIO_IFCONF_GETREGS: {
2689 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2690 		int reglen = cxgb_get_regs_len();
2691 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2692 		if (buf == NULL) {
2693 			return (ENOMEM);
2694 		}
2695 		if (regs->len > reglen)
2696 			regs->len = reglen;
2697 		else if (regs->len < reglen)
2698 			error = E2BIG;
2699 
2700 		if (!error) {
2701 			cxgb_get_regs(sc, regs, buf);
2702 			error = copyout(buf, regs->data, reglen);
2703 		}
2704 		free(buf, M_DEVBUF);
2705 
2706 		break;
2707 	}
2708 	case CHELSIO_SET_HW_SCHED: {
2709 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2710 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2711 
2712 		if ((sc->flags & FULL_INIT_DONE) == 0)
2713 			return (EAGAIN);       /* need TP to be initialized */
2714 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2715 		    !in_range(t->channel, 0, 1) ||
2716 		    !in_range(t->kbps, 0, 10000000) ||
2717 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2718 		    !in_range(t->flow_ipg, 0,
2719 			      dack_ticks_to_usec(sc, 0x7ff)))
2720 			return (EINVAL);
2721 
2722 		if (t->kbps >= 0) {
2723 			error = t3_config_sched(sc, t->kbps, t->sched);
2724 			if (error < 0)
2725 				return (-error);
2726 		}
2727 		if (t->class_ipg >= 0)
2728 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2729 		if (t->flow_ipg >= 0) {
2730 			t->flow_ipg *= 1000;     /* us -> ns */
2731 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2732 		}
2733 		if (t->mode >= 0) {
2734 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2735 
2736 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2737 					 bit, t->mode ? bit : 0);
2738 		}
2739 		if (t->channel >= 0)
2740 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2741 					 1 << t->sched, t->channel << t->sched);
2742 		break;
2743 	}
2744 	case CHELSIO_GET_EEPROM: {
2745 		int i;
2746 		struct ch_eeprom *e = (struct ch_eeprom *)data;
		uint8_t *buf;

		/*
		 * Bounds-check the user-supplied range: the read loop below
		 * stores into a buffer of EEPROMSIZE bytes at offsets derived
		 * from e->offset and e->len.
		 */
		if (e->offset >= EEPROMSIZE || e->len > EEPROMSIZE - e->offset)
			return (EINVAL);

		buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
		if (buf == NULL) {
			return (ENOMEM);
		}
2752 		e->magic = EEPROM_MAGIC;
2753 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2754 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2755 
2756 		if (!error)
2757 			error = copyout(buf + e->offset, e->data, e->len);
2758 
2759 		free(buf, M_DEVBUF);
2760 		break;
2761 	}
2762 	case CHELSIO_CLEAR_STATS: {
2763 		if (!(sc->flags & FULL_INIT_DONE))
2764 			return (EAGAIN);
2765 
2766 		PORT_LOCK(pi);
2767 		t3_mac_update_stats(&pi->mac);
2768 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
2769 		PORT_UNLOCK(pi);
2770 		break;
2771 	}
2772 	default:
2773 		return (EOPNOTSUPP);
2774 		break;
2775 	}
2776 
2777 	return (error);
2778 }
2779 
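/*
 * Copy the 32-bit registers in [start, end] into the dump buffer at
 * the offset equal to the register address, so the resulting image is
 * a sparse mirror of the register file.
 */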
2780 static __inline void
2781 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
2782     unsigned int end)
2783 {
2784 	uint32_t *p = (uint32_t *)(buf + start);
2785 
2786 	for ( ; start <= end; start += sizeof(uint32_t))
2787 		*p++ = t3_read_reg(ap, start);
2788 }
2789 
2790 #define T3_REGMAP_SIZE (3 * 1024)
2791 static int
2792 cxgb_get_regs_len(void)
2793 {
2794 	return (T3_REGMAP_SIZE);
2795 }
2796 
2797 static void
2798 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
2799 {
2800 
2801 	/*
2802 	 * Version scheme:
2803 	 * bits 0..9: chip version
2804 	 * bits 10..15: chip revision
2805 	 * bit 31: set for PCIe cards
2806 	 */
2807 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
2808 
2809 	/*
2810 	 * We skip the MAC statistics registers because they are clear-on-read.
2811 	 * Also reading multi-register stats would need to synchronize with the
2812 	 * periodic mac stats accumulation.  Hard to justify the complexity.
2813 	 */
2814 	memset(buf, 0, cxgb_get_regs_len());
2815 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
2816 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
2817 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
2818 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
2819 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
2820 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
2821 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
2822 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
2823 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
2824 }
2825 
2826 
2827 MODULE_DEPEND(if_cxgb, cxgb_t3fw, 1, 1, 1);
2828