xref: /freebsd/sys/dev/cxgb/cxgb_main.c (revision b3aaa0cc21c63d388230c7ef2a80abd631ff20d5)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2008, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/bus.h>
37 #include <sys/module.h>
38 #include <sys/pciio.h>
39 #include <sys/conf.h>
40 #include <machine/bus.h>
41 #include <machine/resource.h>
42 #include <sys/bus_dma.h>
43 #include <sys/ktr.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/linker.h>
48 #include <sys/firmware.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/smp.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/queue.h>
55 #include <sys/taskqueue.h>
56 #include <sys/proc.h>
57 
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_media.h>
64 #include <net/if_types.h>
65 #include <net/if_vlan_var.h>
66 
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/if_ether.h>
70 #include <netinet/ip.h>
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74 
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pci_private.h>
78 
79 #include <cxgb_include.h>
80 
81 #ifdef PRIV_SUPPORTED
82 #include <sys/priv.h>
83 #endif
84 
85 static int cxgb_setup_msix(adapter_t *, int);
86 static void cxgb_teardown_msix(adapter_t *);
87 static void cxgb_init(void *);
88 static void cxgb_init_locked(struct port_info *);
89 static void cxgb_stop_locked(struct port_info *);
90 static void cxgb_set_rxmode(struct port_info *);
91 static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
92 static int cxgb_media_change(struct ifnet *);
93 static int cxgb_ifm_type(int);
94 static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
95 static int setup_sge_qsets(adapter_t *);
96 static void cxgb_async_intr(void *);
97 static void cxgb_ext_intr_handler(void *, int);
98 static void cxgb_tick_handler(void *, int);
99 static void cxgb_down_locked(struct adapter *sc);
100 static void cxgb_tick(void *);
101 static void setup_rss(adapter_t *sc);
102 
103 /* Attachment glue for the PCI controller end of the device.  Each port of
104  * the device is attached separately, as defined later.
105  */
106 static int cxgb_controller_probe(device_t);
107 static int cxgb_controller_attach(device_t);
108 static int cxgb_controller_detach(device_t);
109 static void cxgb_free(struct adapter *);
110 static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
111     unsigned int end);
112 static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
113 static int cxgb_get_regs_len(void);
114 static int offload_open(struct port_info *pi);
115 static void touch_bars(device_t dev);
116 static int offload_close(struct t3cdev *tdev);
117 static void cxgb_link_start(struct port_info *p);
118 
119 static device_method_t cxgb_controller_methods[] = {
120 	DEVMETHOD(device_probe,		cxgb_controller_probe),
121 	DEVMETHOD(device_attach,	cxgb_controller_attach),
122 	DEVMETHOD(device_detach,	cxgb_controller_detach),
123 
124 	/* bus interface */
125 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
126 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
127 
128 	{ 0, 0 }
129 };
130 
131 static driver_t cxgb_controller_driver = {
132 	"cxgbc",
133 	cxgb_controller_methods,
134 	sizeof(struct adapter)
135 };
136 
137 static devclass_t	cxgb_controller_devclass;
138 DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, 0, 0);
139 
140 /*
141  * Attachment glue for the ports.  Attachment is done directly to the
142  * controller device.
143  */
144 static int cxgb_port_probe(device_t);
145 static int cxgb_port_attach(device_t);
146 static int cxgb_port_detach(device_t);
147 
148 static device_method_t cxgb_port_methods[] = {
149 	DEVMETHOD(device_probe,		cxgb_port_probe),
150 	DEVMETHOD(device_attach,	cxgb_port_attach),
151 	DEVMETHOD(device_detach,	cxgb_port_detach),
152 	{ 0, 0 }
153 };
154 
155 static driver_t cxgb_port_driver = {
156 	"cxgb",
157 	cxgb_port_methods,
158 	0
159 };
160 
161 static d_ioctl_t cxgb_extension_ioctl;
162 static d_open_t cxgb_extension_open;
163 static d_close_t cxgb_extension_close;
164 
165 static struct cdevsw cxgb_cdevsw = {
166        .d_version =    D_VERSION,
167        .d_flags =      0,
168        .d_open =       cxgb_extension_open,
169        .d_close =      cxgb_extension_close,
170        .d_ioctl =      cxgb_extension_ioctl,
171        .d_name =       "cxgb",
172 };
173 
174 static devclass_t	cxgb_port_devclass;
175 DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
176 
177 #define SGE_MSIX_COUNT (SGE_QSETS + 1)
178 
179 /*
180  * The driver uses the best interrupt scheme available on a platform in the
181  * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
182  * of these schemes the driver may consider as follows:
183  *
184  * msi = 2: choose from among all three options
185  * msi = 1: only consider MSI and pin interrupts
186  * msi = 0: force pin interrupts
187  */
188 static int msi_allowed = 2;
189 
190 TUNABLE_INT("hw.cxgb.msi_allowed", &msi_allowed);
191 SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters");
192 SYSCTL_UINT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
193     "MSI-X, MSI, INTx selector");
194 
195 /*
196  * The driver enables offload by default.
197  * To disable it, use ofld_disable = 1.
198  */
199 static int ofld_disable = 0;
200 TUNABLE_INT("hw.cxgb.ofld_disable", &ofld_disable);
201 SYSCTL_UINT(_hw_cxgb, OID_AUTO, ofld_disable, CTLFLAG_RDTUN, &ofld_disable, 0,
202     "disable ULP offload");
203 
204 /*
205  * The driver uses an auto-queue algorithm by default.
206  * To disable it and force a single queue-set per port, use multiq = 0
207  */
208 static int multiq = 1;
209 TUNABLE_INT("hw.cxgb.multiq", &multiq);
210 SYSCTL_UINT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
211     "use min(ncpus/ports, 8) queue-sets per port");
212 
213 /*
214  * By default the driver will not update the firmware unless
215  * it was compiled against a newer version.
217  */
218 static int force_fw_update = 0;
219 TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update);
220 SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
221     "update firmware even if up to date");
222 
223 int cxgb_use_16k_clusters = 1;
224 TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters);
225 SYSCTL_UINT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
226     &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
227 
228 enum {
229 	MAX_TXQ_ENTRIES      = 16384,
230 	MAX_CTRL_TXQ_ENTRIES = 1024,
231 	MAX_RSPQ_ENTRIES     = 16384,
232 	MAX_RX_BUFFERS       = 16384,
233 	MAX_RX_JUMBO_BUFFERS = 16384,
234 	MIN_TXQ_ENTRIES      = 4,
235 	MIN_CTRL_TXQ_ENTRIES = 4,
236 	MIN_RSPQ_ENTRIES     = 32,
237 	MIN_FL_ENTRIES       = 32,
238 	MIN_FL_JUMBO_ENTRIES = 32
239 };
240 
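/*
 * Software shadow of one hardware packet filter: the 4-tuple and
 * VLAN/MAC match criteria plus the action to take on a match
 * (pass/drop, RSS or an explicit queue set).
 */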
241 struct filter_info {
242 	u32 sip;
243 	u32 sip_mask;
244 	u32 dip;
245 	u16 sport;
246 	u16 dport;
247 	u32 vlan:12;
248 	u32 vlan_prio:3;
249 	u32 mac_hit:1;
250 	u32 mac_idx:4;
251 	u32 mac_vld:1;
252 	u32 pkt_type:2;
253 	u32 report_filter_id:1;
254 	u32 pass:1;
255 	u32 rss:1;
256 	u32 qset:3;
257 	u32 locked:1;
258 	u32 valid:1;
259 };
260 
261 enum { FILTER_NO_VLAN_PRI = 7 };
262 
263 #define EEPROM_MAGIC 0x38E2F10C
264 
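/*
 * The bits of open_device_map that correspond to ports; used to tell
 * whether any port, as opposed to the offload device, is open.
 */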
265 #define PORT_MASK ((1 << MAX_NPORTS) - 1)
266 
267 /* Table for probing the cards.  The desc field isn't actually used */
268 struct cxgb_ident {
269 	uint16_t	vendor;
270 	uint16_t	device;
271 	int		index;
272 	char		*desc;
273 } cxgb_identifiers[] = {
274 	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
275 	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
276 	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
277 	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
278 	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
279 	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
280 	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
281 	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
282 	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
283 	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
284 	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
285 	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "N310E"},
286 	{0, 0, 0, NULL}
287 };
288 
289 static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);
290 
291 
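/*
 * Map the chip revision to the single letter ('a', 'b' or 'c') used in
 * version strings and image names; 'z' if the revision is unrecognized.
 */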
292 static __inline char
293 t3rev2char(struct adapter *adapter)
294 {
295 	char rev = 'z';
296 
297 	switch(adapter->params.rev) {
298 	case T3_REV_A:
299 		rev = 'a';
300 		break;
301 	case T3_REV_B:
302 	case T3_REV_B2:
303 		rev = 'b';
304 		break;
305 	case T3_REV_C:
306 		rev = 'c';
307 		break;
308 	}
309 	return rev;
310 }
311 
312 static struct cxgb_ident *
313 cxgb_get_ident(device_t dev)
314 {
315 	struct cxgb_ident *id;
316 
317 	for (id = cxgb_identifiers; id->desc != NULL; id++) {
318 		if ((id->vendor == pci_get_vendor(dev)) &&
319 		    (id->device == pci_get_device(dev))) {
320 			return (id);
321 		}
322 	}
323 	return (NULL);
324 }
325 
326 static const struct adapter_info *
327 cxgb_get_adapter_info(device_t dev)
328 {
329 	struct cxgb_ident *id;
330 	const struct adapter_info *ai;
331 
332 	id = cxgb_get_ident(dev);
333 	if (id == NULL)
334 		return (NULL);
335 
336 	ai = t3_get_adapter_info(id->index);
337 
338 	return (ai);
339 }
340 
341 static int
342 cxgb_controller_probe(device_t dev)
343 {
344 	const struct adapter_info *ai;
345 	char *ports, buf[80];
346 	int nports;
347 	struct adapter *sc = device_get_softc(dev);
348 
349 	ai = cxgb_get_adapter_info(dev);
350 	if (ai == NULL)
351 		return (ENXIO);
352 
353 	nports = ai->nports0 + ai->nports1;
354 	if (nports == 1)
355 		ports = "port";
356 	else
357 		ports = "ports";
358 
359 	snprintf(buf, sizeof(buf), "%s %sNIC, rev: %d nports: %d %s",
360 	    ai->desc, is_offload(sc) ? "R" : "",
361 	    sc->params.rev, nports, ports);
362 	device_set_desc_copy(dev, buf);
363 	return (BUS_PROBE_DEFAULT);
364 }
365 
366 #define FW_FNAME "cxgb_t3fw"
367 #define TPEEPROM_NAME "t3b_tp_eeprom"
368 #define TPSRAM_NAME "t3b_protocol_sram"
369 
370 static int
371 upgrade_fw(adapter_t *sc)
372 {
373 #ifdef FIRMWARE_LATEST
374 	const struct firmware *fw;
375 #else
376 	struct firmware *fw;
377 #endif
378 	int status;
379 
380 	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
381 		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
382 		return (ENOENT);
383 	} else
384 		device_printf(sc->dev, "updating firmware on card\n");
385 	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
386 
387 	device_printf(sc->dev, "firmware update returned %s %d\n", (status == 0) ? "success" : "fail", status);
388 
389 	firmware_put(fw, FIRMWARE_UNLOAD);
390 
391 	return (status);
392 }
393 
394 static int
395 cxgb_controller_attach(device_t dev)
396 {
397 	device_t child;
398 	const struct adapter_info *ai;
399 	struct adapter *sc;
400 	int i, error = 0;
401 	uint32_t vers;
402 	int port_qsets = 1;
403 #ifdef MSI_SUPPORTED
404 	int msi_needed, reg;
405 #endif
406 	int must_load = 0;
407 	char buf[80];
408 
409 	sc = device_get_softc(dev);
410 	sc->dev = dev;
411 	sc->msi_count = 0;
412 	ai = cxgb_get_adapter_info(dev);
413 
414 	/*
415 	 * XXX not really related but a recent addition
416 	 */
417 #ifdef MSI_SUPPORTED
418 	/* Find the PCIe link width and set max read request to 4KB. */
419 	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
420 		uint16_t lnk, pectl;
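		/*
		 * Offset 0x12 within the PCIe capability is the Link Status
		 * register (negotiated width in bits 9:4); offset 0x8 is
		 * Device Control, whose bits 14:12 encode the max read
		 * request size (5 selects 4096 bytes).
		 */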
421 		lnk = pci_read_config(dev, reg + 0x12, 2);
422 		sc->link_width = (lnk >> 4) & 0x3f;
423 
424 		pectl = pci_read_config(dev, reg + 0x8, 2);
425 		pectl = (pectl & ~0x7000) | (5 << 12);
426 		pci_write_config(dev, reg + 0x8, pectl, 2);
427 	}
428 
429 	if (sc->link_width != 0 && sc->link_width <= 4 &&
430 	    (ai->nports0 + ai->nports1) <= 2) {
431 		device_printf(sc->dev,
432 		    "PCIe x%d Link, expect reduced performance\n",
433 		    sc->link_width);
434 	}
435 #endif
436 	touch_bars(dev);
437 	pci_enable_busmaster(dev);
438 	/*
439 	 * Allocate the registers and make them available to the driver.
440 	 * The registers that we care about for NIC mode are in BAR 0
441 	 */
442 	sc->regs_rid = PCIR_BAR(0);
443 	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
444 	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
445 		device_printf(dev, "Cannot allocate BAR region 0\n");
446 		return (ENXIO);
447 	}
448 	sc->udbs_rid = PCIR_BAR(2);
449 	sc->udbs_res = NULL;
450 	if (is_offload(sc) &&
451 	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
452 		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
453 		device_printf(dev, "Cannot allocate BAR region 2\n");
454 		error = ENXIO;
455 		goto out;
456 	}
457 
458 	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
459 	    device_get_unit(dev));
460 	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
461 
462 	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
463 	    device_get_unit(dev));
464 	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
465 	    device_get_unit(dev));
466 	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
467 	    device_get_unit(dev));
468 
469 	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
470 	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
471 	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
472 
473 	sc->bt = rman_get_bustag(sc->regs_res);
474 	sc->bh = rman_get_bushandle(sc->regs_res);
475 	sc->mmio_len = rman_get_size(sc->regs_res);
476 
477 	if (t3_prep_adapter(sc, ai, 1) < 0) {
478 		printf("prep adapter failed\n");
479 		error = ENODEV;
480 		goto out;
481 	}
482 	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
483 	 * enough messages for the queue sets.  If that fails, try falling
484 	 * back to MSI.  If that fails, then try falling back to the legacy
485 	 * interrupt pin model.
486 	 */
487 #ifdef MSI_SUPPORTED
488 
489 	sc->msix_regs_rid = 0x20;
490 	if ((msi_allowed >= 2) &&
491 	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
492 	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
493 
494 		msi_needed = sc->msi_count = SGE_MSIX_COUNT;
495 
496 		if (((error = pci_alloc_msix(dev, &sc->msi_count)) != 0) ||
497 		    (sc->msi_count != msi_needed)) {
498 			device_printf(dev, "msix allocation failed - msi_count = %d"
499 			    " msi_needed=%d will try msi err=%d\n", sc->msi_count,
500 			    msi_needed, error);
501 			sc->msi_count = 0;
502 			pci_release_msi(dev);
503 			bus_release_resource(dev, SYS_RES_MEMORY,
504 			    sc->msix_regs_rid, sc->msix_regs_res);
505 			sc->msix_regs_res = NULL;
506 		} else {
507 			sc->flags |= USING_MSIX;
508 			sc->cxgb_intr = t3_intr_msix;
509 		}
510 	}
511 
512 	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
513 		sc->msi_count = 1;
514 		if (pci_alloc_msi(dev, &sc->msi_count)) {
515 			device_printf(dev, "alloc msi failed - will try INTx\n");
516 			sc->msi_count = 0;
517 			pci_release_msi(dev);
518 		} else {
519 			sc->flags |= USING_MSI;
520 			sc->irq_rid = 1;
521 			sc->cxgb_intr = t3_intr_msi;
522 		}
523 	}
524 #endif
525 	if (sc->msi_count == 0) {
526 		device_printf(dev, "using line interrupts\n");
527 		sc->irq_rid = 0;
528 		sc->cxgb_intr = t3b_intr;
529 	}
530 
531 	if ((sc->flags & USING_MSIX) && multiq)
532 		port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus);
533 
534 	/* Create a private taskqueue thread for handling driver events */
535 #ifdef TASKQUEUE_CURRENT
536 	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
537 	    taskqueue_thread_enqueue, &sc->tq);
538 #else
539 	sc->tq = taskqueue_create_fast("cxgb_taskq", M_NOWAIT,
540 	    taskqueue_thread_enqueue, &sc->tq);
541 #endif
542 	if (sc->tq == NULL) {
543 		device_printf(dev, "failed to allocate controller task queue\n");
		error = ENOMEM;
544 		goto out;
545 	}
546 
547 	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
548 	    device_get_nameunit(dev));
549 	TASK_INIT(&sc->ext_intr_task, 0, cxgb_ext_intr_handler, sc);
550 	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
551 
552 
553 	/* Create a periodic callout for checking adapter status */
554 	callout_init(&sc->cxgb_tick_ch, TRUE);
555 
556 	if ((t3_check_fw_version(sc, &must_load) != 0 && must_load) || force_fw_update) {
557 		/*
558 		 * Warn user that a firmware update will be attempted in init.
559 		 */
560 		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
561 		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
562 		sc->flags &= ~FW_UPTODATE;
563 	} else {
564 		sc->flags |= FW_UPTODATE;
565 	}
566 
567 	if (t3_check_tpsram_version(sc, &must_load) != 0 && must_load) {
568 		/*
569 		 * Warn user that a firmware update will be attempted in init.
570 		 */
571 		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
572 		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
573 		sc->flags &= ~TPS_UPTODATE;
574 	} else {
575 		sc->flags |= TPS_UPTODATE;
576 	}
577 
578 	/*
579 	 * Create a child device for each MAC.  The ethernet attachment
580 	 * will be done in these children.
581 	 */
582 	for (i = 0; i < (sc)->params.nports; i++) {
583 		struct port_info *pi;
584 
585 		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
586 			device_printf(dev, "failed to add child port\n");
587 			error = EINVAL;
588 			goto out;
589 		}
590 		pi = &sc->port[i];
591 		pi->adapter = sc;
592 		pi->nqsets = port_qsets;
593 		pi->first_qset = i*port_qsets;
594 		pi->port_id = i;
595 		pi->tx_chan = i >= ai->nports0;
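		/*
		 * Channel 0 ports transmit on even packet interfaces,
		 * channel 1 ports on odd ones; rxpkt_map records the
		 * inverse mapping.
		 */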
596 		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
597 		sc->rxpkt_map[pi->txpkt_intf] = i;
599 		sc->portdev[i] = child;
600 		device_set_softc(child, pi);
601 	}
602 	if ((error = bus_generic_attach(dev)) != 0)
603 		goto out;
604 
605 	/* initialize sge private state */
606 	t3_sge_init_adapter(sc);
607 
608 	t3_led_ready(sc);
609 
610 	cxgb_offload_init();
611 	if (is_offload(sc)) {
612 		setbit(&sc->registered_device_map, OFFLOAD_DEVMAP_BIT);
613 		cxgb_adapter_ofld(sc);
614 	}
615 	error = t3_get_fw_version(sc, &vers);
616 	if (error)
617 		goto out;
618 
619 	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
620 	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
621 	    G_FW_VERSION_MICRO(vers));
622 
623 	snprintf(buf, sizeof(buf), "%s\t E/C: %s S/N: %s",
624 		 ai->desc,
625 		 sc->params.vpd.ec, sc->params.vpd.sn);
626 	device_set_desc_copy(dev, buf);
627 
628 	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
629 	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
630 	t3_add_attach_sysctls(sc);
631 out:
632 	if (error)
633 		cxgb_free(sc);
634 
635 	return (error);
636 }
637 
638 static int
639 cxgb_controller_detach(device_t dev)
640 {
641 	struct adapter *sc;
642 
643 	sc = device_get_softc(dev);
644 
645 	cxgb_free(sc);
646 
647 	return (0);
648 }
649 
650 static void
651 cxgb_free(struct adapter *sc)
652 {
653 	int i;
654 
655 	ADAPTER_LOCK(sc);
656 	sc->flags |= CXGB_SHUTDOWN;
657 	ADAPTER_UNLOCK(sc);
658 	cxgb_pcpu_shutdown_threads(sc);
659 	ADAPTER_LOCK(sc);
660 
661 /*
662  * drops the lock
663  */
664 	cxgb_down_locked(sc);
665 
666 #ifdef MSI_SUPPORTED
667 	if (sc->flags & (USING_MSI | USING_MSIX)) {
668 		device_printf(sc->dev, "releasing msi message(s)\n");
669 		pci_release_msi(sc->dev);
670 	} else {
671 		device_printf(sc->dev, "no msi message to release\n");
672 	}
673 #endif
674 	if (sc->msix_regs_res != NULL) {
675 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
676 		    sc->msix_regs_res);
677 	}
678 
679 	t3_sge_deinit_sw(sc);
680 	/*
681 	 * Wait for last callout
682 	 */
683 
684 	DELAY(hz*100);
685 
686 	for (i = 0; i < (sc)->params.nports; ++i) {
687 		if (sc->portdev[i] != NULL)
688 			device_delete_child(sc->dev, sc->portdev[i]);
689 	}
690 
691 	bus_generic_detach(sc->dev);
692 	if (sc->tq != NULL) {
693 		taskqueue_free(sc->tq);
694 		sc->tq = NULL;
695 	}
696 
697 	if (is_offload(sc)) {
698 		cxgb_adapter_unofld(sc);
699 		if (isset(&sc->open_device_map, OFFLOAD_DEVMAP_BIT))
700 			offload_close(&sc->tdev);
701 		else
702 			printf("cxgb_free: DEVMAP_BIT not set\n");
703 	} else
704 		printf("cxgb_free: offload not enabled\n");
705 #ifdef notyet
706 	if (sc->flags & CXGB_OFLD_INIT)
707 		cxgb_offload_deactivate(sc);
708 #endif
709 	free(sc->filters, M_DEVBUF);
710 	t3_sge_free(sc);
711 
712 	cxgb_offload_exit();
713 
714 	if (sc->udbs_res != NULL)
715 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
716 		    sc->udbs_res);
717 
718 	if (sc->regs_res != NULL)
719 		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
720 		    sc->regs_res);
721 
722 	MTX_DESTROY(&sc->mdio_lock);
723 	MTX_DESTROY(&sc->sge.reg_lock);
724 	MTX_DESTROY(&sc->elmer_lock);
725 	ADAPTER_LOCK_DEINIT(sc);
726 }
727 
728 /**
729  *	setup_sge_qsets - configure SGE Tx/Rx/response queues
730  *	@sc: the controller softc
731  *
732  *	Determines how many sets of SGE queues to use and initializes them.
733  *	We support multiple queue sets per port if we have MSI-X, otherwise
734  *	just one queue set per port.
735  */
736 static int
737 setup_sge_qsets(adapter_t *sc)
738 {
739 	int i, j, err, irq_idx = 0, qset_idx = 0;
740 	u_int ntxq = SGE_TXQ_PER_SET;
741 
742 	if ((err = t3_sge_alloc(sc)) != 0) {
743 		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
744 		return (err);
745 	}
746 
747 	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
748 		irq_idx = -1;
749 
750 	for (i = 0; i < (sc)->params.nports; i++) {
751 		struct port_info *pi = &sc->port[i];
752 
753 		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
754 			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
755 			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
756 			    &sc->params.sge.qset[qset_idx], ntxq, pi);
757 			if (err) {
758 				t3_free_sge_resources(sc);
759 				device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
760 				    err);
761 				return (err);
762 			}
763 		}
764 	}
765 
766 	return (0);
767 }
768 
769 static void
770 cxgb_teardown_msix(adapter_t *sc)
771 {
772 	int i, nqsets;
773 
774 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
775 		nqsets += sc->port[i].nqsets;
776 
777 	for (i = 0; i < nqsets; i++) {
778 		if (sc->msix_intr_tag[i] != NULL) {
779 			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
780 			    sc->msix_intr_tag[i]);
781 			sc->msix_intr_tag[i] = NULL;
782 		}
783 		if (sc->msix_irq_res[i] != NULL) {
784 			bus_release_resource(sc->dev, SYS_RES_IRQ,
785 			    sc->msix_irq_rid[i], sc->msix_irq_res[i]);
786 			sc->msix_irq_res[i] = NULL;
787 		}
788 	}
789 }
790 
791 static int
792 cxgb_setup_msix(adapter_t *sc, int msix_count)
793 {
794 	int i, j, k, nqsets, rid;
795 
796 	/* The first message indicates link changes and error conditions */
797 	sc->irq_rid = 1;
798 	if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
799 	   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
800 		device_printf(sc->dev, "Cannot allocate msix interrupt\n");
801 		return (EINVAL);
802 	}
803 
804 	if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
805 #ifdef INTR_FILTERS
806 		NULL,
807 #endif
808 		cxgb_async_intr, sc, &sc->intr_tag)) {
809 		device_printf(sc->dev, "Cannot set up interrupt\n");
810 		return (EINVAL);
811 	}
812 	for (i = k = 0; i < (sc)->params.nports; i++) {
813 		nqsets = sc->port[i].nqsets;
814 		for (j = 0; j < nqsets; j++, k++) {
815 			struct sge_qset *qs = &sc->sge.qs[k];
816 
817 			rid = k + 2;
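			/*
			 * rid 1 carries the async/error interrupt; queue
			 * set vectors start at rid 2.
			 */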
818 			if (cxgb_debug)
819 				printf("rid=%d ", rid);
820 			if ((sc->msix_irq_res[k] = bus_alloc_resource_any(
821 			    sc->dev, SYS_RES_IRQ, &rid,
822 			    RF_SHAREABLE | RF_ACTIVE)) == NULL) {
823 				device_printf(sc->dev, "Cannot allocate "
824 				    "interrupt for message %d\n", rid);
825 				return (EINVAL);
826 			}
827 			sc->msix_irq_rid[k] = rid;
828 			if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
829 				INTR_MPSAFE|INTR_TYPE_NET,
830 #ifdef INTR_FILTERS
831 				NULL,
832 #endif
833 				t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
834 				device_printf(sc->dev, "Cannot set up "
835 				    "interrupt for message %d\n", rid);
836 				return (EINVAL);
837 
838 			}
839 #if 0
840 #ifdef IFNET_MULTIQUEUE
841 			if (multiq) {
842 				int vector = rman_get_start(sc->msix_irq_res[k]);
843 				if (bootverbose)
844 					device_printf(sc->dev, "binding vector=%d to cpu=%d\n", vector, k % mp_ncpus);
845 				intr_bind(vector, k % mp_ncpus);
846 			}
847 #endif
848 #endif
849 		}
850 	}
851 
852 	return (0);
853 }
854 
855 static int
856 cxgb_port_probe(device_t dev)
857 {
858 	struct port_info *p;
859 	char buf[80];
860 	const char *desc;
861 
862 	p = device_get_softc(dev);
863 	desc = p->phy.desc;
864 	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
865 	device_set_desc_copy(dev, buf);
866 	return (0);
867 }
868 
869 
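/*
 * Create the per-port character device, named after the ifnet, through
 * which the extension ioctls (cxgb_extension_ioctl) are issued.
 */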
870 static int
871 cxgb_makedev(struct port_info *pi)
872 {
873 
874 	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
875 	    UID_ROOT, GID_WHEEL, 0600, if_name(pi->ifp));
876 
877 	if (pi->port_cdev == NULL)
878 		return (ENOMEM);
879 
880 	pi->port_cdev->si_drv1 = (void *)pi;
881 
882 	return (0);
883 }
884 
885 #ifndef LRO_SUPPORTED
886 #ifdef IFCAP_LRO
887 #undef IFCAP_LRO
888 #endif
889 #define IFCAP_LRO 0x0
890 #endif
891 
892 #ifdef TSO_SUPPORTED
893 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO)
894 /* Don't enable TSO6 yet */
895 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4 | IFCAP_JUMBO_MTU | IFCAP_LRO)
896 #else
897 #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_JUMBO_MTU)
898 /* Don't enable TSO6 yet */
899 #define CXGB_CAP_ENABLE (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM |  IFCAP_JUMBO_MTU)
900 #define IFCAP_TSO4 0x0
901 #define IFCAP_TSO6 0x0
902 #define CSUM_TSO   0x0
903 #endif
904 
905 
906 static int
907 cxgb_port_attach(device_t dev)
908 {
909 	struct port_info *p;
910 	struct ifnet *ifp;
911 	int err, media_flags;
912 	struct adapter *sc;
913 
914 
915 	p = device_get_softc(dev);
916 	sc = p->adapter;
917 	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
918 	    device_get_unit(device_get_parent(dev)), p->port_id);
919 	PORT_LOCK_INIT(p, p->lockbuf);
920 
921 	/* Allocate an ifnet object and set it up */
922 	ifp = p->ifp = if_alloc(IFT_ETHER);
923 	if (ifp == NULL) {
924 		device_printf(dev, "Cannot allocate ifnet\n");
925 		return (ENOMEM);
926 	}
927 
928 	/*
929 	 * Note that there is currently no watchdog timer.
930 	 */
931 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
932 	ifp->if_init = cxgb_init;
933 	ifp->if_softc = p;
934 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
935 	ifp->if_ioctl = cxgb_ioctl;
936 	ifp->if_start = cxgb_start;
937 
938 
939 	ifp->if_timer = 0;	/* Disable ifnet watchdog */
940 	ifp->if_watchdog = NULL;
941 
942 	ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN;
943 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
944 	IFQ_SET_READY(&ifp->if_snd);
945 
946 	ifp->if_hwassist = ifp->if_capabilities = ifp->if_capenable = 0;
947 	ifp->if_capabilities |= CXGB_CAP;
948 	ifp->if_capenable |= CXGB_CAP_ENABLE;
949 	ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO);
950 	/*
951 	 * disable TSO on 4-port - it isn't supported by the firmware yet
952 	 */
953 	if (p->adapter->params.nports > 2) {
954 		ifp->if_capabilities &= ~(IFCAP_TSO4 | IFCAP_TSO6);
955 		ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TSO6);
956 		ifp->if_hwassist &= ~CSUM_TSO;
957 	}
958 
959 	ether_ifattach(ifp, p->hw_addr);
960 #ifdef IFNET_MULTIQUEUE
961 	ifp->if_transmit = cxgb_pcpu_transmit;
962 #endif
963 	/*
964 	 * Only default to jumbo frames on 10GigE
965 	 */
966 	if (p->adapter->params.nports <= 2)
967 		ifp->if_mtu = ETHERMTU_JUMBO;
968 	if ((err = cxgb_makedev(p)) != 0) {
969 		printf("makedev failed %d\n", err);
970 		return (err);
971 	}
972 	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
973 	    cxgb_media_status);
974 
975 	if (!strcmp(p->phy.desc, "10GBASE-CX4")) {
976 		media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX;
977 	} else if (!strcmp(p->phy.desc, "10GBASE-SR")) {
978 		media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX;
979 	} else if (!strcmp(p->phy.desc, "10GBASE-R")) {
980 		media_flags = cxgb_ifm_type(p->phy.modtype);
981 	} else if (!strcmp(p->phy.desc, "10/100/1000BASE-T")) {
982 		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL);
983 		ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX,
984 			    0, NULL);
985 		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX,
986 			    0, NULL);
987 		ifmedia_add(&p->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
988 			    0, NULL);
989 		ifmedia_add(&p->media, IFM_ETHER | IFM_1000_T | IFM_FDX,
990 			    0, NULL);
991 		media_flags = 0;
992 	} else if (!strcmp(p->phy.desc, "1000BASE-X")) {
993 		/*
994 		 * XXX: This is not very accurate.  Fix when common code
995 		 * returns more specific value - eg 1000BASE-SX, LX, etc.
996 		 *
997 		 * XXX: In the meantime, don't lie. Consider setting IFM_AUTO
998 		 * instead of SX.
999 		 */
1000 		media_flags = IFM_ETHER | IFM_1000_SX | IFM_FDX;
1001 	} else {
1002 		printf("unsupported media type %s\n", p->phy.desc);
1003 		return (ENXIO);
1004 	}
1005 	if (media_flags) {
1006 		/*
1007 		 * Note the modtype on which we based our flags.  If modtype
1008 		 * changes, we'll redo the ifmedia for this ifp.  modtype may
1009 		 * change when transceivers are plugged in/out, and in other
1010 		 * situations.
1011 		 */
1012 		ifmedia_add(&p->media, media_flags, p->phy.modtype, NULL);
1013 		ifmedia_set(&p->media, media_flags);
1014 	} else {
1015 		ifmedia_add(&p->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1016 		ifmedia_set(&p->media, IFM_ETHER | IFM_AUTO);
1017 	}
1018 
1019 	/* Get the latest mac address, User can use a LAA */
1020 	bcopy(IF_LLADDR(p->ifp), p->hw_addr, ETHER_ADDR_LEN);
1021 	t3_sge_init_port(p);
1022 #if defined(LINK_ATTACH)
1023 	cxgb_link_start(p);
1024 	t3_link_changed(sc, p->port_id);
1025 #endif
1026 	return (0);
1027 }
1028 
1029 static int
1030 cxgb_port_detach(device_t dev)
1031 {
1032 	struct port_info *p;
1033 
1034 	p = device_get_softc(dev);
1035 
1036 	PORT_LOCK(p);
1037 	if (p->ifp->if_drv_flags & IFF_DRV_RUNNING)
1038 		cxgb_stop_locked(p);
1039 	PORT_UNLOCK(p);
1040 
1041 	ether_ifdetach(p->ifp);
1042 	printf("waiting for callout to stop ...");
1043 	DELAY(1000000);
1044 	printf("done\n");
1045 	/*
1046 	 * the lock may be acquired in ifdetach
1047 	 */
1048 	PORT_LOCK_DEINIT(p);
1049 	if_free(p->ifp);
1050 
1051 	if (p->port_cdev != NULL)
1052 		destroy_dev(p->port_cdev);
1053 
1054 	return (0);
1055 }
1056 
1057 void
1058 t3_fatal_err(struct adapter *sc)
1059 {
1060 	u_int fw_status[4];
1061 
1062 	if (sc->flags & FULL_INIT_DONE) {
1063 		t3_sge_stop(sc);
1064 		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1065 		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1066 		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1067 		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1068 		t3_intr_disable(sc);
1069 	}
1070 	device_printf(sc->dev,"encountered fatal error, operation suspended\n");
1071 	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
1072 		device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1073 		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1074 }
1075 
1076 int
1077 t3_os_find_pci_capability(adapter_t *sc, int cap)
1078 {
1079 	device_t dev;
1080 	struct pci_devinfo *dinfo;
1081 	pcicfgregs *cfg;
1082 	uint32_t status;
1083 	uint8_t ptr;
1084 
1085 	dev = sc->dev;
1086 	dinfo = device_get_ivars(dev);
1087 	cfg = &dinfo->cfg;
1088 
1089 	status = pci_read_config(dev, PCIR_STATUS, 2);
1090 	if (!(status & PCIM_STATUS_CAPPRESENT))
1091 		return (0);
1092 
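	/*
	 * The location of the capability list pointer depends on the
	 * header type; CardBus bridges (type 2) use a different offset.
	 */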
1093 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1094 	case 0:
1095 	case 1:
1096 		ptr = PCIR_CAP_PTR;
1097 		break;
1098 	case 2:
1099 		ptr = PCIR_CAP_PTR_2;
1100 		break;
1101 	default:
1102 		return (0);
1103 		break;
1104 	}
1105 	ptr = pci_read_config(dev, ptr, 1);
1106 
1107 	while (ptr != 0) {
1108 		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1109 			return (ptr);
1110 		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1111 	}
1112 
1113 	return (0);
1114 }
1115 
1116 int
1117 t3_os_pci_save_state(struct adapter *sc)
1118 {
1119 	device_t dev;
1120 	struct pci_devinfo *dinfo;
1121 
1122 	dev = sc->dev;
1123 	dinfo = device_get_ivars(dev);
1124 
1125 	pci_cfg_save(dev, dinfo, 0);
1126 	return (0);
1127 }
1128 
1129 int
1130 t3_os_pci_restore_state(struct adapter *sc)
1131 {
1132 	device_t dev;
1133 	struct pci_devinfo *dinfo;
1134 
1135 	dev = sc->dev;
1136 	dinfo = device_get_ivars(dev);
1137 
1138 	pci_cfg_restore(dev, dinfo);
1139 	return (0);
1140 }
1141 
1142 /**
1143  *	t3_os_link_changed - handle link status changes
1144  *	@adapter: the adapter associated with the link change
1145  *	@port_id: the port index whose link status has changed
1146  *	@link_status: the new status of the link
1147  *	@speed: the new speed setting
1148  *	@duplex: the new duplex setting
1149  *	@fc: the new flow-control setting
1150  *
1151  *	This is the OS-dependent handler for link status changes.  The OS
1152  *	neutral handler takes care of most of the processing for these events,
1153  *	then calls this handler for any OS-specific processing.
1154  */
1155 void
1156 t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1157      int duplex, int fc)
1158 {
1159 	struct port_info *pi = &adapter->port[port_id];
1160 	struct cmac *mac = &adapter->port[port_id].mac;
1161 
1162 	if (link_status) {
1163 		DELAY(10);
1164 		t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1165 		/* Clear errors created by MAC enable */
1166 		t3_set_reg_field(adapter,
1167 				 A_XGM_STAT_CTRL + pi->mac.offset,
1168 				 F_CLRSTATS, 1);
1169 		if_link_state_change(pi->ifp, LINK_STATE_UP);
1171 	} else {
1172 		pi->phy.ops->power_down(&pi->phy, 1);
1173 		t3_mac_disable(mac, MAC_DIRECTION_RX);
1174 		t3_link_start(&pi->phy, mac, &pi->link_config);
1175 		t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1176 		if_link_state_change(pi->ifp, LINK_STATE_DOWN);
1177 	}
1178 }
1179 
1180 /**
1181  *	t3_os_phymod_changed - handle PHY module changes
1182  *	@adap: the adapter that owns the PHY
1183  *	@port_id: the index of the port whose module changed
1184  *
1185  *	This is the OS-dependent handler for PHY module changes.  It is
1186  *	invoked when a PHY module is removed or inserted for any OS-specific
1187  *	processing.
1188  */
1189 void t3_os_phymod_changed(struct adapter *adap, int port_id)
1190 {
1191 	static const char *mod_str[] = {
1192 		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX", "unknown"
1193 	};
1194 
1195 	struct port_info *pi = &adap->port[port_id];
1196 
1197 	if (pi->phy.modtype == phy_modtype_none)
1198 		device_printf(adap->dev, "PHY module unplugged\n");
1199 	else {
1200 		KASSERT(pi->phy.modtype < ARRAY_SIZE(mod_str),
1201 		    ("invalid PHY module type %d", pi->phy.modtype));
1202 		device_printf(adap->dev, "%s PHY module inserted\n",
1203 		    mod_str[pi->phy.modtype]);
1204 	}
1205 }
1206 
1207 /*
1208  * Interrupt-context handler for external (PHY) interrupts.
1209  */
1210 void
1211 t3_os_ext_intr_handler(adapter_t *sc)
1212 {
1213 	if (cxgb_debug)
1214 		printf("t3_os_ext_intr_handler\n");
1215 	/*
1216 	 * Schedule a task to handle external interrupts as they may be slow
1217 	 * and we use a mutex to protect MDIO registers.  We disable PHY
1218 	 * interrupts in the meantime and let the task reenable them when
1219 	 * it's done.
1220 	 */
1221 	ADAPTER_LOCK(sc);
1222 	if (sc->slow_intr_mask) {
1223 		sc->slow_intr_mask &= ~F_T3DBG;
1224 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
1225 		taskqueue_enqueue(sc->tq, &sc->ext_intr_task);
1226 	}
1227 	ADAPTER_UNLOCK(sc);
1228 }
1229 
1230 void
1231 t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1232 {
1233 
1234 	/*
1235 	 * The ifnet might not be allocated before this gets called,
1236 	 * as this is called early on in attach by t3_prep_adapter, so
1237 	 * save the address off in the port structure.
1238 	 */
1239 	if (cxgb_debug)
1240 		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1241 	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1242 }
1243 
1244 /**
1245  *	cxgb_link_start - enable a port
1246  *	@p: the port to enable
1247  *
1248  *	Performs the MAC and PHY actions needed to enable a port.
1249  */
1250 static void
1251 cxgb_link_start(struct port_info *p)
1252 {
1253 	struct ifnet *ifp;
1254 	struct t3_rx_mode rm;
1255 	struct cmac *mac = &p->mac;
1256 	int mtu, hwtagging;
1257 
1258 	ifp = p->ifp;
1259 
1260 	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1261 
1262 	mtu = ifp->if_mtu;
1263 	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1264 		mtu += ETHER_VLAN_ENCAP_LEN;
1265 
1266 	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1267 
1268 	t3_init_rx_mode(&rm, p);
1269 	if (!mac->multiport)
1270 		t3_mac_reset(mac);
1271 	t3_mac_set_mtu(mac, mtu);
1272 	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1273 	t3_mac_set_address(mac, 0, p->hw_addr);
1274 	t3_mac_set_rx_mode(mac, &rm);
1275 	t3_link_start(&p->phy, mac, &p->link_config);
1276 	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1277 }
1278 
1279 
1280 static int
1281 await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1282 			      unsigned long n)
1283 {
1284 	int attempts = 5;
1285 
1286 	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1287 		if (!--attempts)
1288 			return (ETIMEDOUT);
1289 		t3_os_sleep(10);
1290 	}
1291 	return (0);
1292 }
1293 
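/*
 * Write every SMT, L2T and routing-table entry once so that TP's parity
 * bits are initialized, then use a final SET_TCB_FIELD and the wait for
 * all management replies as a completion barrier.
 */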
1294 static int
1295 init_tp_parity(struct adapter *adap)
1296 {
1297 	int i;
1298 	struct mbuf *m;
1299 	struct cpl_set_tcb_field *greq;
1300 	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1301 
1302 	t3_tp_set_offload_mode(adap, 1);
1303 
1304 	for (i = 0; i < 16; i++) {
1305 		struct cpl_smt_write_req *req;
1306 
1307 		m = m_gethdr(M_WAITOK, MT_DATA);
1308 		req = mtod(m, struct cpl_smt_write_req *);
1309 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1310 		memset(req, 0, sizeof(*req));
1311 		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1312 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1313 		req->iff = i;
1314 		t3_mgmt_tx(adap, m);
1315 	}
1316 
1317 	for (i = 0; i < 2048; i++) {
1318 		struct cpl_l2t_write_req *req;
1319 
1320 		m = m_gethdr(M_WAITOK, MT_DATA);
1321 		req = mtod(m, struct cpl_l2t_write_req *);
1322 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1323 		memset(req, 0, sizeof(*req));
1324 		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1325 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1326 		req->params = htonl(V_L2T_W_IDX(i));
1327 		t3_mgmt_tx(adap, m);
1328 	}
1329 
1330 	for (i = 0; i < 2048; i++) {
1331 		struct cpl_rte_write_req *req;
1332 
1333 		m = m_gethdr(M_WAITOK, MT_DATA);
1334 		req = mtod(m, struct cpl_rte_write_req *);
1335 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1336 		memset(req, 0, sizeof(*req));
1337 		req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1338 		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1339 		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1340 		t3_mgmt_tx(adap, m);
1341 	}
1342 
1343 	m = m_gethdr(M_WAITOK, MT_DATA);
1344 	greq = mtod(m, struct cpl_set_tcb_field *);
1345 	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1346 	memset(greq, 0, sizeof(*greq));
1347 	greq->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1348 	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1349 	greq->mask = htobe64(1);
1350 	t3_mgmt_tx(adap, m);
1351 
1352 	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1353 	t3_tp_set_offload_mode(adap, 0);
1354 	return (i);
1355 }
1356 
1357 /**
1358  *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1359  *	@adap: the adapter
1360  *
1361  *	Sets up RSS to distribute packets to multiple receive queues.  We
1362  *	configure the RSS CPU lookup table to distribute to the number of HW
1363  *	receive queues, and the response queue lookup table to narrow that
1364  *	down to the response queues actually configured for each port.
1365  *	We always configure the RSS mapping for two ports since the mapping
1366  *	table has plenty of entries.
1367  */
1368 static void
1369 setup_rss(adapter_t *adap)
1370 {
1371 	int i;
1372 	u_int nq[2];
1373 	uint8_t cpus[SGE_QSETS + 1];
1374 	uint16_t rspq_map[RSS_TABLE_SIZE];
1375 
1376 	for (i = 0; i < SGE_QSETS; ++i)
1377 		cpus[i] = i;
1378 	cpus[SGE_QSETS] = 0xff;
1379 
1380 	nq[0] = nq[1] = 0;
1381 	for_each_port(adap, i) {
1382 		const struct port_info *pi = adap2pinfo(adap, i);
1383 
1384 		nq[pi->tx_chan] += pi->nqsets;
1385 	}
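	/*
	 * The first half of the lookup table steers channel 0's traffic,
	 * the second half channel 1's.
	 */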
1386 	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1387 		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1388 		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1389 	}
1390 	/* Calculate the reverse RSS map table */
1391 	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1392 		if (adap->rrss_map[rspq_map[i]] == 0xff)
1393 			adap->rrss_map[rspq_map[i]] = i;
1394 
1395 	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1396 		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1397 	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1398 	              cpus, rspq_map);
1399 
1400 }
1401 
1402 /*
1403  * Sends an mbuf to an offload queue driver.
1405  */
1406 static inline int
1407 offload_tx(struct t3cdev *tdev, struct mbuf *m)
1408 {
1409 	int ret;
1410 
1411 	ret = t3_offload_tx(tdev, m);
1412 	return (ret);
1413 }
1414 
1415 static int
1416 write_smt_entry(struct adapter *adapter, int idx)
1417 {
1418 	struct port_info *pi = &adapter->port[idx];
1419 	struct cpl_smt_write_req *req;
1420 	struct mbuf *m;
1421 
1422 	if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
1423 		return (ENOMEM);
1424 
1425 	req = mtod(m, struct cpl_smt_write_req *);
1426 	m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req);
1427 
1428 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1429 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx));
1430 	req->mtu_idx = NMTUS - 1;  /* should be 0 but there's a T3 bug */
1431 	req->iff = idx;
1432 	memset(req->src_mac1, 0, sizeof(req->src_mac1));
1433 	memcpy(req->src_mac0, pi->hw_addr, ETHER_ADDR_LEN);
1434 
1435 	m_set_priority(m, 1);
1436 
1437 	offload_tx(&adapter->tdev, m);
1438 
1439 	return (0);
1440 }
1441 
1442 static int
1443 init_smt(struct adapter *adapter)
1444 {
1445 	int i;
1446 
1447 	for_each_port(adapter, i)
1448 		write_smt_entry(adapter, i);
1449 	return (0);
1450 }
1451 
1452 static void
1453 init_port_mtus(adapter_t *adapter)
1454 {
1455 	unsigned int mtus = adapter->port[0].ifp->if_mtu;
1456 
1457 	if (adapter->port[1].ifp)
1458 		mtus |= adapter->port[1].ifp->if_mtu << 16;
1459 	t3_write_reg(adapter, A_TP_MTU_PORT_TABLE, mtus);
1460 }
1461 
1462 static void
1463 send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1464 			      int hi, int port)
1465 {
1466 	struct mbuf *m;
1467 	struct mngt_pktsched_wr *req;
1468 
1469 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1470 	if (m) {
1471 		req = mtod(m, struct mngt_pktsched_wr *);
1472 		req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1473 		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1474 		req->sched = sched;
1475 		req->idx = qidx;
1476 		req->min = lo;
1477 		req->max = hi;
1478 		req->binding = port;
1479 		m->m_len = m->m_pkthdr.len = sizeof(*req);
1480 		t3_mgmt_tx(adap, m);
1481 	}
1482 }
1483 
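/*
 * Bind each port's queue sets to that port's Tx channel using firmware
 * packet-scheduler management commands.
 */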
1484 static void
1485 bind_qsets(adapter_t *sc)
1486 {
1487 	int i, j;
1488 
1489 	cxgb_pcpu_startup_threads(sc);
1490 	for (i = 0; i < (sc)->params.nports; ++i) {
1491 		const struct port_info *pi = adap2pinfo(sc, i);
1492 
1493 		for (j = 0; j < pi->nqsets; ++j) {
1494 			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1495 					  -1, pi->tx_chan);
1496 
1497 		}
1498 	}
1499 }
1500 
1501 static void
1502 update_tpeeprom(struct adapter *adap)
1503 {
1504 #ifdef FIRMWARE_LATEST
1505 	const struct firmware *tpeeprom;
1506 #else
1507 	struct firmware *tpeeprom;
1508 #endif
1509 
1510 	uint32_t version;
1511 	unsigned int major, minor;
1512 	int ret, len;
1513 	char rev;
1514 
1515 	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1516 
1517 	major = G_TP_VERSION_MAJOR(version);
1518 	minor = G_TP_VERSION_MINOR(version);
1519 	if (major == TP_VERSION_MAJOR && minor == TP_VERSION_MINOR)
1520 		return;
1521 
1522 	rev = t3rev2char(adap);
1523 
1524 	tpeeprom = firmware_get(TPEEPROM_NAME);
1525 	if (tpeeprom == NULL) {
1526 		device_printf(adap->dev, "could not load TP EEPROM: unable to load %s\n",
1527 		    TPEEPROM_NAME);
1528 		return;
1529 	}
1530 
1531 	len = tpeeprom->datasize - 4;
1532 
1533 	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1534 	if (ret)
1535 		goto release_tpeeprom;
1536 
1537 	if (len != TP_SRAM_LEN) {
1538 		device_printf(adap->dev, "%s length is wrong len=%d expected=%d\n", TPEEPROM_NAME, len, TP_SRAM_LEN);
1539 		goto release_tpeeprom;
1540 	}
1541 
1542 	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1543 	    TP_SRAM_OFFSET);
1544 
1545 	if (!ret) {
1546 		device_printf(adap->dev,
1547 			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1548 			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1549 	} else
1550 		device_printf(adap->dev, "Protocol SRAM image update in EEPROM failed\n");
1551 
1552 release_tpeeprom:
1553 	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1554 
1555 	return;
1556 }
1557 
1558 static int
1559 update_tpsram(struct adapter *adap)
1560 {
1561 #ifdef FIRMWARE_LATEST
1562 	const struct firmware *tpsram;
1563 #else
1564 	struct firmware *tpsram;
1565 #endif
1566 	int ret;
1567 	char rev;
1568 
1569 	rev = t3rev2char(adap);
1570 	if (!rev)
1571 		return (0);
1572 
1573 	update_tpeeprom(adap);
1574 
1575 	tpsram = firmware_get(TPSRAM_NAME);
1576 	if (tpsram == NULL) {
1577 		device_printf(adap->dev, "could not load TP SRAM\n");
1578 		return (EINVAL);
1579 	} else
1580 		device_printf(adap->dev, "updating TP SRAM\n");
1581 
1582 	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1583 	if (ret)
1584 		goto release_tpsram;
1585 
1586 	ret = t3_set_proto_sram(adap, tpsram->data);
1587 	if (ret)
1588 		device_printf(adap->dev, "loading protocol SRAM failed\n");
1589 
1590 release_tpsram:
1591 	firmware_put(tpsram, FIRMWARE_UNLOAD);
1592 
1593 	return (ret);
1594 }
1595 
1596 /**
1597  *	cxgb_up - enable the adapter
1598  *	@adap: adapter being enabled
1599  *
1600  *	Called when the first port is enabled, this function performs the
1601  *	actions necessary to make an adapter operational, such as completing
1602  *	the initialization of HW modules, and enabling interrupts.
1603  *
1604  */
1605 static int
1606 cxgb_up(struct adapter *sc)
1607 {
1608 	int err = 0;
1609 
1610 	if ((sc->flags & FULL_INIT_DONE) == 0) {
1611 
1612 		if ((sc->flags & FW_UPTODATE) == 0)
1613 			if ((err = upgrade_fw(sc)))
1614 				goto out;
1615 		if ((sc->flags & TPS_UPTODATE) == 0)
1616 			if ((err = update_tpsram(sc)))
1617 				goto out;
1618 		err = t3_init_hw(sc, 0);
1619 		if (err)
1620 			goto out;
1621 
1622 		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1623 		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1624 
1625 		err = setup_sge_qsets(sc);
1626 		if (err)
1627 			goto out;
1628 
1629 		setup_rss(sc);
1630 		t3_add_configured_sysctls(sc);
1631 		sc->flags |= FULL_INIT_DONE;
1632 	}
1633 
1634 	t3_intr_clear(sc);
1635 
1636 	/* If it's MSI or INTx, allocate a single interrupt for everything */
1637 	if ((sc->flags & USING_MSIX) == 0) {
1638 		if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
1639 		   &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
1640 			device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n",
1641 			    sc->irq_rid);
1642 			err = EINVAL;
1643 			goto out;
1644 		}
1645 		device_printf(sc->dev, "allocated irq_res=%p\n", sc->irq_res);
1646 
1647 		if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
1648 #ifdef INTR_FILTERS
1649 			NULL,
1650 #endif
1651 			sc->cxgb_intr, sc, &sc->intr_tag)) {
1652 			device_printf(sc->dev, "Cannot set up interrupt\n");
1653 			err = EINVAL;
1654 			goto irq_err;
1655 		}
1656 	} else {
1657 		cxgb_setup_msix(sc, sc->msi_count);
1658 	}
1659 
1660 	t3_sge_start(sc);
1661 	t3_intr_enable(sc);
1662 
1663 	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
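	/* Rev C and later parts need their TP parity state initialized once. */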
1664 	    is_offload(sc) && init_tp_parity(sc) == 0)
1665 		sc->flags |= TP_PARITY_INIT;
1666 
1667 	if (sc->flags & TP_PARITY_INIT) {
1668 		t3_write_reg(sc, A_TP_INT_CAUSE,
1669 				F_CMCACHEPERR | F_ARPLUTPERR);
1670 		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1671 	}
1672 
1673 
1674 	if (!(sc->flags & QUEUES_BOUND)) {
1675 		bind_qsets(sc);
1676 		sc->flags |= QUEUES_BOUND;
1677 	}
1678 out:
1679 	return (err);
1680 irq_err:
1681 	CH_ERR(sc, "request_irq failed, err %d\n", err);
1682 	goto out;
1683 }
1684 
1685 
1686 /*
1687  * Release resources when all the ports and offloading have been stopped.
1688  */
1689 static void
1690 cxgb_down_locked(struct adapter *sc)
1691 {
1692 
1693 	t3_sge_stop(sc);
1694 	t3_intr_disable(sc);
1695 
1696 	if (sc->intr_tag != NULL) {
1697 		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
1698 		sc->intr_tag = NULL;
1699 	}
1700 	if (sc->irq_res != NULL) {
1701 		device_printf(sc->dev, "de-allocating interrupt irq_rid=%d irq_res=%p\n",
1702 		    sc->irq_rid, sc->irq_res);
1703 		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
1704 		    sc->irq_res);
1705 		sc->irq_res = NULL;
1706 	}
1707 
1708 	if (sc->flags & USING_MSIX)
1709 		cxgb_teardown_msix(sc);
1710 
1711 	callout_stop(&sc->cxgb_tick_ch);
1712 	callout_stop(&sc->sge_timer_ch);
1713 	callout_drain(&sc->cxgb_tick_ch);
1714 	callout_drain(&sc->sge_timer_ch);
1715 
1716 	if (sc->tq != NULL) {
1717 		printf("draining slow intr\n");
1719 		taskqueue_drain(sc->tq, &sc->slow_intr_task);
1720 		printf("draining ext intr\n");
1721 		taskqueue_drain(sc->tq, &sc->ext_intr_task);
1722 		printf("draining tick task\n");
1723 		taskqueue_drain(sc->tq, &sc->tick_task);
1724 	}
1725 	ADAPTER_UNLOCK(sc);
1726 }
1727 
1728 static int
1729 offload_open(struct port_info *pi)
1730 {
1731 	struct adapter *adapter = pi->adapter;
1732 	struct t3cdev *tdev = &adapter->tdev;
1733 
1734 	int adap_up = adapter->open_device_map & PORT_MASK;
1735 	int err = 0;
1736 
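	/*
	 * Atomically mark the offload device open; if the bit was already
	 * set, another caller has opened it and there is nothing to do.
	 */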
1737 	if (atomic_cmpset_int(&adapter->open_device_map,
1738 		(adapter->open_device_map & ~(1<<OFFLOAD_DEVMAP_BIT)),
1739 		(adapter->open_device_map | (1<<OFFLOAD_DEVMAP_BIT))) == 0)
1740 		return (0);
1741 
1742 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1743 		printf("offload_open: DEVMAP_BIT did not get set 0x%x\n",
1744 		    adapter->open_device_map);
1745 	ADAPTER_LOCK(pi->adapter);
1746 	if (!adap_up)
1747 		err = cxgb_up(adapter);
1748 	ADAPTER_UNLOCK(pi->adapter);
1749 	if (err)
1750 		return (err);
1751 
1752 	t3_tp_set_offload_mode(adapter, 1);
1753 	tdev->lldev = pi->ifp;
1754 
1755 	init_port_mtus(adapter);
1756 	t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd,
1757 		     adapter->params.b_wnd,
1758 		     adapter->params.rev == 0 ?
1759 		       adapter->port[0].ifp->if_mtu : 0xffff);
1760 	init_smt(adapter);
1761 	/* Call back all registered clients */
1762 	cxgb_add_clients(tdev);
1763 
1764 	/* restore them in case the offload module has changed them */
1765 	if (err) {
1766 		t3_tp_set_offload_mode(adapter, 0);
1767 		clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1768 		cxgb_set_dummy_ops(tdev);
1769 	}
1770 	return (err);
1771 }
1772 
1773 static int
1774 offload_close(struct t3cdev *tdev)
1775 {
1776 	struct adapter *adapter = tdev2adap(tdev);
1777 
1778 	if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT))
1779 		return (0);
1780 
1781 	/* Call back all registered clients */
1782 	cxgb_remove_clients(tdev);
1783 
1784 	tdev->lldev = NULL;
1785 	cxgb_set_dummy_ops(tdev);
1786 	t3_tp_set_offload_mode(adapter, 0);
1787 	clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
1788 
1789 	ADAPTER_LOCK(adapter);
1790 	if (!adapter->open_device_map)
1791 		cxgb_down_locked(adapter);
1792 	else
1793 		ADAPTER_UNLOCK(adapter);
1794 	return (0);
1795 }
1796 
1797 
1798 static void
1799 cxgb_init(void *arg)
1800 {
1801 	struct port_info *p = arg;
1802 
1803 	PORT_LOCK(p);
1804 	cxgb_init_locked(p);
1805 	PORT_UNLOCK(p);
1806 }
1807 
1808 static void
1809 cxgb_init_locked(struct port_info *p)
1810 {
1811 	struct ifnet *ifp;
1812 	adapter_t *sc = p->adapter;
1813 	int err;
1814 
1815 	PORT_LOCK_ASSERT_OWNED(p);
1816 	ifp = p->ifp;
1817 
1818 	ADAPTER_LOCK(p->adapter);
1819 	if ((sc->open_device_map == 0) && (err = cxgb_up(sc))) {
1820 		ADAPTER_UNLOCK(p->adapter);
1821 		cxgb_stop_locked(p);
1822 		return;
1823 	}
1824 	if (p->adapter->open_device_map == 0) {
1825 		t3_intr_clear(sc);
1826 	}
1827 	setbit(&p->adapter->open_device_map, p->port_id);
1828 	ADAPTER_UNLOCK(p->adapter);
1829 
1830 	if (is_offload(sc) && !ofld_disable) {
1831 		err = offload_open(p);
1832 		if (err)
1833 			log(LOG_WARNING,
1834 			    "Could not initialize offload capabilities\n");
1835 	}
1836 #if !defined(LINK_ATTACH)
1837 	cxgb_link_start(p);
1838 	t3_link_changed(sc, p->port_id);
1839 #endif
1840 	ifp->if_baudrate = IF_Mbps(p->link_config.speed);
1841 
1842 	device_printf(sc->dev, "enabling interrupts on port=%d\n", p->port_id);
1843 	t3_port_intr_enable(sc, p->port_id);
1844 
1845 	t3_sge_reset_adapter(sc);
1846 
1847 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1848 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1849 }
1850 
1851 static void
1852 cxgb_set_rxmode(struct port_info *p)
1853 {
1854 	struct t3_rx_mode rm;
1855 	struct cmac *mac = &p->mac;
1856 
1857 	t3_init_rx_mode(&rm, p);
1858 	mtx_lock(&p->adapter->mdio_lock);
1859 	t3_mac_set_rx_mode(mac, &rm);
1860 	mtx_unlock(&p->adapter->mdio_lock);
1861 }
1862 
1863 static void
1864 cxgb_stop_locked(struct port_info *pi)
1865 {
1866 	struct ifnet *ifp;
1867 
1868 	PORT_LOCK_ASSERT_OWNED(pi);
1869 	ADAPTER_LOCK_ASSERT_NOTOWNED(pi->adapter);
1870 
1871 	ifp = pi->ifp;
1872 	t3_port_intr_disable(pi->adapter, pi->port_id);
1873 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1874 
1875 	/* disable pause frames */
1876 	t3_set_reg_field(pi->adapter, A_XGM_TX_CFG + pi->mac.offset,
1877 			 F_TXPAUSEEN, 0);
1878 
1879 	/* Reset RX FIFO HWM */
1880 	t3_set_reg_field(pi->adapter, A_XGM_RXFIFO_CFG + pi->mac.offset,
1881 			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1882 
1884 	ADAPTER_LOCK(pi->adapter);
1885 	clrbit(&pi->adapter->open_device_map, pi->port_id);
1886 
1887 	if (pi->adapter->open_device_map == 0) {
1888 		cxgb_down_locked(pi->adapter);
1889 	} else
1890 		ADAPTER_UNLOCK(pi->adapter);
1891 
1892 #if !defined(LINK_ATTACH)
1893 	DELAY(100);
1894 
1895 	/* Wait for TXFIFO empty */
1896 	t3_wait_op_done(pi->adapter, A_XGM_TXFIFO_CFG + pi->mac.offset,
1897 			F_TXFIFO_EMPTY, 1, 20, 5);
1898 
1899 	DELAY(100);
1900 	t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
1901 
1902 	pi->phy.ops->power_down(&pi->phy, 1);
1903 #endif
1904 
1905 }
1906 
1907 static int
1908 cxgb_set_mtu(struct port_info *p, int mtu)
1909 {
1910 	struct ifnet *ifp = p->ifp;
1911 	int error = 0;
1912 
1913 	if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO))
1914 		error = EINVAL;
1915 	else if (ifp->if_mtu != mtu) {
1916 		PORT_LOCK(p);
1917 		ifp->if_mtu = mtu;
1918 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1919 			cxgb_stop_locked(p);
1920 			cxgb_init_locked(p);
1921 		}
1922 		PORT_UNLOCK(p);
1923 	}
1924 	return (error);
1925 }
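
/*
 * Usage sketch (the unit number is illustrative): this path is reached
 * via the SIOCSIFMTU case below, e.g. "ifconfig cxgb0 mtu 9000"; an MTU
 * change on a running port bounces it through cxgb_stop_locked() and
 * cxgb_init_locked() as above.
 */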
1926 
1927 #ifdef LRO_SUPPORTED
1928 /*
1929  * Mark lro enabled or disabled in all qsets for this port
1930  */
1931 static int
1932 cxgb_set_lro(struct port_info *p, int enabled)
1933 {
1934 	int i;
1935 	struct adapter *adp = p->adapter;
1936 	struct sge_qset *q;
1937 
1938 	PORT_LOCK_ASSERT_OWNED(p);
1939 	for (i = 0; i < p->nqsets; i++) {
1940 		q = &adp->sge.qs[p->first_qset + i];
1941 		q->lro.enabled = (enabled != 0);
1942 	}
1943 	return (0);
1944 }
1945 #endif
1946 
1947 static int
1948 cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1949 {
1950 	struct port_info *p = ifp->if_softc;
1951 #ifdef INET
1952 	struct ifaddr *ifa = (struct ifaddr *)data;
1953 #endif
1954 	struct ifreq *ifr = (struct ifreq *)data;
1955 	int flags, error = 0, reinit = 0;
1956 	uint32_t mask;
1957 
1958 	/*
1959 	 * XXX need to check that we aren't in the middle of an unload
1960 	 */
1961 	switch (command) {
1962 	case SIOCSIFMTU:
1963 		error = cxgb_set_mtu(p, ifr->ifr_mtu);
1964 		break;
1965 	case SIOCSIFADDR:
1966 #ifdef INET
1967 		if (ifa->ifa_addr->sa_family == AF_INET) {
1968 			ifp->if_flags |= IFF_UP;
1969 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1970 				PORT_LOCK(p);
1971 				cxgb_init_locked(p);
1972 				PORT_UNLOCK(p);
1973 			}
1974 			arp_ifinit(ifp, ifa);
1975 		} else
1976 #endif
1977 			error = ether_ioctl(ifp, command, data);
1978 		break;
1979 	case SIOCSIFFLAGS:
1980 		PORT_LOCK(p);
1981 		if (ifp->if_flags & IFF_UP) {
1982 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1983 				flags = p->if_flags;
1984 				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1985 				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI))
1986 					cxgb_set_rxmode(p);
1987 			} else
1988 				cxgb_init_locked(p);
1989 			p->if_flags = ifp->if_flags;
1990 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1991 			cxgb_stop_locked(p);
1992 
1993 		PORT_UNLOCK(p);
1994 		break;
1995 	case SIOCADDMULTI:
1996 	case SIOCDELMULTI:
1997 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1998 			cxgb_set_rxmode(p);
1999 		}
2000 		break;
2001 	case SIOCSIFMEDIA:
2002 	case SIOCGIFMEDIA:
2003 		PORT_LOCK(p);
2004 		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2005 		PORT_UNLOCK(p);
2006 		break;
2007 	case SIOCSIFCAP:
2008 		PORT_LOCK(p);
2009 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2010 		if (mask & IFCAP_TXCSUM) {
2011 			if (IFCAP_TXCSUM & ifp->if_capenable) {
2012 				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
2013 				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
2014 				    | CSUM_IP | CSUM_TSO);
2015 			} else {
2016 				ifp->if_capenable |= IFCAP_TXCSUM;
2017 				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
2018 				    | CSUM_IP);
2019 			}
2020 		}
2021 		if (mask & IFCAP_RXCSUM) {
2022 			ifp->if_capenable ^= IFCAP_RXCSUM;
2023 		}
2024 		if (mask & IFCAP_TSO4) {
2025 			if (IFCAP_TSO4 & ifp->if_capenable) {
2026 				ifp->if_capenable &= ~IFCAP_TSO4;
2027 				ifp->if_hwassist &= ~CSUM_TSO;
2028 			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
2029 				ifp->if_capenable |= IFCAP_TSO4;
2030 				ifp->if_hwassist |= CSUM_TSO;
2031 			} else {
2032 				if (cxgb_debug)
2033 					printf("cxgb requires tx checksum offload"
2034 					    " be enabled to use TSO\n");
2035 				error = EINVAL;
2036 			}
2037 		}
2038 #ifdef LRO_SUPPORTED
2039 		if (mask & IFCAP_LRO) {
2040 			ifp->if_capenable ^= IFCAP_LRO;
2041 
2042 			/* Safe to do this even if cxgb_up has not been called yet */
2043 			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2044 		}
2045 #endif
2046 		if (mask & IFCAP_VLAN_HWTAGGING) {
2047 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2048 			reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
2049 		}
2050 		if (mask & IFCAP_VLAN_MTU) {
2051 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2052 			reinit = ifp->if_drv_flags & IFF_DRV_RUNNING;
2053 		}
2054 		if (mask & IFCAP_VLAN_HWCSUM) {
2055 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2056 		}
2057 		if (reinit) {
2058 			cxgb_stop_locked(p);
2059 			cxgb_init_locked(p);
2060 		}
2061 		PORT_UNLOCK(p);
2062 
2063 #ifdef VLAN_CAPABILITIES
2064 		VLAN_CAPABILITIES(ifp);
2065 #endif
2066 		break;
2067 	default:
2068 		error = ether_ioctl(ifp, command, data);
2069 		break;
2070 	}
2071 	return (error);
2072 }
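
/*
 * Note on the SIOCSIFCAP handling above: disabling TXCSUM also clears
 * TSO4 (TSO depends on tx checksum offload), so e.g.
 * "ifconfig cxgb0 -txcsum" implicitly turns TSO off, while enabling
 * TSO4 without TXCSUM fails with EINVAL.
 */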
2073 
2074 static int
2075 cxgb_media_change(struct ifnet *ifp)
2076 {
2077 	if_printf(ifp, "media change not supported\n");
2078 	return (ENXIO);
2079 }
2080 
2081 /*
2082  * Translates from phy->modtype to IFM_TYPE.
2083  */
2084 static int
2085 cxgb_ifm_type(int phymod)
2086 {
2087 	int rc = IFM_ETHER | IFM_FDX;
2088 
2089 	switch (phymod) {
2090 	case phy_modtype_sr:
2091 		rc |= IFM_10G_SR;
2092 		break;
2093 	case phy_modtype_lr:
2094 		rc |= IFM_10G_LR;
2095 		break;
2096 	case phy_modtype_lrm:
2097 #ifdef IFM_10G_LRM
2098 		rc |= IFM_10G_LRM;
2099 #endif
2100 		break;
2101 	case phy_modtype_twinax:
2102 #ifdef IFM_10G_TWINAX
2103 		rc |= IFM_10G_TWINAX;
2104 #endif
2105 		break;
2106 	case phy_modtype_twinax_long:
2107 #ifdef IFM_10G_TWINAX_LONG
2108 		rc |= IFM_10G_TWINAX_LONG;
2109 #endif
2110 		break;
2111 	case phy_modtype_none:
2112 		rc = IFM_ETHER | IFM_NONE;
2113 		break;
2114 	case phy_modtype_unknown:
2115 		break;
2116 	}
2117 
2118 	return (rc);
2119 }
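
/*
 * e.g. cxgb_ifm_type(phy_modtype_sr) returns IFM_ETHER | IFM_FDX |
 * IFM_10G_SR, while an empty module cage (phy_modtype_none) maps to
 * IFM_ETHER | IFM_NONE.
 */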
2120 
2121 static void
2122 cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2123 {
2124 	struct port_info *p = ifp->if_softc;
2125 	struct ifmedia_entry *cur = p->media.ifm_cur;
2126 	int m;
2127 
2128 	if (cur->ifm_data != p->phy.modtype) {
2129 		/* p->media about to be rebuilt, must hold lock */
2130 		PORT_LOCK_ASSERT_OWNED(p);
2131 
2132 		m = cxgb_ifm_type(p->phy.modtype);
2133 		ifmedia_removeall(&p->media);
2134 		ifmedia_add(&p->media, m, p->phy.modtype, NULL);
2135 		ifmedia_set(&p->media, m);
2136 		cur = p->media.ifm_cur; /* ifmedia_set modified ifm_cur */
2137 		ifmr->ifm_current = m;
2138 	}
2139 
2140 	ifmr->ifm_status = IFM_AVALID;
2141 	ifmr->ifm_active = IFM_ETHER;
2142 
2143 	if (!p->link_config.link_ok)
2144 		return;
2145 
2146 	ifmr->ifm_status |= IFM_ACTIVE;
2147 
2148 	switch (p->link_config.speed) {
2149 	case 10:
2150 		ifmr->ifm_active |= IFM_10_T;
2151 		break;
2152 	case 100:
2153 		ifmr->ifm_active |= IFM_100_TX;
2154 		break;
2155 	case 1000:
2156 		ifmr->ifm_active |= IFM_1000_T;
2157 		break;
2158 	case 10000:
2159 		ifmr->ifm_active |= IFM_SUBTYPE(cur->ifm_media);
2160 		break;
2161 	}
2162 
2163 	if (p->link_config.duplex)
2164 		ifmr->ifm_active |= IFM_FDX;
2165 	else
2166 		ifmr->ifm_active |= IFM_HDX;
2167 }
2168 
2169 static void
2170 cxgb_async_intr(void *data)
2171 {
2172 	adapter_t *sc = data;
2173 
2174 	if (cxgb_debug)
2175 		device_printf(sc->dev, "cxgb_async_intr\n");
2176 	/*
2177 	 * May need to sleep - defer to taskqueue
2178 	 */
2179 	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2180 }
2181 
2182 static void
2183 cxgb_ext_intr_handler(void *arg, int count)
2184 {
2185 	adapter_t *sc = (adapter_t *)arg;
2186 
2187 	if (cxgb_debug)
2188 		printf("cxgb_ext_intr_handler\n");
2189 
2190 	t3_phy_intr_handler(sc);
2191 
2192 	/* Now reenable external interrupts */
2193 	ADAPTER_LOCK(sc);
2194 	if (sc->slow_intr_mask) {
2195 		sc->slow_intr_mask |= F_T3DBG;
2196 		t3_write_reg(sc, A_PL_INT_CAUSE0, F_T3DBG);
2197 		t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
2198 	}
2199 	ADAPTER_UNLOCK(sc);
2200 }
2201 
2202 static void
2203 check_link_status(adapter_t *sc)
2204 {
2205 	int i;
2206 
2207 	for (i = 0; i < sc->params.nports; ++i) {
2208 		struct port_info *p = &sc->port[i];
2209 
2210 		if (!(p->phy.caps & SUPPORTED_IRQ))
2211 			t3_link_changed(sc, i);
2212 		p->ifp->if_baudrate = IF_Mbps(p->link_config.speed);
2213 	}
2214 }
2215 
2216 static void
2217 check_t3b2_mac(struct adapter *adapter)
2218 {
2219 	int i;
2220 
2221 	if (adapter->flags & CXGB_SHUTDOWN)
2222 		return;
2223 
2224 	for_each_port(adapter, i) {
2225 		struct port_info *p = &adapter->port[i];
2226 		struct ifnet *ifp = p->ifp;
2227 		int status;
2228 
2229 		if (adapter->flags & CXGB_SHUTDOWN)
2230 			return;
2231 
2232 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2233 			continue;
2234 
2235 		status = 0;
2236 		PORT_LOCK(p);
2237 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
2238 			status = t3b2_mac_watchdog_task(&p->mac);
2239 		if (status == 1)
2240 			p->mac.stats.num_toggled++;
2241 		else if (status == 2) {
2242 			struct cmac *mac = &p->mac;
2243 			int mtu = ifp->if_mtu;
2244 
2245 			if (ifp->if_capenable & IFCAP_VLAN_MTU)
2246 				mtu += ETHER_VLAN_ENCAP_LEN;
2247 			t3_mac_set_mtu(mac, mtu);
2248 			t3_mac_set_address(mac, 0, p->hw_addr);
2249 			cxgb_set_rxmode(p);
2250 			t3_link_start(&p->phy, mac, &p->link_config);
2251 			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2252 			t3_port_intr_enable(adapter, p->port_id);
2253 			p->mac.stats.num_resets++;
2254 		}
2255 		PORT_UNLOCK(p);
2256 	}
2257 }
2258 
2259 static void
2260 cxgb_tick(void *arg)
2261 {
2262 	adapter_t *sc = (adapter_t *)arg;
2263 
2264 	if (sc->flags & CXGB_SHUTDOWN)
2265 		return;
2266 
2267 	taskqueue_enqueue(sc->tq, &sc->tick_task);
2268 	callout_reset(&sc->cxgb_tick_ch, CXGB_TICKS(sc), cxgb_tick, sc);
2269 }
2270 
2271 static void
2272 cxgb_tick_handler(void *arg, int count)
2273 {
2274 	adapter_t *sc = (adapter_t *)arg;
2275 	const struct adapter_params *p = &sc->params;
2276 	int i;
2277 
2278 	if (sc->flags & CXGB_SHUTDOWN)
2279 		return;
2280 
2281 	ADAPTER_LOCK(sc);
2282 	if (p->linkpoll_period)
2283 		check_link_status(sc);
2284 
2285 
2286 	sc->check_task_cnt++;
2287 
2288 	/*
2289 	 * The lock order is port lock first, then adapter lock, so drop
2290 	 * the adapter lock before taking port locks below.
2291 	 */
2292 	ADAPTER_UNLOCK(sc);
2293 
2294 	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2295 		check_t3b2_mac(sc);
2296 
2297 	for (i = 0; i < sc->params.nports; i++) {
2298 		struct port_info *pi = &sc->port[i];
2299 		struct ifnet *ifp = pi->ifp;
2300 		struct mac_stats *mstats = &pi->mac.stats;
2301 		PORT_LOCK(pi);
2302 		t3_mac_update_stats(&pi->mac);
2303 		PORT_UNLOCK(pi);
2304 
2305 
2306 		ifp->if_opackets =
2307 		    mstats->tx_frames_64 +
2308 		    mstats->tx_frames_65_127 +
2309 		    mstats->tx_frames_128_255 +
2310 		    mstats->tx_frames_256_511 +
2311 		    mstats->tx_frames_512_1023 +
2312 		    mstats->tx_frames_1024_1518 +
2313 		    mstats->tx_frames_1519_max;
2314 
2315 		ifp->if_ipackets =
2316 		    mstats->rx_frames_64 +
2317 		    mstats->rx_frames_65_127 +
2318 		    mstats->rx_frames_128_255 +
2319 		    mstats->rx_frames_256_511 +
2320 		    mstats->rx_frames_512_1023 +
2321 		    mstats->rx_frames_1024_1518 +
2322 		    mstats->rx_frames_1519_max;
2323 
2324 		ifp->if_obytes = mstats->tx_octets;
2325 		ifp->if_ibytes = mstats->rx_octets;
2326 		ifp->if_omcasts = mstats->tx_mcast_frames;
2327 		ifp->if_imcasts = mstats->rx_mcast_frames;
2328 
2329 		ifp->if_collisions =
2330 		    mstats->tx_total_collisions;
2331 
2332 		ifp->if_iqdrops = mstats->rx_cong_drops;
2333 
2334 		ifp->if_oerrors =
2335 		    mstats->tx_excess_collisions +
2336 		    mstats->tx_underrun +
2337 		    mstats->tx_len_errs +
2338 		    mstats->tx_mac_internal_errs +
2339 		    mstats->tx_excess_deferral +
2340 		    mstats->tx_fcs_errs;
2341 		ifp->if_ierrors =
2342 		    mstats->rx_jabber +
2343 		    mstats->rx_data_errs +
2344 		    mstats->rx_sequence_errs +
2345 		    mstats->rx_runt +
2346 		    mstats->rx_too_long +
2347 		    mstats->rx_mac_internal_errs +
2348 		    mstats->rx_short +
2349 		    mstats->rx_fcs_errs;
2350 	}
2351 }
2352 
2353 static void
2354 touch_bars(device_t dev)
2355 {
2356 	/*
2357 	 * Don't enable yet: the body below still uses the Linux PCI API
	 * (pdev, pci_read_config_dword) and needs porting before use.
2358 	 */
2359 #if !defined(__LP64__) && 0
2360 	u32 v;
2361 
2362 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2363 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2364 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2365 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2366 	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2367 	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2368 #endif
2369 }
2370 
2371 static int
2372 set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2373 {
2374 	uint8_t *buf;
2375 	int err = 0;
2376 	u32 aligned_offset, aligned_len, *p;
2377 	struct adapter *adapter = pi->adapter;
2378 
2379 
2380 	aligned_offset = offset & ~3;
2381 	aligned_len = (len + (offset & 3) + 3) & ~3;
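	/*
	 * Worked example: offset 6, len 5 covers bytes 6..10, giving
	 * aligned_offset = 4 and aligned_len = 8, i.e. the two 4-byte
	 * EEPROM words spanning bytes 4..11.
	 */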
2382 
2383 	if (aligned_offset != offset || aligned_len != len) {
2384 		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
		/* M_WAITOK allocations sleep rather than fail; no NULL check needed */
2387 		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2388 		if (!err && aligned_len > 4)
2389 			err = t3_seeprom_read(adapter,
2390 					      aligned_offset + aligned_len - 4,
2391 					      (u32 *)&buf[aligned_len - 4]);
2392 		if (err)
2393 			goto out;
2394 		memcpy(buf + (offset & 3), data, len);
2395 	} else
2396 		buf = (uint8_t *)(uintptr_t)data;
2397 
2398 	err = t3_seeprom_wp(adapter, 0);
2399 	if (err)
2400 		goto out;
2401 
2402 	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2403 		err = t3_seeprom_write(adapter, aligned_offset, *p);
2404 		aligned_offset += 4;
2405 	}
2406 
2407 	if (!err)
2408 		err = t3_seeprom_wp(adapter, 1);
2409 out:
2410 	if (buf != data)
2411 		free(buf, M_DEVBUF);
2412 	return (err);
2413 }
2414 
2415 
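/*
 * A negative val means "leave unchanged" in the scheduler ioctl below,
 * so in_range() deliberately accepts it: in_range(-1, 0, 10) is true
 * while in_range(11, 0, 10) is false.
 */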
2416 static int
2417 in_range(int val, int lo, int hi)
2418 {
2419 	return (val < 0 || (val <= hi && val >= lo));
2420 }
2421 
2422 static int
2423 cxgb_extension_open(struct cdev *dev, int flags, int fmt, d_thread_t *td)
2424 {
2425 	return (0);
2426 }
2427 
2428 static int
2429 cxgb_extension_close(struct cdev *dev, int flags, int fmt, d_thread_t *td)
2430 {
2431 	return (0);
2432 }
2433 
2434 static int
2435 cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2436     int fflag, struct thread *td)
2437 {
2438 	int mmd, error = 0;
2439 	struct port_info *pi = dev->si_drv1;
2440 	adapter_t *sc = pi->adapter;
2441 
2442 #ifdef PRIV_SUPPORTED
2443 	if (priv_check(td, PRIV_DRIVER)) {
2444 		if (cxgb_debug)
2445 			printf("user does not have access to privileged ioctls\n");
2446 		return (EPERM);
2447 	}
2448 #else
2449 	if (suser(td)) {
2450 		if (cxgb_debug)
2451 			printf("user does not have access to privileged ioctls\n");
2452 		return (EPERM);
2453 	}
2454 #endif
2455 
2456 	switch (cmd) {
2457 	case CHELSIO_GET_MIIREG: {
2458 		uint32_t val;
2459 		struct cphy *phy = &pi->phy;
2460 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2461 
2462 		if (!phy->mdio_read)
2463 			return (EOPNOTSUPP);
2464 		if (is_10G(sc)) {
2465 			mmd = mid->phy_id >> 8;
2466 			if (!mmd)
2467 				mmd = MDIO_DEV_PCS;
2468 			else if (mmd > MDIO_DEV_XGXS)
2469 				return (EINVAL);
2470 
2471 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2472 					     mid->reg_num, &val);
2473 		} else
2474 			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2475 					     mid->reg_num & 0x1f, &val);
2476 		if (error == 0)
2477 			mid->val_out = val;
2478 		break;
2479 	}
2480 	case CHELSIO_SET_MIIREG: {
2481 		struct cphy *phy = &pi->phy;
2482 		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2483 
2484 		if (!phy->mdio_write)
2485 			return (EOPNOTSUPP);
2486 		if (is_10G(sc)) {
2487 			mmd = mid->phy_id >> 8;
2488 			if (!mmd)
2489 				mmd = MDIO_DEV_PCS;
2490 			else if (mmd > MDIO_DEV_XGXS)
2491 				return (EINVAL);
2492 
2493 			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2494 					      mmd, mid->reg_num, mid->val_in);
2495 		} else
2496 			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2497 					      mid->reg_num & 0x1f,
2498 					      mid->val_in);
2499 		break;
2500 	}
2501 	case CHELSIO_SETREG: {
2502 		struct ch_reg *edata = (struct ch_reg *)data;
2503 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2504 			return (EFAULT);
2505 		t3_write_reg(sc, edata->addr, edata->val);
2506 		break;
2507 	}
2508 	case CHELSIO_GETREG: {
2509 		struct ch_reg *edata = (struct ch_reg *)data;
2510 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2511 			return (EFAULT);
2512 		edata->val = t3_read_reg(sc, edata->addr);
2513 		break;
2514 	}
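	/*
	 * Register peek sketch (hedged: the fd and address are purely
	 * illustrative; addr must be 4-byte aligned and below mmio_len,
	 * per the checks above):
	 *
	 *	struct ch_reg r = { .addr = 0x6cc };
	 *	if (ioctl(fd, CHELSIO_GETREG, &r) == 0)
	 *		printf("reg 0x%x = 0x%x\n", r.addr, r.val);
	 */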
2515 	case CHELSIO_GET_SGE_CONTEXT: {
2516 		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2517 		mtx_lock_spin(&sc->sge.reg_lock);
2518 		switch (ecntxt->cntxt_type) {
2519 		case CNTXT_TYPE_EGRESS:
2520 			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2521 			    ecntxt->data);
2522 			break;
2523 		case CNTXT_TYPE_FL:
2524 			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2525 			    ecntxt->data);
2526 			break;
2527 		case CNTXT_TYPE_RSP:
2528 			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2529 			    ecntxt->data);
2530 			break;
2531 		case CNTXT_TYPE_CQ:
2532 			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2533 			    ecntxt->data);
2534 			break;
2535 		default:
2536 			error = EINVAL;
2537 			break;
2538 		}
2539 		mtx_unlock_spin(&sc->sge.reg_lock);
2540 		break;
2541 	}
2542 	case CHELSIO_GET_SGE_DESC: {
2543 		struct ch_desc *edesc = (struct ch_desc *)data;
2544 		int ret;
2545 		if (edesc->queue_num >= SGE_QSETS * 6)
2546 			return (EINVAL);
2547 		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2548 		    edesc->queue_num % 6, edesc->idx, edesc->data);
2549 		if (ret < 0)
2550 			return (EINVAL);
2551 		edesc->size = ret;
2552 		break;
2553 	}
2554 	case CHELSIO_GET_QSET_PARAMS: {
2555 		struct qset_params *q;
2556 		struct ch_qset_params *t = (struct ch_qset_params *)data;
2557 		int q1 = pi->first_qset;
2558 		int nqsets = pi->nqsets;
2559 		int i;
2560 
2561 		if (t->qset_idx >= nqsets)
2562 			return (EINVAL);
2563 
2564 		i = q1 + t->qset_idx;
2565 		q = &sc->params.sge.qset[i];
2566 		t->rspq_size   = q->rspq_size;
2567 		t->txq_size[0] = q->txq_size[0];
2568 		t->txq_size[1] = q->txq_size[1];
2569 		t->txq_size[2] = q->txq_size[2];
2570 		t->fl_size[0]  = q->fl_size;
2571 		t->fl_size[1]  = q->jumbo_size;
2572 		t->polling     = q->polling;
2573 		t->lro         = q->lro;
2574 		t->intr_lat    = q->coalesce_usecs;
2575 		t->cong_thres  = q->cong_thres;
2576 		t->qnum        = i;
2577 
2578 		if (sc->flags & USING_MSIX)
2579 			t->vector = rman_get_start(sc->msix_irq_res[i]);
2580 		else
2581 			t->vector = rman_get_start(sc->irq_res);
2582 
2583 		break;
2584 	}
2585 	case CHELSIO_GET_QSET_NUM: {
2586 		struct ch_reg *edata = (struct ch_reg *)data;
2587 		edata->val = pi->nqsets;
2588 		break;
2589 	}
2590 	case CHELSIO_LOAD_FW: {
2591 		uint8_t *fw_data;
2592 		uint32_t vers;
2593 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2594 
2595 		/*
2596 		 * Firmware may only be loaded before FULL_INIT_DONE.
2597 		 *
2598 		 * FW_UPTODATE is also set so the rest of the initialization
2599 		 * will not overwrite what was loaded here.  This gives you the
2600 		 * flexibility to load any firmware (and maybe shoot yourself in
2601 		 * the foot).
2602 		 */
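		/*
		 * Userland sketch (hedged: the device path is illustrative
		 * and the structs come from the cxgb ioctl header):
		 *
		 *	struct ch_mem_range t = { .buf = image, .len = len };
		 *	int fd = open("/dev/cxgb0", O_RDWR);
		 *	if (ioctl(fd, CHELSIO_LOAD_FW, &t) != 0)
		 *		err(1, "CHELSIO_LOAD_FW");
		 */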
2603 
2604 		ADAPTER_LOCK(sc);
2605 		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2606 			ADAPTER_UNLOCK(sc);
2607 			return (EBUSY);
2608 		}
2609 
2610 		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2611 		if (!fw_data)
2612 			error = ENOMEM;
2613 		else
2614 			error = copyin(t->buf, fw_data, t->len);
2615 
2616 		if (!error)
2617 			error = -t3_load_fw(sc, fw_data, t->len);
2618 
2619 		if (t3_get_fw_version(sc, &vers) == 0) {
2620 			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2621 			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2622 			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2623 		}
2624 
2625 		if (!error)
2626 			sc->flags |= FW_UPTODATE;
2627 
2628 		free(fw_data, M_DEVBUF);
2629 		ADAPTER_UNLOCK(sc);
2630 		break;
2631 	}
2632 	case CHELSIO_LOAD_BOOT: {
2633 		uint8_t *boot_data;
2634 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2635 
2636 		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2637 		if (!boot_data)
2638 			return (ENOMEM);
2639 
2640 		error = copyin(t->buf, boot_data, t->len);
2641 		if (!error)
2642 			error = -t3_load_boot(sc, boot_data, t->len);
2643 
2644 		free(boot_data, M_DEVBUF);
2645 		break;
2646 	}
2647 	case CHELSIO_GET_PM: {
2648 		struct ch_pm *m = (struct ch_pm *)data;
2649 		struct tp_params *p = &sc->params.tp;
2650 
2651 		if (!is_offload(sc))
2652 			return (EOPNOTSUPP);
2653 
2654 		m->tx_pg_sz = p->tx_pg_size;
2655 		m->tx_num_pg = p->tx_num_pgs;
2656 		m->rx_pg_sz  = p->rx_pg_size;
2657 		m->rx_num_pg = p->rx_num_pgs;
2658 		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2659 
2660 		break;
2661 	}
2662 	case CHELSIO_SET_PM: {
2663 		struct ch_pm *m = (struct ch_pm *)data;
2664 		struct tp_params *p = &sc->params.tp;
2665 
2666 		if (!is_offload(sc))
2667 			return (EOPNOTSUPP);
2668 		if (sc->flags & FULL_INIT_DONE)
2669 			return (EBUSY);
2670 
2671 		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2672 		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2673 			return (EINVAL);	/* not power of 2 */
2674 		if (!(m->rx_pg_sz & 0x14000))
2675 			return (EINVAL);	/* not 16KB or 64KB */
2676 		if (!(m->tx_pg_sz & 0x1554000))
2677 			return (EINVAL);	/* not 16KB, 64KB, 256KB, 1MB, 4MB or 16MB */
2678 		if (m->tx_num_pg == -1)
2679 			m->tx_num_pg = p->tx_num_pgs;
2680 		if (m->rx_num_pg == -1)
2681 			m->rx_num_pg = p->rx_num_pgs;
2682 		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2683 			return (EINVAL);
2684 		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2685 		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2686 			return (EINVAL);
2687 
2688 		p->rx_pg_size = m->rx_pg_sz;
2689 		p->tx_pg_size = m->tx_pg_sz;
2690 		p->rx_num_pgs = m->rx_num_pg;
2691 		p->tx_num_pgs = m->tx_num_pg;
2692 		break;
2693 	}
2694 	case CHELSIO_SETMTUTAB: {
2695 		struct ch_mtus *m = (struct ch_mtus *)data;
2696 		int i;
2697 
2698 		if (!is_offload(sc))
2699 			return (EOPNOTSUPP);
2700 		if (offload_running(sc))
2701 			return (EBUSY);
2702 		if (m->nmtus != NMTUS)
2703 			return (EINVAL);
2704 		if (m->mtus[0] < 81)         /* accommodate SACK */
2705 			return (EINVAL);
2706 
2707 		/*
2708 		 * MTUs must be in ascending order
2709 		 */
2710 		for (i = 1; i < NMTUS; ++i)
2711 			if (m->mtus[i] < m->mtus[i - 1])
2712 				return (EINVAL);
2713 
2714 		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2715 		break;
2716 	}
2717 	case CHELSIO_GETMTUTAB: {
2718 		struct ch_mtus *m = (struct ch_mtus *)data;
2719 
2720 		if (!is_offload(sc))
2721 			return (EOPNOTSUPP);
2722 
2723 		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2724 		m->nmtus = NMTUS;
2725 		break;
2726 	}
2727 	case CHELSIO_GET_MEM: {
2728 		struct ch_mem_range *t = (struct ch_mem_range *)data;
2729 		struct mc7 *mem;
2730 		uint8_t *useraddr;
2731 		u64 buf[32];
2732 
2733 		/*
2734 		 * Use these to avoid modifying len/addr in the return
2735 		 * struct
2736 		 */
2737 		uint32_t len = t->len, addr = t->addr;
2738 
2739 		if (!is_offload(sc))
2740 			return (EOPNOTSUPP);
2741 		if (!(sc->flags & FULL_INIT_DONE))
2742 			return (EIO);         /* need the memory controllers */
2743 		if ((addr & 0x7) || (len & 0x7))
2744 			return (EINVAL);
2745 		if (t->mem_id == MEM_CM)
2746 			mem = &sc->cm;
2747 		else if (t->mem_id == MEM_PMRX)
2748 			mem = &sc->pmrx;
2749 		else if (t->mem_id == MEM_PMTX)
2750 			mem = &sc->pmtx;
2751 		else
2752 			return (EINVAL);
2753 
2754 		/*
2755 		 * Version scheme:
2756 		 * bits 0..9: chip version
2757 		 * bits 10..15: chip revision
2758 		 */
2759 		t->version = 3 | (sc->params.rev << 10);
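		/* e.g. a rev-2 part yields 3 | (2 << 10) == 0x803 */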
2760 
2761 		/*
2762 		 * Read 256 bytes at a time as len can be large and we don't
2763 		 * want to use huge intermediate buffers.
2764 		 */
2765 		useraddr = (uint8_t *)t->buf;
2766 		while (len) {
2767 			unsigned int chunk = min(len, sizeof(buf));
2768 
2769 			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2770 			if (error)
2771 				return (-error);
2772 			if (copyout(buf, useraddr, chunk))
2773 				return (EFAULT);
2774 			useraddr += chunk;
2775 			addr += chunk;
2776 			len -= chunk;
2777 		}
2778 		break;
2779 	}
2780 	case CHELSIO_READ_TCAM_WORD: {
2781 		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2782 
2783 		if (!is_offload(sc))
2784 			return (EOPNOTSUPP);
2785 		if (!(sc->flags & FULL_INIT_DONE))
2786 			return (EIO);         /* need MC5 */
2787 		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2788 		break;
2789 	}
2790 	case CHELSIO_SET_TRACE_FILTER: {
2791 		struct ch_trace *t = (struct ch_trace *)data;
2792 		const struct trace_params *tp;
2793 
2794 		tp = (const struct trace_params *)&t->sip;
2795 		if (t->config_tx)
2796 			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2797 					       t->trace_tx);
2798 		if (t->config_rx)
2799 			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2800 					       t->trace_rx);
2801 		break;
2802 	}
2803 	case CHELSIO_SET_PKTSCHED: {
2804 		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2805 		if (sc->open_device_map == 0)
2806 			return (EAGAIN);
2807 		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2808 		    p->binding);
2809 		break;
2810 	}
2811 	case CHELSIO_IFCONF_GETREGS: {
2812 		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2813 		int reglen = cxgb_get_regs_len();
2814 		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2815 		if (buf == NULL) {
2816 			return (ENOMEM);
2817 		}
2818 		if (regs->len > reglen)
2819 			regs->len = reglen;
2820 		else if (regs->len < reglen)
2821 			error = E2BIG;
2822 
2823 		if (!error) {
2824 			cxgb_get_regs(sc, regs, buf);
2825 			error = copyout(buf, regs->data, reglen);
2826 		}
2827 		free(buf, M_DEVBUF);
2828 
2829 		break;
2830 	}
2831 	case CHELSIO_SET_HW_SCHED: {
2832 		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2833 		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2834 
2835 		if ((sc->flags & FULL_INIT_DONE) == 0)
2836 			return (EAGAIN);       /* need TP to be initialized */
2837 		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2838 		    !in_range(t->channel, 0, 1) ||
2839 		    !in_range(t->kbps, 0, 10000000) ||
2840 		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2841 		    !in_range(t->flow_ipg, 0,
2842 			      dack_ticks_to_usec(sc, 0x7ff)))
2843 			return (EINVAL);
2844 
2845 		if (t->kbps >= 0) {
2846 			error = t3_config_sched(sc, t->kbps, t->sched);
2847 			if (error < 0)
2848 				return (-error);
2849 		}
2850 		if (t->class_ipg >= 0)
2851 			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2852 		if (t->flow_ipg >= 0) {
2853 			t->flow_ipg *= 1000;     /* us -> ns */
2854 			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2855 		}
2856 		if (t->mode >= 0) {
2857 			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2858 
2859 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2860 					 bit, t->mode ? bit : 0);
2861 		}
2862 		if (t->channel >= 0)
2863 			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2864 					 1 << t->sched, t->channel << t->sched);
2865 		break;
2866 	}
2867 	case CHELSIO_GET_EEPROM: {
2868 		int i;
2869 		struct ch_eeprom *e = (struct ch_eeprom *)data;
2870 		uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2871 
2872 		if (buf == NULL) {
2873 			return (ENOMEM);
2874 		}
2875 		e->magic = EEPROM_MAGIC;
		/* bound the read so the loop below cannot overrun buf */
		if (e->offset + e->len > EEPROMSIZE) {
			free(buf, M_DEVBUF);
			return (EINVAL);
		}
2876 		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2877 			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2878 
2879 		if (!error)
2880 			error = copyout(buf + e->offset, e->data, e->len);
2881 
2882 		free(buf, M_DEVBUF);
2883 		break;
2884 	}
2885 	case CHELSIO_CLEAR_STATS: {
2886 		if (!(sc->flags & FULL_INIT_DONE))
2887 			return (EAGAIN);
2888 
2889 		PORT_LOCK(pi);
2890 		t3_mac_update_stats(&pi->mac);
2891 		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
2892 		PORT_UNLOCK(pi);
2893 		break;
2894 	}
2895 	default:
2896 		return (EOPNOTSUPP);
2897 		break;
2898 	}
2899 
2900 	return (error);
2901 }
2902 
2903 static __inline void
2904 reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
2905     unsigned int end)
2906 {
2907 	uint32_t *p = (uint32_t *)(buf + start);
2908 
2909 	for ( ; start <= end; start += sizeof(uint32_t))
2910 		*p++ = t3_read_reg(ap, start);
2911 }
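
/*
 * e.g. reg_block_dump(ap, buf, 0x100, 0x10c) copies the four registers
 * at 0x100, 0x104, 0x108 and 0x10c into buf + 0x100; offsets in the
 * dump buffer mirror register addresses directly.
 */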
2912 
2913 #define T3_REGMAP_SIZE (3 * 1024)
2914 static int
2915 cxgb_get_regs_len(void)
2916 {
2917 	return (T3_REGMAP_SIZE);
2918 }
2919 
2920 static void
2921 cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
2922 {
2923 
2924 	/*
2925 	 * Version scheme:
2926 	 * bits 0..9: chip version
2927 	 * bits 10..15: chip revision
2928 	 * bit 31: set for PCIe cards
2929 	 */
2930 	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
2931 
2932 	/*
2933 	 * We skip the MAC statistics registers because they are clear-on-read.
2934 	 * Also reading multi-register stats would need to synchronize with the
2935 	 * periodic mac stats accumulation.  Hard to justify the complexity.
2936 	 */
2937 	memset(buf, 0, cxgb_get_regs_len());
2938 	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
2939 	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
2940 	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
2941 	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
2942 	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
2943 	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
2944 		       XGM_REG(A_XGM_SERDES_STAT3, 1));
2945 	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
2946 		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
2947 }
2948 
2949 
2950 MODULE_DEPEND(if_cxgb, cxgb_t3fw, 1, 1, 1);
2951