xref: /illumos-gate/usr/src/uts/intel/io/vmxnet3s/vmxnet3_main.c (revision 202ca9ae460faf1825ede303c46abd4e1f6cee28)
1 /*
2  * Copyright (C) 2007-2014 VMware, Inc. All rights reserved.
3  *
4  * The contents of this file are subject to the terms of the Common
5  * Development and Distribution License (the "License") version 1.0
6  * and no later version.  You may not use this file except in
7  * compliance with the License.
8  *
9  * You can obtain a copy of the License at
10  *         http://www.opensource.org/licenses/cddl1.php
11  *
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  */
15 
16 /*
17  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
18  */
19 
20 #include <vmxnet3.h>
21 
22 /*
23  * This driver is based on VMware's version 3227872, and contains additional
24  * enhancements (see README.txt).
25  */
26 #define	BUILD_NUMBER_NUMERIC	3227872
27 
28 /*
29  * If we run out of rxPool buffers, only allocate if the MTU is <= PAGESIZE
30  * so that we don't have to incur the cost of allocating multiple contiguous
31  * pages (very slow) in interrupt context.
32  */
33 #define	VMXNET3_ALLOC_OK(dp)	((dp)->cur_mtu <= PAGESIZE)
34 
35 /*
36  * TODO:
37  *    - Tx data ring
38  *    - MAC_CAPAB_POLL support
39  *    - Dynamic RX pool
40  */
41 
42 static int vmxnet3_getstat(void *, uint_t, uint64_t *);
43 static int vmxnet3_start(void *);
44 static void vmxnet3_stop(void *);
45 static int vmxnet3_setpromisc(void *, boolean_t);
46 static void vmxnet3_ioctl(void *arg, queue_t *wq, mblk_t *mp);
47 static int vmxnet3_multicst(void *, boolean_t, const uint8_t *);
48 static int vmxnet3_unicst(void *, const uint8_t *);
49 static boolean_t vmxnet3_getcapab(void *, mac_capab_t, void *);
50 static int vmxnet3_get_prop(void *, const char *, mac_prop_id_t, uint_t,
51     void *);
52 static int vmxnet3_set_prop(void *, const char *, mac_prop_id_t, uint_t,
53     const void *);
54 static void vmxnet3_prop_info(void *, const char *, mac_prop_id_t,
55     mac_prop_info_handle_t);
56 
57 int vmxnet3s_debug = 0;
58 
59 /* MAC callbacks */
60 static mac_callbacks_t vmxnet3_mac_callbacks = {
61 	.mc_callbacks =	MC_GETCAPAB | MC_IOCTL | MC_SETPROP | MC_PROPINFO,
62 	.mc_getstat =	vmxnet3_getstat,
63 	.mc_start =	vmxnet3_start,
64 	.mc_stop =	vmxnet3_stop,
65 	.mc_setpromisc = vmxnet3_setpromisc,
66 	.mc_multicst =	vmxnet3_multicst,
67 	.mc_unicst =	vmxnet3_unicst,
68 	.mc_tx =	vmxnet3_tx,
69 	.mc_ioctl =	vmxnet3_ioctl,
70 	.mc_getcapab =	vmxnet3_getcapab,
71 	.mc_getprop =	vmxnet3_get_prop,
72 	.mc_setprop =	vmxnet3_set_prop,
73 	.mc_propinfo =	vmxnet3_prop_info
74 };
75 
76 /* Tx DMA engine description */
77 static ddi_dma_attr_t vmxnet3_dma_attrs_tx = {
78 	.dma_attr_version =	DMA_ATTR_V0,
79 	.dma_attr_addr_lo =	0x0000000000000000ull,
80 	.dma_attr_addr_hi =	0xFFFFFFFFFFFFFFFFull,
81 	.dma_attr_count_max =	0xFFFFFFFFFFFFFFFFull,
82 	.dma_attr_align =	0x0000000000000001ull,
83 	.dma_attr_burstsizes =	0x0000000000000001ull,
84 	.dma_attr_minxfer =	0x00000001,
85 	.dma_attr_maxxfer =	0x000000000000FFFFull,
86 	.dma_attr_seg =		0xFFFFFFFFFFFFFFFFull,
87 	.dma_attr_sgllen =	-1,
88 	.dma_attr_granular =	0x00000001,
89 	.dma_attr_flags =	0
90 };
91 
92 /* --- */
93 
94 /*
95  * Fetch the statistics of a vmxnet3 device.
96  *
97  * Returns:
98  *	0 on success, non-zero on failure.
99  */
100 static int
101 vmxnet3_getstat(void *data, uint_t stat, uint64_t *val)
102 {
103 	vmxnet3_softc_t *dp = data;
104 	UPT1_TxStats *txStats;
105 	UPT1_RxStats *rxStats;
106 
107 	VMXNET3_DEBUG(dp, 3, "getstat(%u)\n", stat);
108 
109 	if (!dp->devEnabled) {
110 		return (EBUSY);
111 	}
112 
113 	txStats = &VMXNET3_TQDESC(dp)->stats;
114 	rxStats = &VMXNET3_RQDESC(dp)->stats;
115 
116 	/*
117 	 * First touch the related register
118 	 */
119 	switch (stat) {
120 	case MAC_STAT_MULTIRCV:
121 	case MAC_STAT_BRDCSTRCV:
122 	case MAC_STAT_MULTIXMT:
123 	case MAC_STAT_BRDCSTXMT:
124 	case MAC_STAT_NORCVBUF:
125 	case MAC_STAT_IERRORS:
126 	case MAC_STAT_NOXMTBUF:
127 	case MAC_STAT_OERRORS:
128 	case MAC_STAT_RBYTES:
129 	case MAC_STAT_IPACKETS:
130 	case MAC_STAT_OBYTES:
131 	case MAC_STAT_OPACKETS:
132 		VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS);
133 		break;
134 	case MAC_STAT_IFSPEED:
135 	case MAC_STAT_COLLISIONS:
136 	case ETHER_STAT_LINK_DUPLEX:
137 		/* nothing */
138 		break;
139 	default:
140 		return (ENOTSUP);
141 	}
142 
143 	/*
144 	 * Then fetch the corresponding stat
145 	 */
146 	switch (stat) {
147 	case MAC_STAT_IFSPEED:
148 		*val = dp->linkSpeed;
149 		break;
150 	case MAC_STAT_MULTIRCV:
151 		*val = rxStats->mcastPktsRxOK;
152 		break;
153 	case MAC_STAT_BRDCSTRCV:
154 		*val = rxStats->bcastPktsRxOK;
155 		break;
156 	case MAC_STAT_MULTIXMT:
157 		*val = txStats->mcastPktsTxOK;
158 		break;
159 	case MAC_STAT_BRDCSTXMT:
160 		*val = txStats->bcastPktsTxOK;
161 		break;
162 	case MAC_STAT_NORCVBUF:
163 		*val = rxStats->pktsRxOutOfBuf + dp->rx_alloc_failed;
164 		break;
165 	case MAC_STAT_IERRORS:
166 		*val = rxStats->pktsRxError;
167 		break;
168 	case MAC_STAT_NOXMTBUF:
169 		*val = txStats->pktsTxDiscard + dp->tx_pullup_failed;
170 		break;
171 	case MAC_STAT_OERRORS:
172 		*val = txStats->pktsTxError + dp->tx_error;
173 		break;
174 	case MAC_STAT_COLLISIONS:
175 		*val = 0;
176 		break;
177 	case MAC_STAT_RBYTES:
178 		*val = rxStats->ucastBytesRxOK + rxStats->mcastBytesRxOK +
179 		    rxStats->bcastBytesRxOK;
180 		break;
181 	case MAC_STAT_IPACKETS:
182 		*val = rxStats->ucastPktsRxOK + rxStats->mcastPktsRxOK +
183 		    rxStats->bcastPktsRxOK;
184 		break;
185 	case MAC_STAT_OBYTES:
186 		*val = txStats->ucastBytesTxOK + txStats->mcastBytesTxOK +
187 		    txStats->bcastBytesTxOK;
188 		break;
189 	case MAC_STAT_OPACKETS:
190 		*val = txStats->ucastPktsTxOK + txStats->mcastPktsTxOK +
191 		    txStats->bcastPktsTxOK;
192 		break;
193 	case ETHER_STAT_LINK_DUPLEX:
194 		*val = LINK_DUPLEX_FULL;
195 		break;
196 	default:
197 		ASSERT(B_FALSE);
198 	}
199 
200 	return (0);
201 }
202 
203 /*
204  * Allocate and initialize the shared data structures of a vmxnet3 device.
205  *
206  * Returns:
207  *	0 on sucess, non-zero on failure.
208  */
209 static int
210 vmxnet3_prepare_drivershared(vmxnet3_softc_t *dp)
211 {
212 	Vmxnet3_DriverShared *ds;
213 	size_t allocSize = sizeof (Vmxnet3_DriverShared);
214 	int err;
215 
216 	if ((err = vmxnet3_alloc_dma_mem_1(dp, &dp->sharedData, allocSize,
217 	    B_TRUE)) != 0) {
218 		return (err);
219 	}
220 	ds = VMXNET3_DS(dp);
221 	(void) memset(ds, 0, allocSize);
222 
223 	allocSize = sizeof (Vmxnet3_TxQueueDesc) + sizeof (Vmxnet3_RxQueueDesc);
224 	if ((err = vmxnet3_alloc_dma_mem_128(dp, &dp->queueDescs, allocSize,
225 	    B_TRUE)) != 0) {
226 		vmxnet3_free_dma_mem(&dp->sharedData);
227 		return (err);
228 	}
229 	(void) memset(dp->queueDescs.buf, 0, allocSize);
230 
231 	ds->magic = VMXNET3_REV1_MAGIC;
232 
233 	/* Take care of most of devRead */
234 	ds->devRead.misc.driverInfo.version = BUILD_NUMBER_NUMERIC;
235 #ifdef _LP64
236 	ds->devRead.misc.driverInfo.gos.gosBits = VMXNET3_GOS_BITS_64;
237 #else
238 	ds->devRead.misc.driverInfo.gos.gosBits = VMXNET3_GOS_BITS_32;
239 #endif
240 	ds->devRead.misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_SOLARIS;
241 	ds->devRead.misc.driverInfo.gos.gosVer = 10;
242 	ds->devRead.misc.driverInfo.vmxnet3RevSpt = 1;
243 	ds->devRead.misc.driverInfo.uptVerSpt = 1;
244 
245 	ds->devRead.misc.uptFeatures = UPT1_F_RXCSUM;
246 	ds->devRead.misc.mtu = dp->cur_mtu;
247 
248 	/* XXX: ds->devRead.misc.maxNumRxSG */
249 	ds->devRead.misc.numTxQueues = 1;
250 	ds->devRead.misc.numRxQueues = 1;
251 	ds->devRead.misc.queueDescPA = dp->queueDescs.bufPA;
252 	ds->devRead.misc.queueDescLen = allocSize;
253 
254 	/* TxQueue and RxQueue information is filled in other functions */
255 	ds->devRead.intrConf.autoMask = (dp->intrMaskMode == VMXNET3_IMM_AUTO);
256 	ds->devRead.intrConf.numIntrs = 1;
257 	/* XXX: ds->intr.modLevels */
258 	ds->devRead.intrConf.eventIntrIdx = 0;
259 
260 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_DSAL,
261 	    VMXNET3_ADDR_LO(dp->sharedData.bufPA));
262 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_DSAH,
263 	    VMXNET3_ADDR_HI(dp->sharedData.bufPA));
264 
265 	return (0);
266 }
267 
/*
 * Destroy the shared data structures of a vmxnet3 device.
 *
 * The shared-area address registers (DSAL/DSAH) are cleared first, then the
 * queue descriptors and the shared area themselves are freed.
 */
static void
vmxnet3_destroy_drivershared(vmxnet3_softc_t *dp)
{
	/* Clear the advertised address before releasing the memory */
	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_DSAL, 0);
	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_DSAH, 0);

	vmxnet3_free_dma_mem(&dp->queueDescs);
	vmxnet3_free_dma_mem(&dp->sharedData);
}
280 
281 /*
282  * Allocate and initialize the command ring of a queue.
283  *
284  * Returns:
285  *	0 on success, non-zero on error.
286  */
287 static int
288 vmxnet3_alloc_cmdring(vmxnet3_softc_t *dp, vmxnet3_cmdring_t *cmdRing)
289 {
290 	size_t ringSize = cmdRing->size * sizeof (Vmxnet3_TxDesc);
291 	int err;
292 
293 	if ((err = vmxnet3_alloc_dma_mem_512(dp, &cmdRing->dma, ringSize,
294 	    B_TRUE)) != 0) {
295 		return (err);
296 	}
297 	(void) memset(cmdRing->dma.buf, 0, ringSize);
298 	cmdRing->avail = cmdRing->size;
299 	cmdRing->next2fill = 0;
300 	cmdRing->gen = VMXNET3_INIT_GEN;
301 
302 	return (0);
303 }
304 
305 /*
306  * Allocate and initialize the completion ring of a queue.
307  *
308  * Returns:
309  *    DDI_SUCCESS or DDI_FAILURE.
310  */
311 static int
312 vmxnet3_alloc_compring(vmxnet3_softc_t *dp, vmxnet3_compring_t *compRing)
313 {
314 	size_t ringSize = compRing->size * sizeof (Vmxnet3_TxCompDesc);
315 
316 	if (vmxnet3_alloc_dma_mem_512(dp, &compRing->dma, ringSize,
317 	    B_TRUE) != DDI_SUCCESS) {
318 		return (DDI_FAILURE);
319 	}
320 	(void) memset(compRing->dma.buf, 0, ringSize);
321 	compRing->next2comp = 0;
322 	compRing->gen = VMXNET3_INIT_GEN;
323 
324 	return (DDI_SUCCESS);
325 }
326 
327 /*
328  * Initialize the tx queue of a vmxnet3 device.
329  *
330  * Returns:
331  *	0 on success, non-zero on failure.
332  */
333 static int
334 vmxnet3_prepare_txqueue(vmxnet3_softc_t *dp)
335 {
336 	Vmxnet3_TxQueueDesc *tqdesc = VMXNET3_TQDESC(dp);
337 	vmxnet3_txqueue_t *txq = &dp->txQueue;
338 	int err;
339 
340 	ASSERT(!(txq->cmdRing.size & VMXNET3_RING_SIZE_MASK));
341 	ASSERT(!(txq->compRing.size & VMXNET3_RING_SIZE_MASK));
342 	ASSERT(!txq->cmdRing.dma.buf && !txq->compRing.dma.buf);
343 
344 	if ((err = vmxnet3_alloc_cmdring(dp, &txq->cmdRing)) != 0) {
345 		goto error;
346 	}
347 	tqdesc->conf.txRingBasePA = txq->cmdRing.dma.bufPA;
348 	tqdesc->conf.txRingSize = txq->cmdRing.size;
349 	tqdesc->conf.dataRingBasePA = 0;
350 	tqdesc->conf.dataRingSize = 0;
351 
352 	if ((err = vmxnet3_alloc_compring(dp, &txq->compRing)) != 0) {
353 		goto error_cmdring;
354 	}
355 	tqdesc->conf.compRingBasePA = txq->compRing.dma.bufPA;
356 	tqdesc->conf.compRingSize = txq->compRing.size;
357 
358 	txq->metaRing = kmem_zalloc(txq->cmdRing.size *
359 	    sizeof (vmxnet3_metatx_t), KM_SLEEP);
360 	ASSERT(txq->metaRing);
361 
362 	if ((err = vmxnet3_txqueue_init(dp, txq)) != 0) {
363 		goto error_mpring;
364 	}
365 
366 	return (0);
367 
368 error_mpring:
369 	kmem_free(txq->metaRing, txq->cmdRing.size * sizeof (vmxnet3_metatx_t));
370 	vmxnet3_free_dma_mem(&txq->compRing.dma);
371 error_cmdring:
372 	vmxnet3_free_dma_mem(&txq->cmdRing.dma);
373 error:
374 	return (err);
375 }
376 
377 /*
378  * Initialize the rx queue of a vmxnet3 device.
379  *
380  * Returns:
381  *	0 on success, non-zero on failure.
382  */
383 static int
384 vmxnet3_prepare_rxqueue(vmxnet3_softc_t *dp)
385 {
386 	Vmxnet3_RxQueueDesc *rqdesc = VMXNET3_RQDESC(dp);
387 	vmxnet3_rxqueue_t *rxq = &dp->rxQueue;
388 	int err = 0;
389 
390 	ASSERT(!(rxq->cmdRing.size & VMXNET3_RING_SIZE_MASK));
391 	ASSERT(!(rxq->compRing.size & VMXNET3_RING_SIZE_MASK));
392 	ASSERT(!rxq->cmdRing.dma.buf && !rxq->compRing.dma.buf);
393 
394 	if ((err = vmxnet3_alloc_cmdring(dp, &rxq->cmdRing)) != 0) {
395 		goto error;
396 	}
397 	rqdesc->conf.rxRingBasePA[0] = rxq->cmdRing.dma.bufPA;
398 	rqdesc->conf.rxRingSize[0] = rxq->cmdRing.size;
399 	rqdesc->conf.rxRingBasePA[1] = 0;
400 	rqdesc->conf.rxRingSize[1] = 0;
401 
402 	if ((err = vmxnet3_alloc_compring(dp, &rxq->compRing)) != 0) {
403 		goto error_cmdring;
404 	}
405 	rqdesc->conf.compRingBasePA = rxq->compRing.dma.bufPA;
406 	rqdesc->conf.compRingSize = rxq->compRing.size;
407 
408 	rxq->bufRing = kmem_zalloc(rxq->cmdRing.size *
409 	    sizeof (vmxnet3_bufdesc_t), KM_SLEEP);
410 	ASSERT(rxq->bufRing);
411 
412 	if ((err = vmxnet3_rxqueue_init(dp, rxq)) != 0) {
413 		goto error_bufring;
414 	}
415 
416 	return (0);
417 
418 error_bufring:
419 	kmem_free(rxq->bufRing, rxq->cmdRing.size * sizeof (vmxnet3_bufdesc_t));
420 	vmxnet3_free_dma_mem(&rxq->compRing.dma);
421 error_cmdring:
422 	vmxnet3_free_dma_mem(&rxq->cmdRing.dma);
423 error:
424 	return (err);
425 }
426 
427 /*
428  * Destroy the tx queue of a vmxnet3 device.
429  */
430 static void
431 vmxnet3_destroy_txqueue(vmxnet3_softc_t *dp)
432 {
433 	vmxnet3_txqueue_t *txq = &dp->txQueue;
434 
435 	ASSERT(txq->metaRing);
436 	ASSERT(txq->cmdRing.dma.buf && txq->compRing.dma.buf);
437 
438 	vmxnet3_txqueue_fini(dp, txq);
439 
440 	kmem_free(txq->metaRing, txq->cmdRing.size * sizeof (vmxnet3_metatx_t));
441 
442 	vmxnet3_free_dma_mem(&txq->cmdRing.dma);
443 	vmxnet3_free_dma_mem(&txq->compRing.dma);
444 }
445 
446 /*
447  * Destroy the rx queue of a vmxnet3 device.
448  */
449 static void
450 vmxnet3_destroy_rxqueue(vmxnet3_softc_t *dp)
451 {
452 	vmxnet3_rxqueue_t *rxq = &dp->rxQueue;
453 
454 	ASSERT(rxq->bufRing);
455 	ASSERT(rxq->cmdRing.dma.buf && rxq->compRing.dma.buf);
456 
457 	vmxnet3_rxqueue_fini(dp, rxq);
458 
459 	kmem_free(rxq->bufRing, rxq->cmdRing.size * sizeof (vmxnet3_bufdesc_t));
460 
461 	vmxnet3_free_dma_mem(&rxq->cmdRing.dma);
462 	vmxnet3_free_dma_mem(&rxq->compRing.dma);
463 }
464 
465 /*
466  * Apply new RX filters settings to a vmxnet3 device.
467  */
468 static void
469 vmxnet3_refresh_rxfilter(vmxnet3_softc_t *dp)
470 {
471 	Vmxnet3_DriverShared *ds = VMXNET3_DS(dp);
472 
473 	ds->devRead.rxFilterConf.rxMode = dp->rxMode;
474 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_UPDATE_RX_MODE);
475 }
476 
477 /*
478  * Fetch the link state of a vmxnet3 device.
479  */
480 static void
481 vmxnet3_refresh_linkstate(vmxnet3_softc_t *dp)
482 {
483 	uint32_t ret32;
484 
485 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
486 	ret32 = VMXNET3_BAR1_GET32(dp, VMXNET3_REG_CMD);
487 	if (ret32 & 1) {
488 		dp->linkState = LINK_STATE_UP;
489 		dp->linkSpeed = (ret32 >> 16) * 1000000ULL;
490 	} else {
491 		dp->linkState = LINK_STATE_DOWN;
492 		dp->linkSpeed = 0;
493 	}
494 }
495 
496 /*
497  * Start a vmxnet3 device: allocate and initialize the shared data
498  * structures and send a start command to the device.
499  *
500  * Returns:
501  *	0 on success, non-zero error on failure.
502  */
503 static int
504 vmxnet3_start(void *data)
505 {
506 	vmxnet3_softc_t *dp = data;
507 	Vmxnet3_TxQueueDesc *tqdesc;
508 	Vmxnet3_RxQueueDesc *rqdesc;
509 	int txQueueSize, rxQueueSize;
510 	uint32_t ret32;
511 	int err, dmaerr;
512 
513 	VMXNET3_DEBUG(dp, 1, "start()\n");
514 
515 	/*
516 	 * Allocate vmxnet3's shared data and advertise its PA
517 	 */
518 	if ((err = vmxnet3_prepare_drivershared(dp)) != 0) {
519 		VMXNET3_WARN(dp, "vmxnet3_prepare_drivershared() failed: %d",
520 		    err);
521 		goto error;
522 	}
523 	tqdesc = VMXNET3_TQDESC(dp);
524 	rqdesc = VMXNET3_RQDESC(dp);
525 
526 	/*
527 	 * Create and initialize the tx queue
528 	 */
529 	txQueueSize = vmxnet3_getprop(dp, "TxRingSize", 32, 4096,
530 	    VMXNET3_DEF_TX_RING_SIZE);
531 	if (!(txQueueSize & VMXNET3_RING_SIZE_MASK)) {
532 		dp->txQueue.cmdRing.size = txQueueSize;
533 		dp->txQueue.compRing.size = txQueueSize;
534 		dp->txQueue.sharedCtrl = &tqdesc->ctrl;
535 		if ((err = vmxnet3_prepare_txqueue(dp)) != 0) {
536 			VMXNET3_WARN(dp, "vmxnet3_prepare_txqueue() failed: %d",
537 			    err);
538 			goto error_shared_data;
539 		}
540 	} else {
541 		VMXNET3_WARN(dp, "invalid tx ring size (%d)\n", txQueueSize);
542 		err = EINVAL;
543 		goto error_shared_data;
544 	}
545 
546 	/*
547 	 * Create and initialize the rx queue
548 	 */
549 	rxQueueSize = vmxnet3_getprop(dp, "RxRingSize", 32, 4096,
550 	    VMXNET3_DEF_RX_RING_SIZE);
551 	if (!(rxQueueSize & VMXNET3_RING_SIZE_MASK)) {
552 		dp->rxQueue.cmdRing.size = rxQueueSize;
553 		dp->rxQueue.compRing.size = rxQueueSize;
554 		dp->rxQueue.sharedCtrl = &rqdesc->ctrl;
555 		if ((err = vmxnet3_prepare_rxqueue(dp)) != 0) {
556 			VMXNET3_WARN(dp, "vmxnet3_prepare_rxqueue() failed: %d",
557 			    err);
558 			goto error_tx_queue;
559 		}
560 	} else {
561 		VMXNET3_WARN(dp, "invalid rx ring size (%d)\n", rxQueueSize);
562 		err = EINVAL;
563 		goto error_tx_queue;
564 	}
565 
566 	/*
567 	 * Allocate the Tx DMA handle
568 	 */
569 	if ((dmaerr = ddi_dma_alloc_handle(dp->dip, &vmxnet3_dma_attrs_tx,
570 	    DDI_DMA_SLEEP, NULL, &dp->txDmaHandle)) != DDI_SUCCESS) {
571 		VMXNET3_WARN(dp, "ddi_dma_alloc_handle() failed: %d", dmaerr);
572 		err = vmxnet3_dmaerr2errno(dmaerr);
573 		goto error_rx_queue;
574 	}
575 
576 	/*
577 	 * Activate the device
578 	 */
579 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_ACTIVATE_DEV);
580 	ret32 = VMXNET3_BAR1_GET32(dp, VMXNET3_REG_CMD);
581 	if (ret32) {
582 		VMXNET3_WARN(dp, "ACTIVATE_DEV failed: 0x%x\n", ret32);
583 		err = ENXIO;
584 		goto error_txhandle;
585 	}
586 	dp->devEnabled = B_TRUE;
587 
588 	VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_RXPROD,
589 	    dp->txQueue.cmdRing.size - 1);
590 
591 	/*
592 	 * Update the RX filters, must be done after ACTIVATE_DEV
593 	 */
594 	dp->rxMode = VMXNET3_RXM_UCAST | VMXNET3_RXM_BCAST;
595 	vmxnet3_refresh_rxfilter(dp);
596 
597 	/*
598 	 * Get the link state now because no events will be generated
599 	 */
600 	vmxnet3_refresh_linkstate(dp);
601 	mac_link_update(dp->mac, dp->linkState);
602 
603 	/*
604 	 * Finally, unmask the interrupt
605 	 */
606 	VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_IMR, 0);
607 
608 	return (0);
609 
610 error_txhandle:
611 	ddi_dma_free_handle(&dp->txDmaHandle);
612 error_rx_queue:
613 	vmxnet3_destroy_rxqueue(dp);
614 error_tx_queue:
615 	vmxnet3_destroy_txqueue(dp);
616 error_shared_data:
617 	vmxnet3_destroy_drivershared(dp);
618 error:
619 	return (err);
620 }
621 
/*
 * Stop a vmxnet3 device: send a stop command to the device and
 * de-allocate the shared data structures.
 *
 * The interrupt is masked, devEnabled is cleared and the device is quiesced
 * while both intrLock and rxPoolLock are held; the Tx DMA handle, both
 * queues and the shared data are released afterwards.
 */
static void
vmxnet3_stop(void *data)
{
	vmxnet3_softc_t *dp = data;

	VMXNET3_DEBUG(dp, 1, "stop()\n");

	/*
	 * Take the 2 locks related to asynchronous events.
	 * These events should always check dp->devEnabled before poking dp.
	 */
	mutex_enter(&dp->intrLock);
	mutex_enter(&dp->rxPoolLock);
	/* Mask the interrupt, mark the device disabled, then quiesce it */
	VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_IMR, 1);
	dp->devEnabled = B_FALSE;
	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_QUIESCE_DEV);
	mutex_exit(&dp->rxPoolLock);
	mutex_exit(&dp->intrLock);

	/* Release resources in the reverse order of vmxnet3_start() */
	ddi_dma_free_handle(&dp->txDmaHandle);

	vmxnet3_destroy_rxqueue(dp);
	vmxnet3_destroy_txqueue(dp);

	vmxnet3_destroy_drivershared(dp);
}
652 
653 /*
654  * Set or unset promiscuous mode on a vmxnet3 device.
655  */
656 static int
657 vmxnet3_setpromisc(void *data, boolean_t promisc)
658 {
659 	vmxnet3_softc_t *dp = data;
660 
661 	VMXNET3_DEBUG(dp, 2, "setpromisc(%s)\n", promisc ? "TRUE" : "FALSE");
662 
663 	if (promisc) {
664 		dp->rxMode |= VMXNET3_RXM_PROMISC;
665 	} else {
666 		dp->rxMode &= ~VMXNET3_RXM_PROMISC;
667 	}
668 
669 	vmxnet3_refresh_rxfilter(dp);
670 
671 	return (0);
672 }
673 
674 /*
675  * Add or remove a multicast address from/to a vmxnet3 device.
676  *
677  * Returns:
678  *	0 on success, non-zero on failure.
679  */
680 static int
681 vmxnet3_multicst(void *data, boolean_t add, const uint8_t *macaddr)
682 {
683 	vmxnet3_softc_t *dp = data;
684 	vmxnet3_dmabuf_t newMfTable;
685 	int ret = 0;
686 	uint16_t macIdx;
687 	size_t allocSize;
688 
689 	VMXNET3_DEBUG(dp, 2, "multicst(%s, "MACADDR_FMT")\n",
690 	    add ? "add" : "remove", MACADDR_FMT_ARGS(macaddr));
691 
692 	/*
693 	 * First lookup the position of the given MAC to check if it is
694 	 * present in the existing MF table.
695 	 */
696 	for (macIdx = 0; macIdx < dp->mfTable.bufLen; macIdx += 6) {
697 		if (memcmp(&dp->mfTable.buf[macIdx], macaddr, 6) == 0) {
698 			break;
699 		}
700 	}
701 
702 	/*
703 	 * Check for 2 situations we can handle gracefully by bailing out:
704 	 * Adding an already existing filter or removing a non-existing one.
705 	 */
706 	if (add && macIdx < dp->mfTable.bufLen) {
707 		VMXNET3_WARN(dp, MACADDR_FMT " already in MC filter list "
708 		    "@ %u\n", MACADDR_FMT_ARGS(macaddr), macIdx / 6);
709 		ASSERT(B_FALSE);
710 		goto done;
711 	}
712 	if (!add && macIdx == dp->mfTable.bufLen) {
713 		VMXNET3_WARN(dp, MACADDR_FMT " not in MC filter list @ %u\n",
714 		    MACADDR_FMT_ARGS(macaddr), macIdx / 6);
715 		ASSERT(B_FALSE);
716 		goto done;
717 	}
718 
719 	/*
720 	 * Create the new MF table
721 	 */
722 	allocSize = dp->mfTable.bufLen + (add ? 6 : -6);
723 	if (allocSize) {
724 		ret = vmxnet3_alloc_dma_mem_1(dp, &newMfTable, allocSize,
725 		    B_TRUE);
726 		ASSERT(ret == 0);
727 		if (add) {
728 			(void) memcpy(newMfTable.buf, dp->mfTable.buf,
729 			    dp->mfTable.bufLen);
730 			(void) memcpy(newMfTable.buf + dp->mfTable.bufLen,
731 			    macaddr, 6);
732 		} else {
733 			(void) memcpy(newMfTable.buf, dp->mfTable.buf,
734 			    macIdx);
735 			(void) memcpy(newMfTable.buf + macIdx,
736 			    dp->mfTable.buf + macIdx + 6,
737 			    dp->mfTable.bufLen - macIdx - 6);
738 		}
739 	} else {
740 		newMfTable.buf = NULL;
741 		newMfTable.bufPA = 0;
742 		newMfTable.bufLen = 0;
743 	}
744 
745 	/*
746 	 * Now handle 2 corner cases: if we're creating the first filter or
747 	 * removing the last one, we have to update rxMode accordingly.
748 	 */
749 	if (add && newMfTable.bufLen == 6) {
750 		ASSERT(!(dp->rxMode & VMXNET3_RXM_MCAST));
751 		dp->rxMode |= VMXNET3_RXM_MCAST;
752 		vmxnet3_refresh_rxfilter(dp);
753 	}
754 	if (!add && dp->mfTable.bufLen == 6) {
755 		ASSERT(newMfTable.buf == NULL);
756 		ASSERT(dp->rxMode & VMXNET3_RXM_MCAST);
757 		dp->rxMode &= ~VMXNET3_RXM_MCAST;
758 		vmxnet3_refresh_rxfilter(dp);
759 	}
760 
761 	/*
762 	 * Now replace the old MF table with the new one
763 	 */
764 	if (dp->mfTable.buf) {
765 		vmxnet3_free_dma_mem(&dp->mfTable);
766 	}
767 	dp->mfTable = newMfTable;
768 	VMXNET3_DS(dp)->devRead.rxFilterConf.mfTablePA = newMfTable.bufPA;
769 	VMXNET3_DS(dp)->devRead.rxFilterConf.mfTableLen = newMfTable.bufLen;
770 
771 done:
772 	/* Always update the filters */
773 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_UPDATE_MAC_FILTERS);
774 
775 	return (ret);
776 }
777 
778 /*
779  * Set the mac address of a vmxnet3 device.
780  *
781  * Returns:
782  *	0
783  */
784 static int
785 vmxnet3_unicst(void *data, const uint8_t *macaddr)
786 {
787 	vmxnet3_softc_t *dp = data;
788 	uint32_t val32;
789 
790 	VMXNET3_DEBUG(dp, 2, "unicst("MACADDR_FMT")\n",
791 	    MACADDR_FMT_ARGS(macaddr));
792 
793 	val32 = *((uint32_t *)(macaddr + 0));
794 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_MACL, val32);
795 	val32 = *((uint16_t *)(macaddr + 4));
796 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_MACH, val32);
797 
798 	(void) memcpy(dp->macaddr, macaddr, 6);
799 
800 	return (0);
801 }
802 
803 /*
804  * Change the MTU as seen by the driver. This is only supported when
805  * the mac is stopped.
806  *
807  * Returns:
808  *	EBUSY if the device is enabled.
809  *	EINVAL for invalid MTU values.
810  *	0 on success.
811  */
812 static int
813 vmxnet3_change_mtu(vmxnet3_softc_t *dp, uint32_t new_mtu)
814 {
815 	int ret;
816 
817 	if (dp->devEnabled)
818 		return (EBUSY);
819 
820 	if (new_mtu == dp->cur_mtu) {
821 		VMXNET3_WARN(dp, "New MTU is same as old mtu : %d.\n", new_mtu);
822 		return (0);
823 	}
824 
825 	if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU) {
826 		VMXNET3_WARN(dp, "New MTU not in valid range [%d, %d].\n",
827 		    VMXNET3_MIN_MTU, VMXNET3_MAX_MTU);
828 		return (EINVAL);
829 	} else if (new_mtu > ETHERMTU && !dp->allow_jumbo) {
830 		VMXNET3_WARN(dp, "MTU cannot be greater than %d because "
831 		    "accept-jumbo is not enabled.\n", ETHERMTU);
832 		return (EINVAL);
833 	}
834 
835 	dp->cur_mtu = new_mtu;
836 	dp->alloc_ok = VMXNET3_ALLOC_OK(dp);
837 
838 	if ((ret = mac_maxsdu_update(dp->mac, new_mtu)) != 0)
839 		VMXNET3_WARN(dp, "Unable to update mac with %d mtu: %d",
840 		    new_mtu, ret);
841 
842 	return (ret);
843 }
844 
845 /* ARGSUSED */
846 static int
847 vmxnet3_get_prop(void *data, const char *prop_name, mac_prop_id_t prop_id,
848     uint_t prop_val_size, void *prop_val)
849 {
850 	vmxnet3_softc_t *dp = data;
851 	int ret = 0;
852 
853 	switch (prop_id) {
854 	case MAC_PROP_MTU:
855 		ASSERT(prop_val_size >= sizeof (uint32_t));
856 		bcopy(&dp->cur_mtu, prop_val, sizeof (uint32_t));
857 		break;
858 	default:
859 		ret = ENOTSUP;
860 		break;
861 	}
862 	return (ret);
863 }
864 
865 /* ARGSUSED */
866 static int
867 vmxnet3_set_prop(void *data, const char *prop_name, mac_prop_id_t prop_id,
868     uint_t prop_val_size, const void *prop_val)
869 {
870 	vmxnet3_softc_t *dp = data;
871 	int ret;
872 
873 	switch (prop_id) {
874 	case MAC_PROP_MTU: {
875 		uint32_t new_mtu;
876 		ASSERT(prop_val_size >= sizeof (uint32_t));
877 		bcopy(prop_val, &new_mtu, sizeof (new_mtu));
878 		ret = vmxnet3_change_mtu(dp, new_mtu);
879 		break;
880 	}
881 	default:
882 		ret = ENOTSUP;
883 		break;
884 	}
885 
886 	return (ret);
887 }
888 
889 /* ARGSUSED */
890 static void
891 vmxnet3_prop_info(void *data, const char *prop_name, mac_prop_id_t prop_id,
892     mac_prop_info_handle_t prop_handle)
893 {
894 	switch (prop_id) {
895 	case MAC_PROP_MTU:
896 		mac_prop_info_set_range_uint32(prop_handle, VMXNET3_MIN_MTU,
897 		    VMXNET3_MAX_MTU);
898 		break;
899 	default:
900 		break;
901 	}
902 }
903 
904 /*
905  * DDI/DDK callback to handle IOCTL in driver. Currently it only handles
906  * ND_SET ioctl. Rest all are ignored. The ND_SET is used to set/reset
907  * accept-jumbo ndd parameted for the interface.
908  *
909  * Side effects:
910  *	MTU can be changed and device can be reset. An ACK or NACK is conveyed
911  *	to the calling function from the mblk which was used to call this
912  *	function.
913  */
914 static void
915 vmxnet3_ioctl(void *arg, queue_t *wq, mblk_t *mp)
916 {
917 	vmxnet3_softc_t *dp = arg;
918 	int ret = EINVAL;
919 	IOCP iocp;
920 	mblk_t *mp1;
921 	char *valp, *param;
922 	int data;
923 
924 	iocp = (void *)mp->b_rptr;
925 	iocp->ioc_error = 0;
926 
927 	switch (iocp->ioc_cmd) {
928 	case ND_SET:
929 		/*
930 		 * The mblk in continuation would contain the ndd parameter name
931 		 * and data value to be set
932 		 */
933 		mp1 = mp->b_cont;
934 		if (!mp1) {
935 			VMXNET3_WARN(dp, "Error locating parameter name.\n");
936 			ret = EINVAL;
937 			break;
938 		}
939 
940 		/* Force null termination */
941 		mp1->b_datap->db_lim[-1] = '\0';
942 
943 		/*
944 		 * From /usr/src/uts/common/inet/nd.c : nd_getset()
945 		 * "logic throughout nd_xxx assumes single data block for ioctl.
946 		 *  However, existing code sends in some big buffers."
947 		 */
948 		if (mp1->b_cont) {
949 			freemsg(mp1->b_cont);
950 			mp1->b_cont = NULL;
951 		}
952 
953 		valp = (char *)mp1->b_rptr;	/* Points to param name */
954 		ASSERT(valp);
955 		param = valp;
956 		VMXNET3_DEBUG(dp, 3, "ND Set ioctl for %s\n", param);
957 
958 		/*
959 		 * Go past the end of this null terminated string to get the
960 		 * data value.
961 		 */
962 		while (*valp && valp <= (char *)mp1->b_wptr)
963 			valp++;
964 
965 		if (valp > (char *)mp1->b_wptr) {
966 			/*
967 			 * We are already beyond the readable area of mblk and
968 			 * still haven't found the end of param string.
969 			 */
970 			VMXNET3_WARN(dp,
971 			    "No data value found to be set to param\n");
972 			data = -1;
973 		} else {
974 			/* Now this points to data string */
975 			valp++;
976 			/* Get numeric value of first letter */
977 			data = (int)*valp - (int)'0';
978 		}
979 
980 		if (strcmp("accept-jumbo", param) == 0) {
981 			if (data == 1) {
982 				VMXNET3_DEBUG(dp, 2,
983 				    "Accepting jumbo frames\n");
984 				dp->allow_jumbo = B_TRUE;
985 				ret = vmxnet3_change_mtu(dp, VMXNET3_MAX_MTU);
986 			} else if (data == 0) {
987 				VMXNET3_DEBUG(dp, 2,
988 				    "Rejecting jumbo frames\n");
989 				dp->allow_jumbo = B_FALSE;
990 				ret = vmxnet3_change_mtu(dp, ETHERMTU);
991 			} else {
992 				VMXNET3_WARN(dp, "Invalid data value to be set,"
993 				    " use 0 or 1\n");
994 				ret = -1;
995 			}
996 		}
997 		freemsg(mp1);
998 		mp->b_cont = NULL;
999 		break;
1000 
1001 	default:
1002 		if (mp->b_cont) {
1003 			freemsg(mp->b_cont);
1004 			mp->b_cont = NULL;
1005 		}
1006 		ret = -1;
1007 		break;
1008 	}
1009 
1010 	if (ret == 0)
1011 		miocack(wq, mp, 0, 0);
1012 	else
1013 		miocnak(wq, mp, 0, EINVAL);
1014 }
1015 
1016 /*
1017  * Get the capabilities of a vmxnet3 device.
1018  *
1019  * Returns:
1020  *	B_TRUE if the capability is supported, B_FALSE otherwise.
1021  */
1022 static boolean_t
1023 vmxnet3_getcapab(void *data, mac_capab_t capab, void *arg)
1024 {
1025 	vmxnet3_softc_t *dp = data;
1026 	boolean_t ret;
1027 
1028 	switch (capab) {
1029 	case MAC_CAPAB_HCKSUM: {
1030 		uint32_t *txflags = arg;
1031 		*txflags = HCKSUM_INET_PARTIAL;
1032 		ret = B_TRUE;
1033 		break;
1034 	}
1035 	case MAC_CAPAB_LSO: {
1036 		mac_capab_lso_t *lso = arg;
1037 		lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
1038 		lso->lso_basic_tcp_ipv4.lso_max = IP_MAXPACKET;
1039 		ret = vmxnet3_getprop(dp, "EnableLSO", 0, 1, 1);
1040 		break;
1041 	}
1042 	default:
1043 		ret = B_FALSE;
1044 	}
1045 
1046 	VMXNET3_DEBUG(dp, 2, "getcapab(0x%x) -> %s\n", capab,
1047 	    ret ? "yes" : "no");
1048 
1049 	return (ret);
1050 }
1051 
1052 /*
1053  * Reset a vmxnet3 device. Only to be used when the device is wedged.
1054  *
1055  * Side effects:
1056  *	The device is reset.
1057  */
1058 static void
1059 vmxnet3_reset(void *data)
1060 {
1061 	int ret;
1062 
1063 	vmxnet3_softc_t *dp = data;
1064 
1065 	VMXNET3_DEBUG(dp, 1, "vmxnet3_reset()\n");
1066 
1067 	atomic_inc_32(&dp->reset_count);
1068 	vmxnet3_stop(dp);
1069 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
1070 	if ((ret = vmxnet3_start(dp)) != 0)
1071 		VMXNET3_WARN(dp, "failed to reset the device: %d", ret);
1072 }
1073 
1074 /*
1075  * Process pending events on a vmxnet3 device.
1076  *
1077  * Returns:
1078  *	B_TRUE if the link state changed, B_FALSE otherwise.
1079  */
1080 static boolean_t
1081 vmxnet3_intr_events(vmxnet3_softc_t *dp)
1082 {
1083 	Vmxnet3_DriverShared *ds = VMXNET3_DS(dp);
1084 	boolean_t linkStateChanged = B_FALSE;
1085 	uint32_t events = ds->ecr;
1086 
1087 	if (events) {
1088 		VMXNET3_DEBUG(dp, 2, "events(0x%x)\n", events);
1089 		if (events & (VMXNET3_ECR_RQERR | VMXNET3_ECR_TQERR)) {
1090 			Vmxnet3_TxQueueDesc *tqdesc = VMXNET3_TQDESC(dp);
1091 			Vmxnet3_RxQueueDesc *rqdesc = VMXNET3_RQDESC(dp);
1092 
1093 			VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD,
1094 			    VMXNET3_CMD_GET_QUEUE_STATUS);
1095 			if (tqdesc->status.stopped) {
1096 				VMXNET3_WARN(dp, "tq error 0x%x\n",
1097 				    tqdesc->status.error);
1098 			}
1099 			if (rqdesc->status.stopped) {
1100 				VMXNET3_WARN(dp, "rq error 0x%x\n",
1101 				    rqdesc->status.error);
1102 			}
1103 
1104 			if (ddi_taskq_dispatch(dp->resetTask, vmxnet3_reset,
1105 			    dp, DDI_NOSLEEP) == DDI_SUCCESS) {
1106 				VMXNET3_WARN(dp, "reset scheduled\n");
1107 			} else {
1108 				VMXNET3_WARN(dp,
1109 				    "ddi_taskq_dispatch() failed\n");
1110 			}
1111 		}
1112 		if (events & VMXNET3_ECR_LINK) {
1113 			vmxnet3_refresh_linkstate(dp);
1114 			linkStateChanged = B_TRUE;
1115 		}
1116 		if (events & VMXNET3_ECR_DIC) {
1117 			VMXNET3_DEBUG(dp, 1, "device implementation change\n");
1118 		}
1119 		VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_ECR, events);
1120 	}
1121 
1122 	return (linkStateChanged);
1123 }
1124 
1125 /*
1126  * Interrupt handler of a vmxnet3 device.
1127  *
1128  * Returns:
1129  *	DDI_INTR_CLAIMED or DDI_INTR_UNCLAIMED.
1130  */
1131 /* ARGSUSED1 */
1132 static uint_t
1133 vmxnet3_intr(caddr_t data1, caddr_t data2)
1134 {
1135 	vmxnet3_softc_t *dp = (void *) data1;
1136 
1137 	VMXNET3_DEBUG(dp, 3, "intr()\n");
1138 
1139 	mutex_enter(&dp->intrLock);
1140 
1141 	if (dp->devEnabled) {
1142 		boolean_t linkStateChanged;
1143 		boolean_t mustUpdateTx;
1144 		mblk_t *mps;
1145 
1146 		if (dp->intrType == DDI_INTR_TYPE_FIXED &&
1147 		    !VMXNET3_BAR1_GET32(dp, VMXNET3_REG_ICR)) {
1148 			goto intr_unclaimed;
1149 		}
1150 
1151 		if (dp->intrMaskMode == VMXNET3_IMM_ACTIVE) {
1152 			VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_IMR, 1);
1153 		}
1154 
1155 		linkStateChanged = vmxnet3_intr_events(dp);
1156 		mustUpdateTx = vmxnet3_tx_complete(dp, &dp->txQueue);
1157 		mps = vmxnet3_rx_intr(dp, &dp->rxQueue);
1158 
1159 		mutex_exit(&dp->intrLock);
1160 		VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_IMR, 0);
1161 
1162 		if (linkStateChanged) {
1163 			mac_link_update(dp->mac, dp->linkState);
1164 		}
1165 		if (mustUpdateTx) {
1166 			mac_tx_update(dp->mac);
1167 		}
1168 		if (mps) {
1169 			mac_rx(dp->mac, NULL, mps);
1170 		}
1171 
1172 		return (DDI_INTR_CLAIMED);
1173 	}
1174 
1175 intr_unclaimed:
1176 	mutex_exit(&dp->intrLock);
1177 	return (DDI_INTR_UNCLAIMED);
1178 }
1179 
1180 static int
1181 vmxnet3_kstat_update(kstat_t *ksp, int rw)
1182 {
1183 	vmxnet3_softc_t *dp = ksp->ks_private;
1184 	vmxnet3_kstats_t *statp = ksp->ks_data;
1185 
1186 	if (rw == KSTAT_WRITE)
1187 		return (EACCES);
1188 
1189 	statp->reset_count.value.ul = dp->reset_count;
1190 	statp->tx_pullup_needed.value.ul = dp->tx_pullup_needed;
1191 	statp->tx_ring_full.value.ul = dp->tx_ring_full;
1192 	statp->rx_alloc_buf.value.ul = dp->rx_alloc_buf;
1193 	statp->rx_pool_empty.value.ul = dp->rx_pool_empty;
1194 	statp->rx_num_bufs.value.ul = dp->rx_num_bufs;
1195 
1196 	return (0);
1197 }
1198 
1199 static int
1200 vmxnet3_kstat_init(vmxnet3_softc_t *dp)
1201 {
1202 	vmxnet3_kstats_t *statp;
1203 
1204 	dp->devKstats = kstat_create(VMXNET3_MODNAME, dp->instance,
1205 	    "statistics", "dev",  KSTAT_TYPE_NAMED,
1206 	    sizeof (vmxnet3_kstats_t) / sizeof (kstat_named_t), 0);
1207 	if (dp->devKstats == NULL)
1208 		return (DDI_FAILURE);
1209 
1210 	dp->devKstats->ks_update = vmxnet3_kstat_update;
1211 	dp->devKstats->ks_private = dp;
1212 
1213 	statp = dp->devKstats->ks_data;
1214 
1215 	kstat_named_init(&statp->reset_count, "reset_count", KSTAT_DATA_ULONG);
1216 	kstat_named_init(&statp->tx_pullup_needed, "tx_pullup_needed",
1217 	    KSTAT_DATA_ULONG);
1218 	kstat_named_init(&statp->tx_ring_full, "tx_ring_full",
1219 	    KSTAT_DATA_ULONG);
1220 	kstat_named_init(&statp->rx_alloc_buf, "rx_alloc_buf",
1221 	    KSTAT_DATA_ULONG);
1222 	kstat_named_init(&statp->rx_pool_empty, "rx_pool_empty",
1223 	    KSTAT_DATA_ULONG);
1224 	kstat_named_init(&statp->rx_num_bufs, "rx_num_bufs",
1225 	    KSTAT_DATA_ULONG);
1226 
1227 	kstat_install(dp->devKstats);
1228 
1229 	return (DDI_SUCCESS);
1230 }
1231 
1232 /*
1233  * Probe and attach a vmxnet3 instance to the stack.
1234  *
1235  * Returns:
1236  *	DDI_SUCCESS or DDI_FAILURE.
1237  */
1238 static int
1239 vmxnet3_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1240 {
1241 	vmxnet3_softc_t *dp;
1242 	mac_register_t *macr;
1243 	uint16_t vendorId, devId, ret16;
1244 	uint32_t ret32;
1245 	int ret, err;
1246 	uint_t uret;
1247 
1248 	if (cmd != DDI_ATTACH) {
1249 		goto error;
1250 	}
1251 
1252 	/*
1253 	 * Allocate the soft state
1254 	 */
1255 	dp = kmem_zalloc(sizeof (vmxnet3_softc_t), KM_SLEEP);
1256 	ASSERT(dp);
1257 
1258 	dp->dip = dip;
1259 	dp->instance = ddi_get_instance(dip);
1260 	dp->cur_mtu = ETHERMTU;
1261 	dp->allow_jumbo = B_TRUE;
1262 	dp->alloc_ok = VMXNET3_ALLOC_OK(dp);
1263 
1264 	VMXNET3_DEBUG(dp, 1, "attach()\n");
1265 
1266 	ddi_set_driver_private(dip, dp);
1267 
1268 	/*
1269 	 * Get access to the PCI bus configuration space
1270 	 */
1271 	if (pci_config_setup(dip, &dp->pciHandle) != DDI_SUCCESS) {
1272 		VMXNET3_WARN(dp, "pci_config_setup() failed\n");
1273 		goto error_soft_state;
1274 	}
1275 
1276 	/*
1277 	 * Make sure the chip is a vmxnet3 device
1278 	 */
1279 	vendorId = pci_config_get16(dp->pciHandle, PCI_CONF_VENID);
1280 	devId = pci_config_get16(dp->pciHandle, PCI_CONF_DEVID);
1281 	if (vendorId != PCI_VENDOR_ID_VMWARE ||
1282 	    devId != PCI_DEVICE_ID_VMWARE_VMXNET3) {
1283 		VMXNET3_WARN(dp, "wrong PCI venid/devid (0x%x, 0x%x)\n",
1284 		    vendorId, devId);
1285 		goto error_pci_config;
1286 	}
1287 
1288 	/*
1289 	 * Make sure we can access the registers through the I/O space
1290 	 */
1291 	ret16 = pci_config_get16(dp->pciHandle, PCI_CONF_COMM);
1292 	ret16 |= PCI_COMM_IO | PCI_COMM_ME;
1293 	pci_config_put16(dp->pciHandle, PCI_CONF_COMM, ret16);
1294 
1295 	/*
1296 	 * Map the I/O space in memory
1297 	 */
1298 	if (ddi_regs_map_setup(dip, 1, &dp->bar0, 0, 0, &vmxnet3_dev_attr,
1299 	    &dp->bar0Handle) != DDI_SUCCESS) {
1300 		VMXNET3_WARN(dp, "ddi_regs_map_setup() for BAR0 failed\n");
1301 		goto error_pci_config;
1302 	}
1303 
1304 	if (ddi_regs_map_setup(dip, 2, &dp->bar1, 0, 0, &vmxnet3_dev_attr,
1305 	    &dp->bar1Handle) != DDI_SUCCESS) {
1306 		VMXNET3_WARN(dp, "ddi_regs_map_setup() for BAR1 failed\n");
1307 		goto error_regs_map_0;
1308 	}
1309 
1310 	/*
1311 	 * Check the version number of the virtual device
1312 	 */
1313 	if (VMXNET3_BAR1_GET32(dp, VMXNET3_REG_VRRS) & 1) {
1314 		VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_VRRS, 1);
1315 	} else {
1316 		VMXNET3_WARN(dp, "incompatible h/w version\n");
1317 		goto error_regs_map_1;
1318 	}
1319 
1320 	if (VMXNET3_BAR1_GET32(dp, VMXNET3_REG_UVRS) & 1) {
1321 		VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_UVRS, 1);
1322 	} else {
1323 		VMXNET3_WARN(dp, "incompatible upt version\n");
1324 		goto error_regs_map_1;
1325 	}
1326 
1327 	if (vmxnet3_kstat_init(dp) != DDI_SUCCESS) {
1328 		VMXNET3_WARN(dp, "unable to initialize kstats");
1329 		goto error_regs_map_1;
1330 	}
1331 
1332 	/*
1333 	 * Read the MAC address from the device
1334 	 */
1335 	ret32 = VMXNET3_BAR1_GET32(dp, VMXNET3_REG_MACL);
1336 	*((uint32_t *)(dp->macaddr + 0)) = ret32;
1337 	ret32 = VMXNET3_BAR1_GET32(dp, VMXNET3_REG_MACH);
1338 	*((uint16_t *)(dp->macaddr + 4)) = ret32;
1339 
1340 	/*
1341 	 * Register with the MAC framework
1342 	 */
1343 	if (!(macr = mac_alloc(MAC_VERSION))) {
1344 		VMXNET3_WARN(dp, "mac_alloc() failed\n");
1345 		goto error_kstat;
1346 	}
1347 
1348 	macr->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1349 	macr->m_driver = dp;
1350 	macr->m_dip = dip;
1351 	macr->m_instance = 0;
1352 	macr->m_src_addr = dp->macaddr;
1353 	macr->m_dst_addr = NULL;
1354 	macr->m_callbacks = &vmxnet3_mac_callbacks;
1355 	macr->m_min_sdu = 0;
1356 	macr->m_max_sdu = ETHERMTU;
1357 	macr->m_margin = VLAN_TAGSZ;
1358 	macr->m_pdata = NULL;
1359 	macr->m_pdata_size = 0;
1360 
1361 	ret = mac_register(macr, &dp->mac);
1362 	mac_free(macr);
1363 	if (ret != DDI_SUCCESS) {
1364 		VMXNET3_WARN(dp, "mac_register() failed\n");
1365 		goto error_kstat;
1366 	}
1367 
1368 	/*
1369 	 * Register the interrupt(s) in this order of preference:
1370 	 * MSI-X, MSI, INTx
1371 	 */
1372 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_GET_CONF_INTR);
1373 	ret32 = VMXNET3_BAR1_GET32(dp, VMXNET3_REG_CMD);
1374 	switch (ret32 & 0x3) {
1375 	case VMXNET3_IT_AUTO:
1376 	case VMXNET3_IT_MSIX:
1377 		dp->intrType = DDI_INTR_TYPE_MSIX;
1378 		err = ddi_intr_alloc(dip, &dp->intrHandle, dp->intrType, 0, 1,
1379 		    &ret, DDI_INTR_ALLOC_STRICT);
1380 		if (err == DDI_SUCCESS)
1381 			break;
1382 		VMXNET3_DEBUG(dp, 2, "DDI_INTR_TYPE_MSIX failed, err:%d\n",
1383 		    err);
1384 		/* FALLTHROUGH */
1385 	case VMXNET3_IT_MSI:
1386 		dp->intrType = DDI_INTR_TYPE_MSI;
1387 		if (ddi_intr_alloc(dip, &dp->intrHandle, dp->intrType, 0, 1,
1388 		    &ret, DDI_INTR_ALLOC_STRICT) == DDI_SUCCESS)
1389 			break;
1390 		VMXNET3_DEBUG(dp, 2, "DDI_INTR_TYPE_MSI failed\n");
1391 		/* FALLTHROUGH */
1392 	case VMXNET3_IT_INTX:
1393 		dp->intrType = DDI_INTR_TYPE_FIXED;
1394 		if (ddi_intr_alloc(dip, &dp->intrHandle, dp->intrType, 0, 1,
1395 		    &ret, DDI_INTR_ALLOC_STRICT) == DDI_SUCCESS) {
1396 			break;
1397 		}
1398 		VMXNET3_DEBUG(dp, 2, "DDI_INTR_TYPE_INTX failed\n");
1399 		/* FALLTHROUGH */
1400 	default:
1401 		VMXNET3_WARN(dp, "ddi_intr_alloc() failed\n");
1402 		goto error_mac;
1403 	}
1404 	dp->intrMaskMode = (ret32 >> 2) & 0x3;
1405 	if (dp->intrMaskMode == VMXNET3_IMM_LAZY) {
1406 		VMXNET3_WARN(dp, "Lazy masking is not supported\n");
1407 		goto error_intr;
1408 	}
1409 
1410 	if (ddi_intr_get_pri(dp->intrHandle, &uret) != DDI_SUCCESS) {
1411 		VMXNET3_WARN(dp, "ddi_intr_get_pri() failed\n");
1412 		goto error_intr;
1413 	}
1414 
1415 	VMXNET3_DEBUG(dp, 2, "intrType=0x%x, intrMaskMode=0x%x, intrPrio=%u\n",
1416 	    dp->intrType, dp->intrMaskMode, uret);
1417 
1418 	/*
1419 	 * Create a task queue to reset the device if it wedges.
1420 	 */
1421 	dp->resetTask = ddi_taskq_create(dip, "vmxnet3_reset_task", 1,
1422 	    TASKQ_DEFAULTPRI, 0);
1423 	if (!dp->resetTask) {
1424 		VMXNET3_WARN(dp, "ddi_taskq_create() failed()\n");
1425 		goto error_intr;
1426 	}
1427 
1428 	/*
1429 	 * Initialize our mutexes now that we know the interrupt priority
1430 	 * This _must_ be done before ddi_intr_enable()
1431 	 */
1432 	mutex_init(&dp->intrLock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(uret));
1433 	mutex_init(&dp->txLock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(uret));
1434 	mutex_init(&dp->rxPoolLock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(uret));
1435 
1436 	if (ddi_intr_add_handler(dp->intrHandle, vmxnet3_intr,
1437 	    dp, NULL) != DDI_SUCCESS) {
1438 		VMXNET3_WARN(dp, "ddi_intr_add_handler() failed\n");
1439 		goto error_mutexes;
1440 	}
1441 
1442 	err = ddi_intr_get_cap(dp->intrHandle, &dp->intrCap);
1443 	if (err != DDI_SUCCESS) {
1444 		VMXNET3_WARN(dp, "ddi_intr_get_cap() failed %d", err);
1445 		goto error_intr_handler;
1446 	}
1447 
1448 	if (dp->intrCap & DDI_INTR_FLAG_BLOCK) {
1449 		err = ddi_intr_block_enable(&dp->intrHandle, 1);
1450 		if (err != DDI_SUCCESS) {
1451 			VMXNET3_WARN(dp, "ddi_intr_block_enable() failed, "
1452 			    "err:%d\n", err);
1453 			goto error_intr_handler;
1454 		}
1455 	} else {
1456 		err = ddi_intr_enable(dp->intrHandle);
1457 		if ((err != DDI_SUCCESS)) {
1458 			VMXNET3_WARN(dp, "ddi_intr_enable() failed, err:%d\n",
1459 			    err);
1460 			goto error_intr_handler;
1461 		}
1462 	}
1463 
1464 	return (DDI_SUCCESS);
1465 
1466 error_intr_handler:
1467 	(void) ddi_intr_remove_handler(dp->intrHandle);
1468 error_mutexes:
1469 	mutex_destroy(&dp->rxPoolLock);
1470 	mutex_destroy(&dp->txLock);
1471 	mutex_destroy(&dp->intrLock);
1472 	ddi_taskq_destroy(dp->resetTask);
1473 error_intr:
1474 	(void) ddi_intr_free(dp->intrHandle);
1475 error_mac:
1476 	(void) mac_unregister(dp->mac);
1477 error_kstat:
1478 	kstat_delete(dp->devKstats);
1479 error_regs_map_1:
1480 	ddi_regs_map_free(&dp->bar1Handle);
1481 error_regs_map_0:
1482 	ddi_regs_map_free(&dp->bar0Handle);
1483 error_pci_config:
1484 	pci_config_teardown(&dp->pciHandle);
1485 error_soft_state:
1486 	kmem_free(dp, sizeof (vmxnet3_softc_t));
1487 error:
1488 	return (DDI_FAILURE);
1489 }
1490 
1491 /*
1492  * Detach a vmxnet3 instance from the stack.
1493  *
1494  * Returns:
1495  *	DDI_SUCCESS or DDI_FAILURE.
1496  */
1497 static int
1498 vmxnet3_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1499 {
1500 	vmxnet3_softc_t *dp = ddi_get_driver_private(dip);
1501 	unsigned int retries = 0;
1502 	int ret;
1503 
1504 	VMXNET3_DEBUG(dp, 1, "detach()\n");
1505 
1506 	if (cmd != DDI_DETACH) {
1507 		return (DDI_FAILURE);
1508 	}
1509 
1510 	while (dp->rx_num_bufs > 0) {
1511 		if (retries++ < 10) {
1512 			VMXNET3_WARN(dp, "rx pending (%u), waiting 1 second\n",
1513 			    dp->rx_num_bufs);
1514 			delay(drv_usectohz(1000000));
1515 		} else {
1516 			VMXNET3_WARN(dp, "giving up\n");
1517 			return (DDI_FAILURE);
1518 		}
1519 	}
1520 
1521 	if (dp->intrCap & DDI_INTR_FLAG_BLOCK) {
1522 		ret = ddi_intr_block_disable(&dp->intrHandle, 1);
1523 	} else {
1524 		ret = ddi_intr_disable(dp->intrHandle);
1525 	}
1526 	if (ret != DDI_SUCCESS) {
1527 		VMXNET3_WARN(dp, "unable to disable interrupts");
1528 		return (DDI_FAILURE);
1529 	}
1530 	if (ddi_intr_remove_handler(dp->intrHandle) != DDI_SUCCESS) {
1531 		VMXNET3_WARN(dp, "unable to remove interrupt handler");
1532 		return (DDI_FAILURE);
1533 	}
1534 	(void) ddi_intr_free(dp->intrHandle);
1535 
1536 	VERIFY(mac_unregister(dp->mac) == 0);
1537 
1538 	kstat_delete(dp->devKstats);
1539 
1540 	if (dp->mfTable.buf) {
1541 		vmxnet3_free_dma_mem(&dp->mfTable);
1542 	}
1543 
1544 	mutex_destroy(&dp->rxPoolLock);
1545 	mutex_destroy(&dp->txLock);
1546 	mutex_destroy(&dp->intrLock);
1547 	ddi_taskq_destroy(dp->resetTask);
1548 
1549 	ddi_regs_map_free(&dp->bar1Handle);
1550 	ddi_regs_map_free(&dp->bar0Handle);
1551 	pci_config_teardown(&dp->pciHandle);
1552 
1553 	kmem_free(dp, sizeof (vmxnet3_softc_t));
1554 
1555 	return (DDI_SUCCESS);
1556 }
1557 
1558 /*
1559  * Structures used by the module loader
1560  */
1561 
1562 #define	VMXNET3_IDENT "VMware Ethernet v3 " VMXNET3_DRIVER_VERSION_STRING
1563 
1564 DDI_DEFINE_STREAM_OPS(
1565 	vmxnet3_dev_ops,
1566 	nulldev,
1567 	nulldev,
1568 	vmxnet3_attach,
1569 	vmxnet3_detach,
1570 	nodev,
1571 	NULL,
1572 	D_NEW | D_MP,
1573 	NULL,
1574 	ddi_quiesce_not_supported);
1575 
1576 static struct modldrv vmxnet3_modldrv = {
1577 	&mod_driverops,		/* drv_modops */
1578 	VMXNET3_IDENT,		/* drv_linkinfo */
1579 	&vmxnet3_dev_ops	/* drv_dev_ops */
1580 };
1581 
1582 static struct modlinkage vmxnet3_modlinkage = {
1583 	MODREV_1,			/* ml_rev */
1584 	{ &vmxnet3_modldrv, NULL }	/* ml_linkage */
1585 };
1586 
1587 /* Module load entry point */
1588 int
1589 _init(void)
1590 {
1591 	int ret;
1592 
1593 	mac_init_ops(&vmxnet3_dev_ops, VMXNET3_MODNAME);
1594 	ret = mod_install(&vmxnet3_modlinkage);
1595 	if (ret != DDI_SUCCESS) {
1596 		mac_fini_ops(&vmxnet3_dev_ops);
1597 	}
1598 
1599 	return (ret);
1600 }
1601 
1602 /* Module unload entry point */
1603 int
1604 _fini(void)
1605 {
1606 	int ret;
1607 
1608 	ret = mod_remove(&vmxnet3_modlinkage);
1609 	if (ret == DDI_SUCCESS) {
1610 		mac_fini_ops(&vmxnet3_dev_ops);
1611 	}
1612 
1613 	return (ret);
1614 }
1615 
1616 /* Module info entry point */
1617 int
1618 _info(struct modinfo *modinfop)
1619 {
1620 	return (mod_info(&vmxnet3_modlinkage, modinfop));
1621 }
1622