xref: /illumos-gate/usr/src/uts/intel/io/vmxnet3s/vmxnet3_main.c (revision a536a2a3182b56eef2630fb4b4509c9e106655e6)
1 /*
2  * Copyright (C) 2007-2014 VMware, Inc. All rights reserved.
3  *
4  * The contents of this file are subject to the terms of the Common
5  * Development and Distribution License (the "License") version 1.0
6  * and no later version.  You may not use this file except in
7  * compliance with the License.
8  *
9  * You can obtain a copy of the License at
10  *         http://www.opensource.org/licenses/cddl1.php
11  *
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  */
15 
16 /*
17  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
18  */
19 
20 #include <vmxnet3.h>
21 
/*
 * This driver is based on VMware's version 3227872, and contains additional
 * enhancements (see README.txt).
 *
 * The number is reported to the device as the driver version when the
 * shared area is set up (ds->devRead.misc.driverInfo.version).
 */
#define	BUILD_NUMBER_NUMERIC	3227872
27 
/*
 * If we run out of rxPool buffers, only allocate if the MTU is <= PAGESIZE
 * so that we don't have to incur the cost of allocating multiple contiguous
 * pages (very slow) in interrupt context.
 *
 * Evaluates to non-zero when (dp)->cur_mtu fits within one page. The result
 * is cached in dp->alloc_ok whenever the MTU changes.
 */
#define	VMXNET3_ALLOC_OK(dp)	((dp)->cur_mtu <= PAGESIZE)
34 
35 /*
36  * TODO:
37  *    - Tx data ring
38  *    - MAC_CAPAB_POLL support
39  *    - Dynamic RX pool
40  */
41 
42 static int vmxnet3_getstat(void *, uint_t, uint64_t *);
43 static int vmxnet3_start(void *);
44 static void vmxnet3_stop(void *);
45 static int vmxnet3_setpromisc(void *, boolean_t);
46 static void vmxnet3_ioctl(void *arg, queue_t *wq, mblk_t *mp);
47 static int vmxnet3_multicst(void *, boolean_t, const uint8_t *);
48 static int vmxnet3_unicst(void *, const uint8_t *);
49 static boolean_t vmxnet3_getcapab(void *, mac_capab_t, void *);
50 static int vmxnet3_get_prop(void *, const char *, mac_prop_id_t, uint_t,
51     void *);
52 static int vmxnet3_set_prop(void *, const char *, mac_prop_id_t, uint_t,
53     const void *);
54 static void vmxnet3_prop_info(void *, const char *, mac_prop_id_t,
55     mac_prop_info_handle_t);
56 
57 int vmxnet3s_debug = 0;
58 
59 /* MAC callbacks */
60 static mac_callbacks_t vmxnet3_mac_callbacks = {
61 	.mc_callbacks =	MC_GETCAPAB | MC_IOCTL | MC_SETPROP | MC_PROPINFO,
62 	.mc_getstat =	vmxnet3_getstat,
63 	.mc_start =	vmxnet3_start,
64 	.mc_stop =	vmxnet3_stop,
65 	.mc_setpromisc = vmxnet3_setpromisc,
66 	.mc_multicst =	vmxnet3_multicst,
67 	.mc_unicst =	vmxnet3_unicst,
68 	.mc_tx =	vmxnet3_tx,
69 	.mc_ioctl =	vmxnet3_ioctl,
70 	.mc_getcapab =	vmxnet3_getcapab,
71 	.mc_getprop =	vmxnet3_get_prop,
72 	.mc_setprop =	vmxnet3_set_prop,
73 	.mc_propinfo =	vmxnet3_prop_info
74 };
75 
76 /* Tx DMA engine description */
77 static ddi_dma_attr_t vmxnet3_dma_attrs_tx = {
78 	.dma_attr_version =	DMA_ATTR_V0,
79 	.dma_attr_addr_lo =	0x0000000000000000ull,
80 	.dma_attr_addr_hi =	0xFFFFFFFFFFFFFFFFull,
81 	.dma_attr_count_max =	0xFFFFFFFFFFFFFFFFull,
82 	.dma_attr_align =	0x0000000000000001ull,
83 	.dma_attr_burstsizes =	0x0000000000000001ull,
84 	.dma_attr_minxfer =	0x00000001,
85 	.dma_attr_maxxfer =	0x000000000000FFFFull,
86 	.dma_attr_seg =		0xFFFFFFFFFFFFFFFFull,
87 	.dma_attr_sgllen =	-1,
88 	.dma_attr_granular =	0x00000001,
89 	.dma_attr_flags =	0
90 };
91 
92 /* --- */
93 
94 /*
95  * Fetch the statistics of a vmxnet3 device.
96  *
97  * Returns:
98  *	0 on success, non-zero on failure.
99  */
100 static int
101 vmxnet3_getstat(void *data, uint_t stat, uint64_t *val)
102 {
103 	vmxnet3_softc_t *dp = data;
104 	UPT1_TxStats *txStats;
105 	UPT1_RxStats *rxStats;
106 
107 	VMXNET3_DEBUG(dp, 3, "getstat(%u)\n", stat);
108 
109 	if (!dp->devEnabled) {
110 		return (EBUSY);
111 	}
112 
113 	txStats = &VMXNET3_TQDESC(dp)->stats;
114 	rxStats = &VMXNET3_RQDESC(dp)->stats;
115 
116 	/*
117 	 * First touch the related register
118 	 */
119 	switch (stat) {
120 	case MAC_STAT_MULTIRCV:
121 	case MAC_STAT_BRDCSTRCV:
122 	case MAC_STAT_MULTIXMT:
123 	case MAC_STAT_BRDCSTXMT:
124 	case MAC_STAT_NORCVBUF:
125 	case MAC_STAT_IERRORS:
126 	case MAC_STAT_NOXMTBUF:
127 	case MAC_STAT_OERRORS:
128 	case MAC_STAT_RBYTES:
129 	case MAC_STAT_IPACKETS:
130 	case MAC_STAT_OBYTES:
131 	case MAC_STAT_OPACKETS:
132 		VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS);
133 		break;
134 	case MAC_STAT_IFSPEED:
135 	case MAC_STAT_COLLISIONS:
136 	case ETHER_STAT_LINK_DUPLEX:
137 		/* nothing */
138 		break;
139 	default:
140 		return (ENOTSUP);
141 	}
142 
143 	/*
144 	 * Then fetch the corresponding stat
145 	 */
146 	switch (stat) {
147 	case MAC_STAT_IFSPEED:
148 		*val = dp->linkSpeed;
149 		break;
150 	case MAC_STAT_MULTIRCV:
151 		*val = rxStats->mcastPktsRxOK;
152 		break;
153 	case MAC_STAT_BRDCSTRCV:
154 		*val = rxStats->bcastPktsRxOK;
155 		break;
156 	case MAC_STAT_MULTIXMT:
157 		*val = txStats->mcastPktsTxOK;
158 		break;
159 	case MAC_STAT_BRDCSTXMT:
160 		*val = txStats->bcastPktsTxOK;
161 		break;
162 	case MAC_STAT_NORCVBUF:
163 		*val = rxStats->pktsRxOutOfBuf + dp->rx_alloc_failed;
164 		break;
165 	case MAC_STAT_IERRORS:
166 		*val = rxStats->pktsRxError;
167 		break;
168 	case MAC_STAT_NOXMTBUF:
169 		*val = txStats->pktsTxDiscard + dp->tx_pullup_failed;
170 		break;
171 	case MAC_STAT_OERRORS:
172 		*val = txStats->pktsTxError + dp->tx_error;
173 		break;
174 	case MAC_STAT_COLLISIONS:
175 		*val = 0;
176 		break;
177 	case MAC_STAT_RBYTES:
178 		*val = rxStats->ucastBytesRxOK + rxStats->mcastBytesRxOK +
179 		    rxStats->bcastBytesRxOK;
180 		break;
181 	case MAC_STAT_IPACKETS:
182 		*val = rxStats->ucastPktsRxOK + rxStats->mcastPktsRxOK +
183 		    rxStats->bcastPktsRxOK;
184 		break;
185 	case MAC_STAT_OBYTES:
186 		*val = txStats->ucastBytesTxOK + txStats->mcastBytesTxOK +
187 		    txStats->bcastBytesTxOK;
188 		break;
189 	case MAC_STAT_OPACKETS:
190 		*val = txStats->ucastPktsTxOK + txStats->mcastPktsTxOK +
191 		    txStats->bcastPktsTxOK;
192 		break;
193 	case ETHER_STAT_LINK_DUPLEX:
194 		*val = LINK_DUPLEX_FULL;
195 		break;
196 	default:
197 		ASSERT(B_FALSE);
198 	}
199 
200 	return (0);
201 }
202 
203 /*
204  * Allocate and initialize the shared data structures of a vmxnet3 device.
205  *
206  * Returns:
207  *	0 on sucess, non-zero on failure.
208  */
209 static int
210 vmxnet3_prepare_drivershared(vmxnet3_softc_t *dp)
211 {
212 	Vmxnet3_DriverShared *ds;
213 	size_t allocSize = sizeof (Vmxnet3_DriverShared);
214 	int err;
215 
216 	if ((err = vmxnet3_alloc_dma_mem_1(dp, &dp->sharedData, allocSize,
217 	    B_TRUE)) != 0) {
218 		return (err);
219 	}
220 	ds = VMXNET3_DS(dp);
221 	(void) memset(ds, 0, allocSize);
222 
223 	allocSize = sizeof (Vmxnet3_TxQueueDesc) + sizeof (Vmxnet3_RxQueueDesc);
224 	if ((err = vmxnet3_alloc_dma_mem_128(dp, &dp->queueDescs, allocSize,
225 	    B_TRUE)) != 0) {
226 		vmxnet3_free_dma_mem(&dp->sharedData);
227 		return (err);
228 	}
229 	(void) memset(dp->queueDescs.buf, 0, allocSize);
230 
231 	ds->magic = VMXNET3_REV1_MAGIC;
232 
233 	/* Take care of most of devRead */
234 	ds->devRead.misc.driverInfo.version = BUILD_NUMBER_NUMERIC;
235 #ifdef _LP64
236 	ds->devRead.misc.driverInfo.gos.gosBits = VMXNET3_GOS_BITS_64;
237 #else
238 	ds->devRead.misc.driverInfo.gos.gosBits = VMXNET3_GOS_BITS_32;
239 #endif
240 	ds->devRead.misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_SOLARIS;
241 	ds->devRead.misc.driverInfo.gos.gosVer = 10;
242 	ds->devRead.misc.driverInfo.vmxnet3RevSpt = 1;
243 	ds->devRead.misc.driverInfo.uptVerSpt = 1;
244 
245 	ds->devRead.misc.uptFeatures = UPT1_F_RXCSUM;
246 	ds->devRead.misc.mtu = dp->cur_mtu;
247 
248 	/* XXX: ds->devRead.misc.maxNumRxSG */
249 	ds->devRead.misc.numTxQueues = 1;
250 	ds->devRead.misc.numRxQueues = 1;
251 	ds->devRead.misc.queueDescPA = dp->queueDescs.bufPA;
252 	ds->devRead.misc.queueDescLen = allocSize;
253 
254 	/* TxQueue and RxQueue information is filled in other functions */
255 	ds->devRead.intrConf.autoMask = (dp->intrMaskMode == VMXNET3_IMM_AUTO);
256 	ds->devRead.intrConf.numIntrs = 1;
257 	/* XXX: ds->intr.modLevels */
258 	ds->devRead.intrConf.eventIntrIdx = 0;
259 
260 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_DSAL,
261 	    VMXNET3_ADDR_LO(dp->sharedData.bufPA));
262 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_DSAH,
263 	    VMXNET3_ADDR_HI(dp->sharedData.bufPA));
264 
265 	return (0);
266 }
267 
/*
 * Destroy the shared data structures of a vmxnet3 device.
 *
 * The shared-area address registers are cleared before the queue
 * descriptors and the shared area itself are freed.
 */
static void
vmxnet3_destroy_drivershared(vmxnet3_softc_t *dp)
{
	/* Withdraw the address advertised by vmxnet3_prepare_drivershared() */
	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_DSAL, 0);
	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_DSAH, 0);

	vmxnet3_free_dma_mem(&dp->queueDescs);
	vmxnet3_free_dma_mem(&dp->sharedData);
}
280 
281 /*
282  * Allocate and initialize the command ring of a queue.
283  *
284  * Returns:
285  *	0 on success, non-zero on error.
286  */
287 static int
288 vmxnet3_alloc_cmdring(vmxnet3_softc_t *dp, vmxnet3_cmdring_t *cmdRing)
289 {
290 	size_t ringSize = cmdRing->size * sizeof (Vmxnet3_TxDesc);
291 	int err;
292 
293 	if ((err = vmxnet3_alloc_dma_mem_512(dp, &cmdRing->dma, ringSize,
294 	    B_TRUE)) != 0) {
295 		return (err);
296 	}
297 	(void) memset(cmdRing->dma.buf, 0, ringSize);
298 	cmdRing->avail = cmdRing->size;
299 	cmdRing->next2fill = 0;
300 	cmdRing->gen = VMXNET3_INIT_GEN;
301 
302 	return (0);
303 }
304 
305 /*
306  * Allocate and initialize the completion ring of a queue.
307  *
308  * Returns:
309  *    DDI_SUCCESS or DDI_FAILURE.
310  */
311 static int
312 vmxnet3_alloc_compring(vmxnet3_softc_t *dp, vmxnet3_compring_t *compRing)
313 {
314 	size_t ringSize = compRing->size * sizeof (Vmxnet3_TxCompDesc);
315 
316 	if (vmxnet3_alloc_dma_mem_512(dp, &compRing->dma, ringSize,
317 	    B_TRUE) != DDI_SUCCESS) {
318 		return (DDI_FAILURE);
319 	}
320 	(void) memset(compRing->dma.buf, 0, ringSize);
321 	compRing->next2comp = 0;
322 	compRing->gen = VMXNET3_INIT_GEN;
323 
324 	return (DDI_SUCCESS);
325 }
326 
327 /*
328  * Initialize the tx queue of a vmxnet3 device.
329  *
330  * Returns:
331  *	0 on success, non-zero on failure.
332  */
333 static int
334 vmxnet3_prepare_txqueue(vmxnet3_softc_t *dp)
335 {
336 	Vmxnet3_TxQueueDesc *tqdesc = VMXNET3_TQDESC(dp);
337 	vmxnet3_txqueue_t *txq = &dp->txQueue;
338 	int err;
339 
340 	ASSERT(!(txq->cmdRing.size & VMXNET3_RING_SIZE_MASK));
341 	ASSERT(!(txq->compRing.size & VMXNET3_RING_SIZE_MASK));
342 	ASSERT(!txq->cmdRing.dma.buf && !txq->compRing.dma.buf);
343 
344 	if ((err = vmxnet3_alloc_cmdring(dp, &txq->cmdRing)) != 0) {
345 		goto error;
346 	}
347 	tqdesc->conf.txRingBasePA = txq->cmdRing.dma.bufPA;
348 	tqdesc->conf.txRingSize = txq->cmdRing.size;
349 	tqdesc->conf.dataRingBasePA = 0;
350 	tqdesc->conf.dataRingSize = 0;
351 
352 	if ((err = vmxnet3_alloc_compring(dp, &txq->compRing)) != 0) {
353 		goto error_cmdring;
354 	}
355 	tqdesc->conf.compRingBasePA = txq->compRing.dma.bufPA;
356 	tqdesc->conf.compRingSize = txq->compRing.size;
357 
358 	txq->metaRing = kmem_zalloc(txq->cmdRing.size *
359 	    sizeof (vmxnet3_metatx_t), KM_SLEEP);
360 	ASSERT(txq->metaRing);
361 
362 	if ((err = vmxnet3_txqueue_init(dp, txq)) != 0) {
363 		goto error_mpring;
364 	}
365 
366 	return (0);
367 
368 error_mpring:
369 	kmem_free(txq->metaRing, txq->cmdRing.size * sizeof (vmxnet3_metatx_t));
370 	vmxnet3_free_dma_mem(&txq->compRing.dma);
371 error_cmdring:
372 	vmxnet3_free_dma_mem(&txq->cmdRing.dma);
373 error:
374 	return (err);
375 }
376 
377 /*
378  * Initialize the rx queue of a vmxnet3 device.
379  *
380  * Returns:
381  *	0 on success, non-zero on failure.
382  */
383 static int
384 vmxnet3_prepare_rxqueue(vmxnet3_softc_t *dp)
385 {
386 	Vmxnet3_RxQueueDesc *rqdesc = VMXNET3_RQDESC(dp);
387 	vmxnet3_rxqueue_t *rxq = &dp->rxQueue;
388 	int err = 0;
389 
390 	ASSERT(!(rxq->cmdRing.size & VMXNET3_RING_SIZE_MASK));
391 	ASSERT(!(rxq->compRing.size & VMXNET3_RING_SIZE_MASK));
392 	ASSERT(!rxq->cmdRing.dma.buf && !rxq->compRing.dma.buf);
393 
394 	if ((err = vmxnet3_alloc_cmdring(dp, &rxq->cmdRing)) != 0) {
395 		goto error;
396 	}
397 	rqdesc->conf.rxRingBasePA[0] = rxq->cmdRing.dma.bufPA;
398 	rqdesc->conf.rxRingSize[0] = rxq->cmdRing.size;
399 	rqdesc->conf.rxRingBasePA[1] = 0;
400 	rqdesc->conf.rxRingSize[1] = 0;
401 
402 	if ((err = vmxnet3_alloc_compring(dp, &rxq->compRing)) != 0) {
403 		goto error_cmdring;
404 	}
405 	rqdesc->conf.compRingBasePA = rxq->compRing.dma.bufPA;
406 	rqdesc->conf.compRingSize = rxq->compRing.size;
407 
408 	rxq->bufRing = kmem_zalloc(rxq->cmdRing.size *
409 	    sizeof (vmxnet3_bufdesc_t), KM_SLEEP);
410 	ASSERT(rxq->bufRing);
411 
412 	if ((err = vmxnet3_rxqueue_init(dp, rxq)) != 0) {
413 		goto error_bufring;
414 	}
415 
416 	return (0);
417 
418 error_bufring:
419 	kmem_free(rxq->bufRing, rxq->cmdRing.size * sizeof (vmxnet3_bufdesc_t));
420 	vmxnet3_free_dma_mem(&rxq->compRing.dma);
421 error_cmdring:
422 	vmxnet3_free_dma_mem(&rxq->cmdRing.dma);
423 error:
424 	return (err);
425 }
426 
427 /*
428  * Destroy the tx queue of a vmxnet3 device.
429  */
430 static void
431 vmxnet3_destroy_txqueue(vmxnet3_softc_t *dp)
432 {
433 	vmxnet3_txqueue_t *txq = &dp->txQueue;
434 
435 	ASSERT(txq->metaRing);
436 	ASSERT(txq->cmdRing.dma.buf && txq->compRing.dma.buf);
437 
438 	vmxnet3_txqueue_fini(dp, txq);
439 
440 	kmem_free(txq->metaRing, txq->cmdRing.size * sizeof (vmxnet3_metatx_t));
441 
442 	vmxnet3_free_dma_mem(&txq->cmdRing.dma);
443 	vmxnet3_free_dma_mem(&txq->compRing.dma);
444 }
445 
446 /*
447  * Destroy the rx queue of a vmxnet3 device.
448  */
449 static void
450 vmxnet3_destroy_rxqueue(vmxnet3_softc_t *dp)
451 {
452 	vmxnet3_rxqueue_t *rxq = &dp->rxQueue;
453 
454 	ASSERT(rxq->bufRing);
455 	ASSERT(rxq->cmdRing.dma.buf && rxq->compRing.dma.buf);
456 
457 	vmxnet3_rxqueue_fini(dp, rxq);
458 
459 	kmem_free(rxq->bufRing, rxq->cmdRing.size * sizeof (vmxnet3_bufdesc_t));
460 
461 	vmxnet3_free_dma_mem(&rxq->cmdRing.dma);
462 	vmxnet3_free_dma_mem(&rxq->compRing.dma);
463 }
464 
465 /*
466  * Apply new RX filters settings to a vmxnet3 device.
467  */
468 static void
469 vmxnet3_refresh_rxfilter(vmxnet3_softc_t *dp)
470 {
471 	Vmxnet3_DriverShared *ds = VMXNET3_DS(dp);
472 
473 	ds->devRead.rxFilterConf.rxMode = dp->rxMode;
474 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_UPDATE_RX_MODE);
475 }
476 
477 /*
478  * Fetch the link state of a vmxnet3 device.
479  */
480 static void
481 vmxnet3_refresh_linkstate(vmxnet3_softc_t *dp)
482 {
483 	uint32_t ret32;
484 
485 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
486 	ret32 = VMXNET3_BAR1_GET32(dp, VMXNET3_REG_CMD);
487 	if (ret32 & 1) {
488 		dp->linkState = LINK_STATE_UP;
489 		dp->linkSpeed = (ret32 >> 16) * 1000000ULL;
490 	} else {
491 		dp->linkState = LINK_STATE_DOWN;
492 		dp->linkSpeed = 0;
493 	}
494 }
495 
496 /*
497  * Start a vmxnet3 device: allocate and initialize the shared data
498  * structures and send a start command to the device.
499  *
500  * Returns:
501  *	0 on success, non-zero error on failure.
502  */
503 static int
504 vmxnet3_start(void *data)
505 {
506 	vmxnet3_softc_t *dp = data;
507 	Vmxnet3_TxQueueDesc *tqdesc;
508 	Vmxnet3_RxQueueDesc *rqdesc;
509 	int txQueueSize, rxQueueSize;
510 	uint32_t ret32;
511 	int err, dmaerr;
512 
513 	VMXNET3_DEBUG(dp, 1, "start()\n");
514 
515 	/*
516 	 * Allocate vmxnet3's shared data and advertise its PA
517 	 */
518 	if ((err = vmxnet3_prepare_drivershared(dp)) != 0) {
519 		VMXNET3_WARN(dp, "vmxnet3_prepare_drivershared() failed: %d",
520 		    err);
521 		goto error;
522 	}
523 	tqdesc = VMXNET3_TQDESC(dp);
524 	rqdesc = VMXNET3_RQDESC(dp);
525 
526 	/*
527 	 * Create and initialize the tx queue
528 	 */
529 	txQueueSize = vmxnet3_getprop(dp, "TxRingSize", 32, 4096,
530 	    VMXNET3_DEF_TX_RING_SIZE);
531 	if (!(txQueueSize & VMXNET3_RING_SIZE_MASK)) {
532 		dp->txQueue.cmdRing.size = txQueueSize;
533 		dp->txQueue.compRing.size = txQueueSize;
534 		dp->txQueue.sharedCtrl = &tqdesc->ctrl;
535 		if ((err = vmxnet3_prepare_txqueue(dp)) != 0) {
536 			VMXNET3_WARN(dp, "vmxnet3_prepare_txqueue() failed: %d",
537 			    err);
538 			goto error_shared_data;
539 		}
540 	} else {
541 		VMXNET3_WARN(dp, "invalid tx ring size (%d)\n", txQueueSize);
542 		err = EINVAL;
543 		goto error_shared_data;
544 	}
545 
546 	/*
547 	 * Create and initialize the rx queue
548 	 */
549 	rxQueueSize = vmxnet3_getprop(dp, "RxRingSize", 32, 4096,
550 	    VMXNET3_DEF_RX_RING_SIZE);
551 	if (!(rxQueueSize & VMXNET3_RING_SIZE_MASK)) {
552 		dp->rxQueue.cmdRing.size = rxQueueSize;
553 		dp->rxQueue.compRing.size = rxQueueSize;
554 		dp->rxQueue.sharedCtrl = &rqdesc->ctrl;
555 		if ((err = vmxnet3_prepare_rxqueue(dp)) != 0) {
556 			VMXNET3_WARN(dp, "vmxnet3_prepare_rxqueue() failed: %d",
557 			    err);
558 			goto error_tx_queue;
559 		}
560 	} else {
561 		VMXNET3_WARN(dp, "invalid rx ring size (%d)\n", rxQueueSize);
562 		err = EINVAL;
563 		goto error_tx_queue;
564 	}
565 
566 	/*
567 	 * Allocate the Tx DMA handle
568 	 */
569 	if ((dmaerr = ddi_dma_alloc_handle(dp->dip, &vmxnet3_dma_attrs_tx,
570 	    DDI_DMA_SLEEP, NULL, &dp->txDmaHandle)) != DDI_SUCCESS) {
571 		VMXNET3_WARN(dp, "ddi_dma_alloc_handle() failed: %d", dmaerr);
572 		err = vmxnet3_dmaerr2errno(dmaerr);
573 		goto error_rx_queue;
574 	}
575 
576 	/*
577 	 * Activate the device
578 	 */
579 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_ACTIVATE_DEV);
580 	ret32 = VMXNET3_BAR1_GET32(dp, VMXNET3_REG_CMD);
581 	if (ret32) {
582 		VMXNET3_WARN(dp, "ACTIVATE_DEV failed: 0x%x\n", ret32);
583 		err = ENXIO;
584 		goto error_txhandle;
585 	}
586 	dp->devEnabled = B_TRUE;
587 
588 	VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_RXPROD,
589 	    dp->txQueue.cmdRing.size - 1);
590 
591 	/*
592 	 * Update the RX filters, must be done after ACTIVATE_DEV
593 	 */
594 	dp->rxMode = VMXNET3_RXM_UCAST | VMXNET3_RXM_BCAST;
595 	vmxnet3_refresh_rxfilter(dp);
596 
597 	/*
598 	 * Get the link state now because no events will be generated
599 	 */
600 	vmxnet3_refresh_linkstate(dp);
601 	mac_link_update(dp->mac, dp->linkState);
602 
603 	/*
604 	 * Finally, unmask the interrupt
605 	 */
606 	VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_IMR, 0);
607 
608 	return (0);
609 
610 error_txhandle:
611 	ddi_dma_free_handle(&dp->txDmaHandle);
612 error_rx_queue:
613 	vmxnet3_destroy_rxqueue(dp);
614 error_tx_queue:
615 	vmxnet3_destroy_txqueue(dp);
616 error_shared_data:
617 	vmxnet3_destroy_drivershared(dp);
618 error:
619 	return (err);
620 }
621 
/*
 * Stop a vmxnet3 device: send a stop command to the device and
 * de-allocate the shared data structures.
 *
 * Teardown mirrors vmxnet3_start() in reverse: Tx DMA handle, rx queue,
 * tx queue, then the shared area.
 */
static void
vmxnet3_stop(void *data)
{
	vmxnet3_softc_t *dp = data;

	VMXNET3_DEBUG(dp, 1, "stop()\n");

	/*
	 * Take the 2 locks related to asynchronous events.
	 * These events should always check dp->devEnabled before poking dp.
	 */
	mutex_enter(&dp->intrLock);
	mutex_enter(&dp->rxPoolLock);
	/* Mask the interrupt, mark the device disabled, then quiesce it */
	VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_IMR, 1);
	dp->devEnabled = B_FALSE;
	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_QUIESCE_DEV);
	mutex_exit(&dp->rxPoolLock);
	mutex_exit(&dp->intrLock);

	ddi_dma_free_handle(&dp->txDmaHandle);

	vmxnet3_destroy_rxqueue(dp);
	vmxnet3_destroy_txqueue(dp);

	vmxnet3_destroy_drivershared(dp);
}
652 
653 /*
654  * Set or unset promiscuous mode on a vmxnet3 device.
655  */
656 static int
657 vmxnet3_setpromisc(void *data, boolean_t promisc)
658 {
659 	vmxnet3_softc_t *dp = data;
660 
661 	VMXNET3_DEBUG(dp, 2, "setpromisc(%s)\n", promisc ? "TRUE" : "FALSE");
662 
663 	if (promisc) {
664 		dp->rxMode |= VMXNET3_RXM_PROMISC;
665 	} else {
666 		dp->rxMode &= ~VMXNET3_RXM_PROMISC;
667 	}
668 
669 	vmxnet3_refresh_rxfilter(dp);
670 
671 	return (0);
672 }
673 
674 /*
675  * Add or remove a multicast address from/to a vmxnet3 device.
676  *
677  * Returns:
678  *	0 on success, non-zero on failure.
679  */
680 static int
681 vmxnet3_multicst(void *data, boolean_t add, const uint8_t *macaddr)
682 {
683 	vmxnet3_softc_t *dp = data;
684 	vmxnet3_dmabuf_t newMfTable;
685 	int ret = 0;
686 	uint16_t macIdx;
687 	size_t allocSize;
688 
689 	VMXNET3_DEBUG(dp, 2, "multicst(%s, "MACADDR_FMT")\n",
690 	    add ? "add" : "remove", MACADDR_FMT_ARGS(macaddr));
691 
692 	/*
693 	 * First lookup the position of the given MAC to check if it is
694 	 * present in the existing MF table.
695 	 */
696 	for (macIdx = 0; macIdx < dp->mfTable.bufLen; macIdx += 6) {
697 		if (memcmp(&dp->mfTable.buf[macIdx], macaddr, 6) == 0) {
698 			break;
699 		}
700 	}
701 
702 	/*
703 	 * Check for 2 situations we can handle gracefully by bailing out:
704 	 * Adding an already existing filter or removing a non-existing one.
705 	 */
706 	if (add && macIdx < dp->mfTable.bufLen) {
707 		VMXNET3_WARN(dp, MACADDR_FMT " already in MC filter list "
708 		    "@ %u\n", MACADDR_FMT_ARGS(macaddr), macIdx / 6);
709 		ASSERT(B_FALSE);
710 		goto done;
711 	}
712 	if (!add && macIdx == dp->mfTable.bufLen) {
713 		VMXNET3_WARN(dp, MACADDR_FMT " not in MC filter list @ %u\n",
714 		    MACADDR_FMT_ARGS(macaddr), macIdx / 6);
715 		ASSERT(B_FALSE);
716 		goto done;
717 	}
718 
719 	/*
720 	 * Create the new MF table
721 	 */
722 	allocSize = dp->mfTable.bufLen + (add ? 6 : -6);
723 	if (allocSize) {
724 		ret = vmxnet3_alloc_dma_mem_1(dp, &newMfTable, allocSize,
725 		    B_TRUE);
726 		ASSERT(ret == 0);
727 		if (add) {
728 			(void) memcpy(newMfTable.buf, dp->mfTable.buf,
729 			    dp->mfTable.bufLen);
730 			(void) memcpy(newMfTable.buf + dp->mfTable.bufLen,
731 			    macaddr, 6);
732 		} else {
733 			(void) memcpy(newMfTable.buf, dp->mfTable.buf,
734 			    macIdx);
735 			(void) memcpy(newMfTable.buf + macIdx,
736 			    dp->mfTable.buf + macIdx + 6,
737 			    dp->mfTable.bufLen - macIdx - 6);
738 		}
739 	} else {
740 		newMfTable.buf = NULL;
741 		newMfTable.bufPA = 0;
742 		newMfTable.bufLen = 0;
743 	}
744 
745 	/*
746 	 * Now handle 2 corner cases: if we're creating the first filter or
747 	 * removing the last one, we have to update rxMode accordingly.
748 	 */
749 	if (add && newMfTable.bufLen == 6) {
750 		ASSERT(!(dp->rxMode & VMXNET3_RXM_MCAST));
751 		dp->rxMode |= VMXNET3_RXM_MCAST;
752 		vmxnet3_refresh_rxfilter(dp);
753 	}
754 	if (!add && dp->mfTable.bufLen == 6) {
755 		ASSERT(newMfTable.buf == NULL);
756 		ASSERT(dp->rxMode & VMXNET3_RXM_MCAST);
757 		dp->rxMode &= ~VMXNET3_RXM_MCAST;
758 		vmxnet3_refresh_rxfilter(dp);
759 	}
760 
761 	/*
762 	 * Now replace the old MF table with the new one
763 	 */
764 	if (dp->mfTable.buf) {
765 		vmxnet3_free_dma_mem(&dp->mfTable);
766 	}
767 	dp->mfTable = newMfTable;
768 	VMXNET3_DS(dp)->devRead.rxFilterConf.mfTablePA = newMfTable.bufPA;
769 	VMXNET3_DS(dp)->devRead.rxFilterConf.mfTableLen = newMfTable.bufLen;
770 
771 done:
772 	/* Always update the filters */
773 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_UPDATE_MAC_FILTERS);
774 
775 	return (ret);
776 }
777 
778 /*
779  * Set the mac address of a vmxnet3 device.
780  *
781  * Returns:
782  *	0
783  */
784 static int
785 vmxnet3_unicst(void *data, const uint8_t *macaddr)
786 {
787 	vmxnet3_softc_t *dp = data;
788 	uint32_t val32;
789 
790 	VMXNET3_DEBUG(dp, 2, "unicst("MACADDR_FMT")\n",
791 	    MACADDR_FMT_ARGS(macaddr));
792 
793 	val32 = *((uint32_t *)(macaddr + 0));
794 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_MACL, val32);
795 	val32 = *((uint16_t *)(macaddr + 4));
796 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_MACH, val32);
797 
798 	(void) memcpy(dp->macaddr, macaddr, 6);
799 
800 	return (0);
801 }
802 
803 /*
804  * Change the MTU as seen by the driver. This is only supported when
805  * the mac is stopped.
806  *
807  * Returns:
808  *	EBUSY if the device is enabled.
809  *	EINVAL for invalid MTU values.
810  *	0 on success.
811  */
812 static int
813 vmxnet3_change_mtu(vmxnet3_softc_t *dp, uint32_t new_mtu)
814 {
815 	int ret;
816 
817 	if (dp->devEnabled)
818 		return (EBUSY);
819 
820 	if (new_mtu == dp->cur_mtu) {
821 		VMXNET3_WARN(dp, "New MTU is same as old mtu : %d.\n", new_mtu);
822 		return (0);
823 	}
824 
825 	if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU) {
826 		VMXNET3_WARN(dp, "New MTU not in valid range [%d, %d].\n",
827 		    VMXNET3_MIN_MTU, VMXNET3_MAX_MTU);
828 		return (EINVAL);
829 	} else if (new_mtu > ETHERMTU && !dp->allow_jumbo) {
830 		VMXNET3_WARN(dp, "MTU cannot be greater than %d because "
831 		    "accept-jumbo is not enabled.\n", ETHERMTU);
832 		return (EINVAL);
833 	}
834 
835 	dp->cur_mtu = new_mtu;
836 	dp->alloc_ok = VMXNET3_ALLOC_OK(dp);
837 
838 	if ((ret = mac_maxsdu_update(dp->mac, new_mtu)) != 0)
839 		VMXNET3_WARN(dp, "Unable to update mac with %d mtu: %d",
840 		    new_mtu, ret);
841 
842 	return (ret);
843 }
844 
845 /* ARGSUSED */
846 static int
847 vmxnet3_get_prop(void *data, const char *prop_name, mac_prop_id_t prop_id,
848     uint_t prop_val_size, void *prop_val)
849 {
850 	vmxnet3_softc_t *dp = data;
851 	int ret = 0;
852 
853 	switch (prop_id) {
854 	case MAC_PROP_MTU:
855 		ASSERT(prop_val_size >= sizeof (uint32_t));
856 		bcopy(&dp->cur_mtu, prop_val, sizeof (uint32_t));
857 		break;
858 	default:
859 		VMXNET3_WARN(dp, "vmxnet3_get_prop property %d not supported",
860 		    prop_id);
861 		ret = ENOTSUP;
862 	}
863 	return (ret);
864 }
865 
866 /* ARGSUSED */
867 static int
868 vmxnet3_set_prop(void *data, const char *prop_name, mac_prop_id_t prop_id,
869     uint_t prop_val_size, const void *prop_val)
870 {
871 	vmxnet3_softc_t *dp = data;
872 	int ret;
873 
874 	switch (prop_id) {
875 	case MAC_PROP_MTU: {
876 		uint32_t new_mtu;
877 		ASSERT(prop_val_size >= sizeof (uint32_t));
878 		bcopy(prop_val, &new_mtu, sizeof (new_mtu));
879 		ret = vmxnet3_change_mtu(dp, new_mtu);
880 		break;
881 	}
882 	default:
883 		VMXNET3_WARN(dp, "vmxnet3_set_prop property %d not supported",
884 		    prop_id);
885 		ret = ENOTSUP;
886 	}
887 
888 	return (ret);
889 }
890 
891 /* ARGSUSED */
892 static void
893 vmxnet3_prop_info(void *data, const char *prop_name, mac_prop_id_t prop_id,
894     mac_prop_info_handle_t prop_handle)
895 {
896 	vmxnet3_softc_t *dp = data;
897 
898 	switch (prop_id) {
899 	case MAC_PROP_MTU:
900 		mac_prop_info_set_range_uint32(prop_handle, VMXNET3_MIN_MTU,
901 		    VMXNET3_MAX_MTU);
902 		break;
903 	default:
904 		VMXNET3_WARN(dp, "vmxnet3_prop_info: property %d not supported",
905 		    prop_id);
906 	}
907 }
908 
909 /*
910  * DDI/DDK callback to handle IOCTL in driver. Currently it only handles
911  * ND_SET ioctl. Rest all are ignored. The ND_SET is used to set/reset
912  * accept-jumbo ndd parameted for the interface.
913  *
914  * Side effects:
915  *	MTU can be changed and device can be reset. An ACK or NACK is conveyed
916  *	to the calling function from the mblk which was used to call this
917  *	function.
918  */
919 static void
920 vmxnet3_ioctl(void *arg, queue_t *wq, mblk_t *mp)
921 {
922 	vmxnet3_softc_t *dp = arg;
923 	int ret = EINVAL;
924 	IOCP iocp;
925 	mblk_t *mp1;
926 	char *valp, *param;
927 	int data;
928 
929 	iocp = (void *)mp->b_rptr;
930 	iocp->ioc_error = 0;
931 
932 	switch (iocp->ioc_cmd) {
933 	case ND_SET:
934 		/*
935 		 * The mblk in continuation would contain the ndd parameter name
936 		 * and data value to be set
937 		 */
938 		mp1 = mp->b_cont;
939 		if (!mp1) {
940 			VMXNET3_WARN(dp, "Error locating parameter name.\n");
941 			ret = EINVAL;
942 			break;
943 		}
944 
945 		/* Force null termination */
946 		mp1->b_datap->db_lim[-1] = '\0';
947 
948 		/*
949 		 * From /usr/src/uts/common/inet/nd.c : nd_getset()
950 		 * "logic throughout nd_xxx assumes single data block for ioctl.
951 		 *  However, existing code sends in some big buffers."
952 		 */
953 		if (mp1->b_cont) {
954 			freemsg(mp1->b_cont);
955 			mp1->b_cont = NULL;
956 		}
957 
958 		valp = (char *)mp1->b_rptr;	/* Points to param name */
959 		ASSERT(valp);
960 		param = valp;
961 		VMXNET3_DEBUG(dp, 3, "ND Set ioctl for %s\n", param);
962 
963 		/*
964 		 * Go past the end of this null terminated string to get the
965 		 * data value.
966 		 */
967 		while (*valp && valp <= (char *)mp1->b_wptr)
968 			valp++;
969 
970 		if (valp > (char *)mp1->b_wptr) {
971 			/*
972 			 * We are already beyond the readable area of mblk and
973 			 * still haven't found the end of param string.
974 			 */
975 			VMXNET3_WARN(dp,
976 			    "No data value found to be set to param\n");
977 			data = -1;
978 		} else {
979 			/* Now this points to data string */
980 			valp++;
981 			/* Get numeric value of first letter */
982 			data = (int)*valp - (int)'0';
983 		}
984 
985 		if (strcmp("accept-jumbo", param) == 0) {
986 			if (data == 1) {
987 				VMXNET3_DEBUG(dp, 2,
988 				    "Accepting jumbo frames\n");
989 				dp->allow_jumbo = B_TRUE;
990 				ret = vmxnet3_change_mtu(dp, VMXNET3_MAX_MTU);
991 			} else if (data == 0) {
992 				VMXNET3_DEBUG(dp, 2,
993 				    "Rejecting jumbo frames\n");
994 				dp->allow_jumbo = B_FALSE;
995 				ret = vmxnet3_change_mtu(dp, ETHERMTU);
996 			} else {
997 				VMXNET3_WARN(dp, "Invalid data value to be set,"
998 				    " use 0 or 1\n");
999 				ret = -1;
1000 			}
1001 		}
1002 		freemsg(mp1);
1003 		mp->b_cont = NULL;
1004 		break;
1005 
1006 	default:
1007 		if (mp->b_cont) {
1008 			freemsg(mp->b_cont);
1009 			mp->b_cont = NULL;
1010 		}
1011 		ret = -1;
1012 		break;
1013 	}
1014 
1015 	if (ret == 0)
1016 		miocack(wq, mp, 0, 0);
1017 	else
1018 		miocnak(wq, mp, 0, EINVAL);
1019 }
1020 
1021 /*
1022  * Get the capabilities of a vmxnet3 device.
1023  *
1024  * Returns:
1025  *	B_TRUE if the capability is supported, B_FALSE otherwise.
1026  */
1027 static boolean_t
1028 vmxnet3_getcapab(void *data, mac_capab_t capab, void *arg)
1029 {
1030 	vmxnet3_softc_t *dp = data;
1031 	boolean_t ret;
1032 
1033 	switch (capab) {
1034 	case MAC_CAPAB_HCKSUM: {
1035 		uint32_t *txflags = arg;
1036 		*txflags = HCKSUM_INET_PARTIAL;
1037 		ret = B_TRUE;
1038 		break;
1039 	}
1040 	case MAC_CAPAB_LSO: {
1041 		mac_capab_lso_t *lso = arg;
1042 		lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
1043 		lso->lso_basic_tcp_ipv4.lso_max = IP_MAXPACKET;
1044 		ret = vmxnet3_getprop(dp, "EnableLSO", 0, 1, 1);
1045 		break;
1046 	}
1047 	default:
1048 		ret = B_FALSE;
1049 	}
1050 
1051 	VMXNET3_DEBUG(dp, 2, "getcapab(0x%x) -> %s\n", capab,
1052 	    ret ? "yes" : "no");
1053 
1054 	return (ret);
1055 }
1056 
1057 /*
1058  * Reset a vmxnet3 device. Only to be used when the device is wedged.
1059  *
1060  * Side effects:
1061  *	The device is reset.
1062  */
1063 static void
1064 vmxnet3_reset(void *data)
1065 {
1066 	int ret;
1067 
1068 	vmxnet3_softc_t *dp = data;
1069 
1070 	VMXNET3_DEBUG(dp, 1, "vmxnet3_reset()\n");
1071 
1072 	atomic_inc_32(&dp->reset_count);
1073 	vmxnet3_stop(dp);
1074 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
1075 	if ((ret = vmxnet3_start(dp)) != 0)
1076 		VMXNET3_WARN(dp, "failed to reset the device: %d", ret);
1077 }
1078 
1079 /*
1080  * Process pending events on a vmxnet3 device.
1081  *
1082  * Returns:
1083  *	B_TRUE if the link state changed, B_FALSE otherwise.
1084  */
1085 static boolean_t
1086 vmxnet3_intr_events(vmxnet3_softc_t *dp)
1087 {
1088 	Vmxnet3_DriverShared *ds = VMXNET3_DS(dp);
1089 	boolean_t linkStateChanged = B_FALSE;
1090 	uint32_t events = ds->ecr;
1091 
1092 	if (events) {
1093 		VMXNET3_DEBUG(dp, 2, "events(0x%x)\n", events);
1094 		if (events & (VMXNET3_ECR_RQERR | VMXNET3_ECR_TQERR)) {
1095 			Vmxnet3_TxQueueDesc *tqdesc = VMXNET3_TQDESC(dp);
1096 			Vmxnet3_RxQueueDesc *rqdesc = VMXNET3_RQDESC(dp);
1097 
1098 			VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD,
1099 			    VMXNET3_CMD_GET_QUEUE_STATUS);
1100 			if (tqdesc->status.stopped) {
1101 				VMXNET3_WARN(dp, "tq error 0x%x\n",
1102 				    tqdesc->status.error);
1103 			}
1104 			if (rqdesc->status.stopped) {
1105 				VMXNET3_WARN(dp, "rq error 0x%x\n",
1106 				    rqdesc->status.error);
1107 			}
1108 
1109 			if (ddi_taskq_dispatch(dp->resetTask, vmxnet3_reset,
1110 			    dp, DDI_NOSLEEP) == DDI_SUCCESS) {
1111 				VMXNET3_WARN(dp, "reset scheduled\n");
1112 			} else {
1113 				VMXNET3_WARN(dp,
1114 				    "ddi_taskq_dispatch() failed\n");
1115 			}
1116 		}
1117 		if (events & VMXNET3_ECR_LINK) {
1118 			vmxnet3_refresh_linkstate(dp);
1119 			linkStateChanged = B_TRUE;
1120 		}
1121 		if (events & VMXNET3_ECR_DIC) {
1122 			VMXNET3_DEBUG(dp, 1, "device implementation change\n");
1123 		}
1124 		VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_ECR, events);
1125 	}
1126 
1127 	return (linkStateChanged);
1128 }
1129 
1130 /*
1131  * Interrupt handler of a vmxnet3 device.
1132  *
1133  * Returns:
1134  *	DDI_INTR_CLAIMED or DDI_INTR_UNCLAIMED.
1135  */
1136 /* ARGSUSED1 */
1137 static uint_t
1138 vmxnet3_intr(caddr_t data1, caddr_t data2)
1139 {
1140 	vmxnet3_softc_t *dp = (void *) data1;
1141 
1142 	VMXNET3_DEBUG(dp, 3, "intr()\n");
1143 
1144 	mutex_enter(&dp->intrLock);
1145 
1146 	if (dp->devEnabled) {
1147 		boolean_t linkStateChanged;
1148 		boolean_t mustUpdateTx;
1149 		mblk_t *mps;
1150 
1151 		if (dp->intrType == DDI_INTR_TYPE_FIXED &&
1152 		    !VMXNET3_BAR1_GET32(dp, VMXNET3_REG_ICR)) {
1153 			goto intr_unclaimed;
1154 		}
1155 
1156 		if (dp->intrMaskMode == VMXNET3_IMM_ACTIVE) {
1157 			VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_IMR, 1);
1158 		}
1159 
1160 		linkStateChanged = vmxnet3_intr_events(dp);
1161 		mustUpdateTx = vmxnet3_tx_complete(dp, &dp->txQueue);
1162 		mps = vmxnet3_rx_intr(dp, &dp->rxQueue);
1163 
1164 		mutex_exit(&dp->intrLock);
1165 		VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_IMR, 0);
1166 
1167 		if (linkStateChanged) {
1168 			mac_link_update(dp->mac, dp->linkState);
1169 		}
1170 		if (mustUpdateTx) {
1171 			mac_tx_update(dp->mac);
1172 		}
1173 		if (mps) {
1174 			mac_rx(dp->mac, NULL, mps);
1175 		}
1176 
1177 		return (DDI_INTR_CLAIMED);
1178 	}
1179 
1180 intr_unclaimed:
1181 	mutex_exit(&dp->intrLock);
1182 	return (DDI_INTR_UNCLAIMED);
1183 }
1184 
1185 static int
1186 vmxnet3_kstat_update(kstat_t *ksp, int rw)
1187 {
1188 	vmxnet3_softc_t *dp = ksp->ks_private;
1189 	vmxnet3_kstats_t *statp = ksp->ks_data;
1190 
1191 	if (rw == KSTAT_WRITE)
1192 		return (EACCES);
1193 
1194 	statp->reset_count.value.ul = dp->reset_count;
1195 	statp->tx_pullup_needed.value.ul = dp->tx_pullup_needed;
1196 	statp->tx_ring_full.value.ul = dp->tx_ring_full;
1197 	statp->rx_alloc_buf.value.ul = dp->rx_alloc_buf;
1198 	statp->rx_pool_empty.value.ul = dp->rx_pool_empty;
1199 	statp->rx_num_bufs.value.ul = dp->rx_num_bufs;
1200 
1201 	return (0);
1202 }
1203 
1204 static int
1205 vmxnet3_kstat_init(vmxnet3_softc_t *dp)
1206 {
1207 	vmxnet3_kstats_t *statp;
1208 
1209 	dp->devKstats = kstat_create(VMXNET3_MODNAME, dp->instance,
1210 	    "statistics", "dev",  KSTAT_TYPE_NAMED,
1211 	    sizeof (vmxnet3_kstats_t) / sizeof (kstat_named_t), 0);
1212 	if (dp->devKstats == NULL)
1213 		return (DDI_FAILURE);
1214 
1215 	dp->devKstats->ks_update = vmxnet3_kstat_update;
1216 	dp->devKstats->ks_private = dp;
1217 
1218 	statp = dp->devKstats->ks_data;
1219 
1220 	kstat_named_init(&statp->reset_count, "reset_count", KSTAT_DATA_ULONG);
1221 	kstat_named_init(&statp->tx_pullup_needed, "tx_pullup_needed",
1222 	    KSTAT_DATA_ULONG);
1223 	kstat_named_init(&statp->tx_ring_full, "tx_ring_full",
1224 	    KSTAT_DATA_ULONG);
1225 	kstat_named_init(&statp->rx_alloc_buf, "rx_alloc_buf",
1226 	    KSTAT_DATA_ULONG);
1227 	kstat_named_init(&statp->rx_pool_empty, "rx_pool_empty",
1228 	    KSTAT_DATA_ULONG);
1229 	kstat_named_init(&statp->rx_num_bufs, "rx_num_bufs",
1230 	    KSTAT_DATA_ULONG);
1231 
1232 	kstat_install(dp->devKstats);
1233 
1234 	return (DDI_SUCCESS);
1235 }
1236 
1237 /*
1238  * Probe and attach a vmxnet3 instance to the stack.
1239  *
1240  * Returns:
1241  *	DDI_SUCCESS or DDI_FAILURE.
1242  */
1243 static int
1244 vmxnet3_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1245 {
1246 	vmxnet3_softc_t *dp;
1247 	mac_register_t *macr;
1248 	uint16_t vendorId, devId, ret16;
1249 	uint32_t ret32;
1250 	int ret, err;
1251 	uint_t uret;
1252 
1253 	if (cmd != DDI_ATTACH) {
1254 		goto error;
1255 	}
1256 
1257 	/*
1258 	 * Allocate the soft state
1259 	 */
1260 	dp = kmem_zalloc(sizeof (vmxnet3_softc_t), KM_SLEEP);
1261 	ASSERT(dp);
1262 
1263 	dp->dip = dip;
1264 	dp->instance = ddi_get_instance(dip);
1265 	dp->cur_mtu = ETHERMTU;
1266 	dp->allow_jumbo = B_TRUE;
1267 	dp->alloc_ok = VMXNET3_ALLOC_OK(dp);
1268 
1269 	VMXNET3_DEBUG(dp, 1, "attach()\n");
1270 
1271 	ddi_set_driver_private(dip, dp);
1272 
1273 	/*
1274 	 * Get access to the PCI bus configuration space
1275 	 */
1276 	if (pci_config_setup(dip, &dp->pciHandle) != DDI_SUCCESS) {
1277 		VMXNET3_WARN(dp, "pci_config_setup() failed\n");
1278 		goto error_soft_state;
1279 	}
1280 
1281 	/*
1282 	 * Make sure the chip is a vmxnet3 device
1283 	 */
1284 	vendorId = pci_config_get16(dp->pciHandle, PCI_CONF_VENID);
1285 	devId = pci_config_get16(dp->pciHandle, PCI_CONF_DEVID);
1286 	if (vendorId != PCI_VENDOR_ID_VMWARE ||
1287 	    devId != PCI_DEVICE_ID_VMWARE_VMXNET3) {
1288 		VMXNET3_WARN(dp, "wrong PCI venid/devid (0x%x, 0x%x)\n",
1289 		    vendorId, devId);
1290 		goto error_pci_config;
1291 	}
1292 
1293 	/*
1294 	 * Make sure we can access the registers through the I/O space
1295 	 */
1296 	ret16 = pci_config_get16(dp->pciHandle, PCI_CONF_COMM);
1297 	ret16 |= PCI_COMM_IO | PCI_COMM_ME;
1298 	pci_config_put16(dp->pciHandle, PCI_CONF_COMM, ret16);
1299 
1300 	/*
1301 	 * Map the I/O space in memory
1302 	 */
1303 	if (ddi_regs_map_setup(dip, 1, &dp->bar0, 0, 0, &vmxnet3_dev_attr,
1304 	    &dp->bar0Handle) != DDI_SUCCESS) {
1305 		VMXNET3_WARN(dp, "ddi_regs_map_setup() for BAR0 failed\n");
1306 		goto error_pci_config;
1307 	}
1308 
1309 	if (ddi_regs_map_setup(dip, 2, &dp->bar1, 0, 0, &vmxnet3_dev_attr,
1310 	    &dp->bar1Handle) != DDI_SUCCESS) {
1311 		VMXNET3_WARN(dp, "ddi_regs_map_setup() for BAR1 failed\n");
1312 		goto error_regs_map_0;
1313 	}
1314 
1315 	/*
1316 	 * Check the version number of the virtual device
1317 	 */
1318 	if (VMXNET3_BAR1_GET32(dp, VMXNET3_REG_VRRS) & 1) {
1319 		VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_VRRS, 1);
1320 	} else {
1321 		VMXNET3_WARN(dp, "incompatible h/w version\n");
1322 		goto error_regs_map_1;
1323 	}
1324 
1325 	if (VMXNET3_BAR1_GET32(dp, VMXNET3_REG_UVRS) & 1) {
1326 		VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_UVRS, 1);
1327 	} else {
1328 		VMXNET3_WARN(dp, "incompatible upt version\n");
1329 		goto error_regs_map_1;
1330 	}
1331 
1332 	if (vmxnet3_kstat_init(dp) != DDI_SUCCESS) {
1333 		VMXNET3_WARN(dp, "unable to initialize kstats");
1334 		goto error_regs_map_1;
1335 	}
1336 
1337 	/*
1338 	 * Read the MAC address from the device
1339 	 */
1340 	ret32 = VMXNET3_BAR1_GET32(dp, VMXNET3_REG_MACL);
1341 	*((uint32_t *)(dp->macaddr + 0)) = ret32;
1342 	ret32 = VMXNET3_BAR1_GET32(dp, VMXNET3_REG_MACH);
1343 	*((uint16_t *)(dp->macaddr + 4)) = ret32;
1344 
1345 	/*
1346 	 * Register with the MAC framework
1347 	 */
1348 	if (!(macr = mac_alloc(MAC_VERSION))) {
1349 		VMXNET3_WARN(dp, "mac_alloc() failed\n");
1350 		goto error_kstat;
1351 	}
1352 
1353 	macr->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1354 	macr->m_driver = dp;
1355 	macr->m_dip = dip;
1356 	macr->m_instance = 0;
1357 	macr->m_src_addr = dp->macaddr;
1358 	macr->m_dst_addr = NULL;
1359 	macr->m_callbacks = &vmxnet3_mac_callbacks;
1360 	macr->m_min_sdu = 0;
1361 	macr->m_max_sdu = ETHERMTU;
1362 	macr->m_margin = VLAN_TAGSZ;
1363 	macr->m_pdata = NULL;
1364 	macr->m_pdata_size = 0;
1365 
1366 	ret = mac_register(macr, &dp->mac);
1367 	mac_free(macr);
1368 	if (ret != DDI_SUCCESS) {
1369 		VMXNET3_WARN(dp, "mac_register() failed\n");
1370 		goto error_kstat;
1371 	}
1372 
1373 	/*
1374 	 * Register the interrupt(s) in this order of preference:
1375 	 * MSI-X, MSI, INTx
1376 	 */
1377 	VMXNET3_BAR1_PUT32(dp, VMXNET3_REG_CMD, VMXNET3_CMD_GET_CONF_INTR);
1378 	ret32 = VMXNET3_BAR1_GET32(dp, VMXNET3_REG_CMD);
1379 	switch (ret32 & 0x3) {
1380 	case VMXNET3_IT_AUTO:
1381 	case VMXNET3_IT_MSIX:
1382 		dp->intrType = DDI_INTR_TYPE_MSIX;
1383 		err = ddi_intr_alloc(dip, &dp->intrHandle, dp->intrType, 0, 1,
1384 		    &ret, DDI_INTR_ALLOC_STRICT);
1385 		if (err == DDI_SUCCESS)
1386 			break;
1387 		VMXNET3_DEBUG(dp, 2, "DDI_INTR_TYPE_MSIX failed, err:%d\n",
1388 		    err);
1389 		/* FALLTHROUGH */
1390 	case VMXNET3_IT_MSI:
1391 		dp->intrType = DDI_INTR_TYPE_MSI;
1392 		if (ddi_intr_alloc(dip, &dp->intrHandle, dp->intrType, 0, 1,
1393 		    &ret, DDI_INTR_ALLOC_STRICT) == DDI_SUCCESS)
1394 			break;
1395 		VMXNET3_DEBUG(dp, 2, "DDI_INTR_TYPE_MSI failed\n");
1396 		/* FALLTHROUGH */
1397 	case VMXNET3_IT_INTX:
1398 		dp->intrType = DDI_INTR_TYPE_FIXED;
1399 		if (ddi_intr_alloc(dip, &dp->intrHandle, dp->intrType, 0, 1,
1400 		    &ret, DDI_INTR_ALLOC_STRICT) == DDI_SUCCESS) {
1401 			break;
1402 		}
1403 		VMXNET3_DEBUG(dp, 2, "DDI_INTR_TYPE_INTX failed\n");
1404 		/* FALLTHROUGH */
1405 	default:
1406 		VMXNET3_WARN(dp, "ddi_intr_alloc() failed\n");
1407 		goto error_mac;
1408 	}
1409 	dp->intrMaskMode = (ret32 >> 2) & 0x3;
1410 	if (dp->intrMaskMode == VMXNET3_IMM_LAZY) {
1411 		VMXNET3_WARN(dp, "Lazy masking is not supported\n");
1412 		goto error_intr;
1413 	}
1414 
1415 	if (ddi_intr_get_pri(dp->intrHandle, &uret) != DDI_SUCCESS) {
1416 		VMXNET3_WARN(dp, "ddi_intr_get_pri() failed\n");
1417 		goto error_intr;
1418 	}
1419 
1420 	VMXNET3_DEBUG(dp, 2, "intrType=0x%x, intrMaskMode=0x%x, intrPrio=%u\n",
1421 	    dp->intrType, dp->intrMaskMode, uret);
1422 
1423 	/*
1424 	 * Create a task queue to reset the device if it wedges.
1425 	 */
1426 	dp->resetTask = ddi_taskq_create(dip, "vmxnet3_reset_task", 1,
1427 	    TASKQ_DEFAULTPRI, 0);
1428 	if (!dp->resetTask) {
1429 		VMXNET3_WARN(dp, "ddi_taskq_create() failed()\n");
1430 		goto error_intr;
1431 	}
1432 
1433 	/*
1434 	 * Initialize our mutexes now that we know the interrupt priority
1435 	 * This _must_ be done before ddi_intr_enable()
1436 	 */
1437 	mutex_init(&dp->intrLock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(uret));
1438 	mutex_init(&dp->txLock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(uret));
1439 	mutex_init(&dp->rxPoolLock, NULL, MUTEX_DRIVER, DDI_INTR_PRI(uret));
1440 
1441 	if (ddi_intr_add_handler(dp->intrHandle, vmxnet3_intr,
1442 	    dp, NULL) != DDI_SUCCESS) {
1443 		VMXNET3_WARN(dp, "ddi_intr_add_handler() failed\n");
1444 		goto error_mutexes;
1445 	}
1446 
1447 	err = ddi_intr_get_cap(dp->intrHandle, &dp->intrCap);
1448 	if (err != DDI_SUCCESS) {
1449 		VMXNET3_WARN(dp, "ddi_intr_get_cap() failed %d", err);
1450 		goto error_intr_handler;
1451 	}
1452 
1453 	if (dp->intrCap & DDI_INTR_FLAG_BLOCK) {
1454 		err = ddi_intr_block_enable(&dp->intrHandle, 1);
1455 		if (err != DDI_SUCCESS) {
1456 			VMXNET3_WARN(dp, "ddi_intr_block_enable() failed, "
1457 			    "err:%d\n", err);
1458 			goto error_intr_handler;
1459 		}
1460 	} else {
1461 		err = ddi_intr_enable(dp->intrHandle);
1462 		if ((err != DDI_SUCCESS)) {
1463 			VMXNET3_WARN(dp, "ddi_intr_enable() failed, err:%d\n",
1464 			    err);
1465 			goto error_intr_handler;
1466 		}
1467 	}
1468 
1469 	return (DDI_SUCCESS);
1470 
1471 error_intr_handler:
1472 	(void) ddi_intr_remove_handler(dp->intrHandle);
1473 error_mutexes:
1474 	mutex_destroy(&dp->rxPoolLock);
1475 	mutex_destroy(&dp->txLock);
1476 	mutex_destroy(&dp->intrLock);
1477 	ddi_taskq_destroy(dp->resetTask);
1478 error_intr:
1479 	(void) ddi_intr_free(dp->intrHandle);
1480 error_mac:
1481 	(void) mac_unregister(dp->mac);
1482 error_kstat:
1483 	kstat_delete(dp->devKstats);
1484 error_regs_map_1:
1485 	ddi_regs_map_free(&dp->bar1Handle);
1486 error_regs_map_0:
1487 	ddi_regs_map_free(&dp->bar0Handle);
1488 error_pci_config:
1489 	pci_config_teardown(&dp->pciHandle);
1490 error_soft_state:
1491 	kmem_free(dp, sizeof (vmxnet3_softc_t));
1492 error:
1493 	return (DDI_FAILURE);
1494 }
1495 
1496 /*
1497  * Detach a vmxnet3 instance from the stack.
1498  *
1499  * Returns:
1500  *	DDI_SUCCESS or DDI_FAILURE.
1501  */
1502 static int
1503 vmxnet3_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1504 {
1505 	vmxnet3_softc_t *dp = ddi_get_driver_private(dip);
1506 	unsigned int retries = 0;
1507 	int ret;
1508 
1509 	VMXNET3_DEBUG(dp, 1, "detach()\n");
1510 
1511 	if (cmd != DDI_DETACH) {
1512 		return (DDI_FAILURE);
1513 	}
1514 
1515 	while (dp->rx_num_bufs > 0) {
1516 		if (retries++ < 10) {
1517 			VMXNET3_WARN(dp, "rx pending (%u), waiting 1 second\n",
1518 			    dp->rx_num_bufs);
1519 			delay(drv_usectohz(1000000));
1520 		} else {
1521 			VMXNET3_WARN(dp, "giving up\n");
1522 			return (DDI_FAILURE);
1523 		}
1524 	}
1525 
1526 	if (dp->intrCap & DDI_INTR_FLAG_BLOCK) {
1527 		ret = ddi_intr_block_disable(&dp->intrHandle, 1);
1528 	} else {
1529 		ret = ddi_intr_disable(dp->intrHandle);
1530 	}
1531 	if (ret != DDI_SUCCESS) {
1532 		VMXNET3_WARN(dp, "unable to disable interrupts");
1533 		return (DDI_FAILURE);
1534 	}
1535 	if (ddi_intr_remove_handler(dp->intrHandle) != DDI_SUCCESS) {
1536 		VMXNET3_WARN(dp, "unable to remove interrupt handler");
1537 		return (DDI_FAILURE);
1538 	}
1539 	(void) ddi_intr_free(dp->intrHandle);
1540 
1541 	VERIFY(mac_unregister(dp->mac) == 0);
1542 
1543 	kstat_delete(dp->devKstats);
1544 
1545 	if (dp->mfTable.buf) {
1546 		vmxnet3_free_dma_mem(&dp->mfTable);
1547 	}
1548 
1549 	mutex_destroy(&dp->rxPoolLock);
1550 	mutex_destroy(&dp->txLock);
1551 	mutex_destroy(&dp->intrLock);
1552 	ddi_taskq_destroy(dp->resetTask);
1553 
1554 	ddi_regs_map_free(&dp->bar1Handle);
1555 	ddi_regs_map_free(&dp->bar0Handle);
1556 	pci_config_teardown(&dp->pciHandle);
1557 
1558 	kmem_free(dp, sizeof (vmxnet3_softc_t));
1559 
1560 	return (DDI_SUCCESS);
1561 }
1562 
1563 /*
1564  * Structures used by the module loader
1565  */
1566 
1567 #define	VMXNET3_IDENT "VMware Ethernet v3 " VMXNET3_DRIVER_VERSION_STRING
1568 
1569 DDI_DEFINE_STREAM_OPS(
1570 	vmxnet3_dev_ops,
1571 	nulldev,
1572 	nulldev,
1573 	vmxnet3_attach,
1574 	vmxnet3_detach,
1575 	nodev,
1576 	NULL,
1577 	D_NEW | D_MP,
1578 	NULL,
1579 	ddi_quiesce_not_supported);
1580 
1581 static struct modldrv vmxnet3_modldrv = {
1582 	&mod_driverops,		/* drv_modops */
1583 	VMXNET3_IDENT,		/* drv_linkinfo */
1584 	&vmxnet3_dev_ops	/* drv_dev_ops */
1585 };
1586 
1587 static struct modlinkage vmxnet3_modlinkage = {
1588 	MODREV_1,			/* ml_rev */
1589 	{ &vmxnet3_modldrv, NULL }	/* ml_linkage */
1590 };
1591 
1592 /* Module load entry point */
1593 int
1594 _init(void)
1595 {
1596 	int ret;
1597 
1598 	mac_init_ops(&vmxnet3_dev_ops, VMXNET3_MODNAME);
1599 	ret = mod_install(&vmxnet3_modlinkage);
1600 	if (ret != DDI_SUCCESS) {
1601 		mac_fini_ops(&vmxnet3_dev_ops);
1602 	}
1603 
1604 	return (ret);
1605 }
1606 
1607 /* Module unload entry point */
1608 int
1609 _fini(void)
1610 {
1611 	int ret;
1612 
1613 	ret = mod_remove(&vmxnet3_modlinkage);
1614 	if (ret == DDI_SUCCESS) {
1615 		mac_fini_ops(&vmxnet3_dev_ops);
1616 	}
1617 
1618 	return (ret);
1619 }
1620 
1621 /* Module info entry point */
1622 int
1623 _info(struct modinfo *modinfop)
1624 {
1625 	return (mod_info(&vmxnet3_modlinkage, modinfop));
1626 }
1627 
1628 void
1629 vmxnet3_log(int level, vmxnet3_softc_t *dp, char *fmt, ...)
1630 {
1631 	dev_err(dp->dip, level, fmt);
1632 }
1633