xref: /titanic_50/usr/src/uts/common/io/vnic/vnic_dev.c (revision 56b2bdd1f04d465cfe4a95b88ae5cba5884154e4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/types.h>
26 #include <sys/cred.h>
27 #include <sys/sysmacros.h>
28 #include <sys/conf.h>
29 #include <sys/cmn_err.h>
30 #include <sys/list.h>
31 #include <sys/ksynch.h>
32 #include <sys/kmem.h>
33 #include <sys/stream.h>
34 #include <sys/modctl.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/atomic.h>
38 #include <sys/stat.h>
39 #include <sys/modhash.h>
40 #include <sys/strsubr.h>
41 #include <sys/strsun.h>
42 #include <sys/dlpi.h>
43 #include <sys/mac.h>
44 #include <sys/mac_provider.h>
45 #include <sys/mac_client.h>
46 #include <sys/mac_client_priv.h>
47 #include <sys/mac_ether.h>
48 #include <sys/dls.h>
49 #include <sys/pattr.h>
50 #include <sys/time.h>
51 #include <sys/vlan.h>
52 #include <sys/vnic.h>
53 #include <sys/vnic_impl.h>
54 #include <sys/mac_flow_impl.h>
55 #include <inet/ip_impl.h>
56 
57 /*
58  * Note that for best performance, the VNIC is a passthrough design.
 * Each VNIC has a corresponding MAC client of the underlying MAC (lower MAC).
60  * This MAC client is opened by the VNIC driver at VNIC creation,
61  * and closed when the VNIC is deleted.
62  * When a MAC client of the VNIC itself opens a VNIC, the MAC layer
63  * (upper MAC) detects that the MAC being opened is a VNIC. Instead
64  * of allocating a new MAC client, it asks the VNIC driver to return
65  * the lower MAC client handle associated with the VNIC, and that handle
 * is returned to the upper MAC client directly. This gives upper MAC
 * clients of the VNIC direct access to the lower MAC client for the
 * control path and data path.
69  *
70  * Due to this passthrough, some of the entry points exported by the
71  * VNIC driver are never directly invoked. These entry points include
72  * vnic_m_start, vnic_m_stop, vnic_m_promisc, vnic_m_multicst, etc.
73  */
74 
75 static int vnic_m_start(void *);
76 static void vnic_m_stop(void *);
77 static int vnic_m_promisc(void *, boolean_t);
78 static int vnic_m_multicst(void *, boolean_t, const uint8_t *);
79 static int vnic_m_unicst(void *, const uint8_t *);
80 static int vnic_m_stat(void *, uint_t, uint64_t *);
81 static void vnic_m_ioctl(void *, queue_t *, mblk_t *);
82 static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
83     const void *);
84 static void vnic_m_propinfo(void *, const char *, mac_prop_id_t,
85     mac_prop_info_handle_t);
86 static mblk_t *vnic_m_tx(void *, mblk_t *);
87 static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
88 static void vnic_notify_cb(void *, mac_notify_type_t);
89 
90 static kmem_cache_t	*vnic_cache;
91 static krwlock_t	vnic_lock;
92 static uint_t		vnic_count;
93 
94 #define	ANCHOR_VNIC_MIN_MTU	576
95 #define	ANCHOR_VNIC_MAX_MTU	9000
96 
97 /* hash of VNICs (vnic_t's), keyed by VNIC id */
98 static mod_hash_t	*vnic_hash;
99 #define	VNIC_HASHSZ	64
/*
 * Turn a VNIC id into a mod_hash key. The argument is parenthesized so
 * that an expression argument is evaluated as a whole before it is cast.
 */
#define	VNIC_HASH_KEY(vnic_id)	((mod_hash_key_t)(uintptr_t)(vnic_id))
101 
102 #define	VNIC_M_CALLBACK_FLAGS	\
103 	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)
104 
105 static mac_callbacks_t vnic_m_callbacks = {
106 	VNIC_M_CALLBACK_FLAGS,
107 	vnic_m_stat,
108 	vnic_m_start,
109 	vnic_m_stop,
110 	vnic_m_promisc,
111 	vnic_m_multicst,
112 	vnic_m_unicst,
113 	vnic_m_tx,
114 	NULL,
115 	vnic_m_ioctl,
116 	vnic_m_capab_get,
117 	NULL,
118 	NULL,
119 	vnic_m_setprop,
120 	NULL,
121 	vnic_m_propinfo
122 };
123 
124 void
125 vnic_dev_init(void)
126 {
127 	vnic_cache = kmem_cache_create("vnic_cache",
128 	    sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
129 
130 	vnic_hash = mod_hash_create_idhash("vnic_hash",
131 	    VNIC_HASHSZ, mod_hash_null_valdtor);
132 
133 	rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);
134 
135 	vnic_count = 0;
136 }
137 
138 void
139 vnic_dev_fini(void)
140 {
141 	ASSERT(vnic_count == 0);
142 
143 	rw_destroy(&vnic_lock);
144 	mod_hash_destroy_idhash(vnic_hash);
145 	kmem_cache_destroy(vnic_cache);
146 }
147 
148 uint_t
149 vnic_dev_count(void)
150 {
151 	return (vnic_count);
152 }
153 
154 static vnic_ioc_diag_t
155 vnic_mac2vnic_diag(mac_diag_t diag)
156 {
157 	switch (diag) {
158 	case MAC_DIAG_MACADDR_NIC:
159 		return (VNIC_IOC_DIAG_MACADDR_NIC);
160 	case MAC_DIAG_MACADDR_INUSE:
161 		return (VNIC_IOC_DIAG_MACADDR_INUSE);
162 	case MAC_DIAG_MACADDR_INVALID:
163 		return (VNIC_IOC_DIAG_MACADDR_INVALID);
164 	case MAC_DIAG_MACADDRLEN_INVALID:
165 		return (VNIC_IOC_DIAG_MACADDRLEN_INVALID);
166 	case MAC_DIAG_MACFACTORYSLOTINVALID:
167 		return (VNIC_IOC_DIAG_MACFACTORYSLOTINVALID);
168 	case MAC_DIAG_MACFACTORYSLOTUSED:
169 		return (VNIC_IOC_DIAG_MACFACTORYSLOTUSED);
170 	case MAC_DIAG_MACFACTORYSLOTALLUSED:
171 		return (VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED);
172 	case MAC_DIAG_MACFACTORYNOTSUP:
173 		return (VNIC_IOC_DIAG_MACFACTORYNOTSUP);
174 	case MAC_DIAG_MACPREFIX_INVALID:
175 		return (VNIC_IOC_DIAG_MACPREFIX_INVALID);
176 	case MAC_DIAG_MACPREFIXLEN_INVALID:
177 		return (VNIC_IOC_DIAG_MACPREFIXLEN_INVALID);
178 	case MAC_DIAG_MACNO_HWRINGS:
179 		return (VNIC_IOC_DIAG_NO_HWRINGS);
180 	default:
181 		return (VNIC_IOC_DIAG_NONE);
182 	}
183 }
184 
185 static int
186 vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type,
187     int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg,
188     uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag,
189     uint16_t vid, boolean_t req_hwgrp_flag)
190 {
191 	mac_diag_t mac_diag;
192 	uint16_t mac_flags = 0;
193 	int err;
194 	uint_t addr_len;
195 
196 	if (flags & VNIC_IOC_CREATE_NODUPCHECK)
197 		mac_flags |= MAC_UNICAST_NODUPCHECK;
198 
199 	switch (vnic_addr_type) {
200 	case VNIC_MAC_ADDR_TYPE_FIXED:
201 	case VNIC_MAC_ADDR_TYPE_VRID:
202 		/*
203 		 * The MAC address value to assign to the VNIC
204 		 * is already provided in mac_addr_arg. addr_len_ptr_arg
205 		 * already contains the MAC address length.
206 		 */
207 		break;
208 
209 	case VNIC_MAC_ADDR_TYPE_RANDOM:
210 		/*
211 		 * Random MAC address. There are two sub-cases:
212 		 *
213 		 * 1 - If mac_len == 0, a new MAC address is generated.
214 		 *	The length of the MAC address to generated depends
215 		 *	on the type of MAC used. The prefix to use for the MAC
216 		 *	address is stored in the most significant bytes
217 		 *	of the mac_addr argument, and its length is specified
218 		 *	by the mac_prefix_len argument. This prefix can
219 		 *	correspond to a IEEE OUI in the case of Ethernet,
220 		 *	for example.
221 		 *
222 		 * 2 - If mac_len > 0, the address was already picked
223 		 *	randomly, and is now passed back during VNIC
224 		 *	re-creation. The mac_addr argument contains the MAC
225 		 *	address that was generated. We distinguish this
226 		 *	case from the fixed MAC address case, since we
227 		 *	want the user consumers to know, when they query
228 		 *	the list of VNICs, that a VNIC was assigned a
229 		 *	random MAC address vs assigned a fixed address
230 		 *	specified by the user.
231 		 */
232 
233 		/*
234 		 * If it's a pre-generated address, we're done. mac_addr_arg
235 		 * and addr_len_ptr_arg already contain the MAC address
236 		 * value and length.
237 		 */
238 		if (*addr_len_ptr_arg > 0)
239 			break;
240 
241 		/* generate a new random MAC address */
242 		if ((err = mac_addr_random(vnic->vn_mch,
243 		    prefix_len, mac_addr_arg, &mac_diag)) != 0) {
244 			*diag = vnic_mac2vnic_diag(mac_diag);
245 			return (err);
246 		}
247 		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
248 		break;
249 
250 	case VNIC_MAC_ADDR_TYPE_FACTORY:
251 		err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
252 		if (err != 0) {
253 			if (err == EINVAL)
254 				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTINVALID;
255 			if (err == EBUSY)
256 				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTUSED;
257 			if (err == ENOSPC)
258 				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED;
259 			return (err);
260 		}
261 
262 		mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
263 		    mac_addr_arg, &addr_len, NULL, NULL);
264 		*addr_len_ptr_arg = addr_len;
265 		break;
266 
267 	case VNIC_MAC_ADDR_TYPE_AUTO:
268 		/* first try to allocate a factory MAC address */
269 		err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
270 		if (err == 0) {
271 			mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
272 			    mac_addr_arg, &addr_len, NULL, NULL);
273 			vnic_addr_type = VNIC_MAC_ADDR_TYPE_FACTORY;
274 			*addr_len_ptr_arg = addr_len;
275 			break;
276 		}
277 
278 		/*
279 		 * Allocating a factory MAC address failed, generate a
280 		 * random MAC address instead.
281 		 */
282 		if ((err = mac_addr_random(vnic->vn_mch,
283 		    prefix_len, mac_addr_arg, &mac_diag)) != 0) {
284 			*diag = vnic_mac2vnic_diag(mac_diag);
285 			return (err);
286 		}
287 		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
288 		vnic_addr_type = VNIC_MAC_ADDR_TYPE_RANDOM;
289 		break;
290 	case VNIC_MAC_ADDR_TYPE_PRIMARY:
291 		/*
292 		 * We get the address here since we copy it in the
293 		 * vnic's vn_addr.
294 		 * We can't ask for hardware resources since we
295 		 * don't currently support hardware classification
296 		 * for these MAC clients.
297 		 */
298 		if (req_hwgrp_flag) {
299 			*diag = VNIC_IOC_DIAG_NO_HWRINGS;
300 			return (ENOTSUP);
301 		}
302 		mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg);
303 		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
304 		mac_flags |= MAC_UNICAST_VNIC_PRIMARY;
305 		break;
306 	}
307 
308 	vnic->vn_addr_type = vnic_addr_type;
309 
310 	err = mac_unicast_add(vnic->vn_mch, mac_addr_arg, mac_flags,
311 	    &vnic->vn_muh, vid, &mac_diag);
312 	if (err != 0) {
313 		if (vnic_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
314 			/* release factory MAC address */
315 			mac_addr_factory_release(vnic->vn_mch, *addr_slot);
316 		}
317 		*diag = vnic_mac2vnic_diag(mac_diag);
318 	}
319 
320 	return (err);
321 }
322 
323 /*
324  * Create a new VNIC upon request from administrator.
325  * Returns 0 on success, an errno on failure.
326  */
327 /* ARGSUSED */
328 int
329 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid,
330     vnic_mac_addr_type_t *vnic_addr_type, int *mac_len, uchar_t *mac_addr,
331     int *mac_slot, uint_t mac_prefix_len, uint16_t vid, vrid_t vrid,
332     int af, mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag,
333     cred_t *credp)
334 {
335 	vnic_t *vnic;
336 	mac_register_t *mac;
337 	int err;
338 	boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0);
339 	char vnic_name[MAXNAMELEN];
340 	const mac_info_t *minfop;
341 	uint32_t req_hwgrp_flag = B_FALSE;
342 
343 	*diag = VNIC_IOC_DIAG_NONE;
344 
345 	rw_enter(&vnic_lock, RW_WRITER);
346 
347 	/* does a VNIC with the same id already exist? */
348 	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
349 	    (mod_hash_val_t *)&vnic);
350 	if (err == 0) {
351 		rw_exit(&vnic_lock);
352 		return (EEXIST);
353 	}
354 
355 	vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP);
356 	if (vnic == NULL) {
357 		rw_exit(&vnic_lock);
358 		return (ENOMEM);
359 	}
360 
361 	bzero(vnic, sizeof (*vnic));
362 
363 	vnic->vn_id = vnic_id;
364 	vnic->vn_link_id = linkid;
365 	vnic->vn_vrid = vrid;
366 	vnic->vn_af = af;
367 
368 	if (!is_anchor) {
369 		if (linkid == DATALINK_INVALID_LINKID) {
370 			err = EINVAL;
371 			goto bail;
372 		}
373 
374 		/*
375 		 * Open the lower MAC and assign its initial bandwidth and
376 		 * MAC address. We do this here during VNIC creation and
377 		 * do not wait until the upper MAC client open so that we
378 		 * can validate the VNIC creation parameters (bandwidth,
379 		 * MAC address, etc) and reserve a factory MAC address if
380 		 * one was requested.
381 		 */
382 		err = mac_open_by_linkid(linkid, &vnic->vn_lower_mh);
383 		if (err != 0)
384 			goto bail;
385 
386 		/*
387 		 * VNIC(vlan) over VNICs(vlans) is not supported.
388 		 */
389 		if (mac_is_vnic(vnic->vn_lower_mh)) {
390 			err = EINVAL;
391 			goto bail;
392 		}
393 
394 		/* only ethernet support for now */
395 		minfop = mac_info(vnic->vn_lower_mh);
396 		if (minfop->mi_nativemedia != DL_ETHER) {
397 			err = ENOTSUP;
398 			goto bail;
399 		}
400 
401 		(void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL,
402 		    NULL);
403 		err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch,
404 		    vnic_name, MAC_OPEN_FLAGS_IS_VNIC);
405 		if (err != 0)
406 			goto bail;
407 
408 		/* assign a MAC address to the VNIC */
409 
410 		err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot,
411 		    mac_prefix_len, mac_len, mac_addr, flags, diag, vid,
412 		    req_hwgrp_flag);
413 		if (err != 0) {
414 			vnic->vn_muh = NULL;
415 			if (diag != NULL && req_hwgrp_flag)
416 				*diag = VNIC_IOC_DIAG_NO_HWRINGS;
417 			goto bail;
418 		}
419 
420 		/* register to receive notification from underlying MAC */
421 		vnic->vn_mnh = mac_notify_add(vnic->vn_lower_mh, vnic_notify_cb,
422 		    vnic);
423 
424 		*vnic_addr_type = vnic->vn_addr_type;
425 		vnic->vn_addr_len = *mac_len;
426 		vnic->vn_vid = vid;
427 
428 		bcopy(mac_addr, vnic->vn_addr, vnic->vn_addr_len);
429 
430 		if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY)
431 			vnic->vn_slot_id = *mac_slot;
432 
433 		/*
434 		 * Set the initial VNIC capabilities. If the VNIC is created
435 		 * over MACs which does not support nactive vlan, disable
436 		 * VNIC's hardware checksum capability if its VID is not 0,
437 		 * since the underlying MAC would get the hardware checksum
438 		 * offset wrong in case of VLAN packets.
439 		 */
440 		if (vid == 0 || !mac_capab_get(vnic->vn_lower_mh,
441 		    MAC_CAPAB_NO_NATIVEVLAN, NULL)) {
442 			if (!mac_capab_get(vnic->vn_lower_mh, MAC_CAPAB_HCKSUM,
443 			    &vnic->vn_hcksum_txflags))
444 				vnic->vn_hcksum_txflags = 0;
445 		} else {
446 			vnic->vn_hcksum_txflags = 0;
447 		}
448 	}
449 
450 	/* register with the MAC module */
451 	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
452 		goto bail;
453 
454 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
455 	mac->m_driver = vnic;
456 	mac->m_dip = vnic_get_dip();
457 	mac->m_instance = (uint_t)-1;
458 	mac->m_src_addr = vnic->vn_addr;
459 	mac->m_callbacks = &vnic_m_callbacks;
460 
461 	if (!is_anchor) {
462 		/*
463 		 * If this is a VNIC based VLAN, then we check for the
464 		 * margin unless it has been created with the force
465 		 * flag. If we are configuring a VLAN over an etherstub,
466 		 * we don't check the margin even if force is not set.
467 		 */
468 		if (vid == 0 || (flags & VNIC_IOC_CREATE_FORCE) != 0) {
469 			if (vid != VLAN_ID_NONE)
470 				vnic->vn_force = B_TRUE;
471 			/*
472 			 * As the current margin size of the underlying mac is
473 			 * used to determine the margin size of the VNIC
474 			 * itself, request the underlying mac not to change
475 			 * to a smaller margin size.
476 			 */
477 			err = mac_margin_add(vnic->vn_lower_mh,
478 			    &vnic->vn_margin, B_TRUE);
479 			ASSERT(err == 0);
480 		} else {
481 			vnic->vn_margin = VLAN_TAGSZ;
482 			err = mac_margin_add(vnic->vn_lower_mh,
483 			    &vnic->vn_margin, B_FALSE);
484 			if (err != 0) {
485 				mac_free(mac);
486 				if (diag != NULL)
487 					*diag = VNIC_IOC_DIAG_MACMARGIN_INVALID;
488 				goto bail;
489 			}
490 		}
491 
492 		mac_sdu_get(vnic->vn_lower_mh, &mac->m_min_sdu,
493 		    &mac->m_max_sdu);
494 	} else {
495 		vnic->vn_margin = VLAN_TAGSZ;
496 		mac->m_min_sdu = ANCHOR_VNIC_MIN_MTU;
497 		mac->m_max_sdu = ANCHOR_VNIC_MAX_MTU;
498 	}
499 
500 	mac->m_margin = vnic->vn_margin;
501 
502 	err = mac_register(mac, &vnic->vn_mh);
503 	mac_free(mac);
504 	if (err != 0) {
505 		VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
506 		    vnic->vn_margin) == 0);
507 		goto bail;
508 	}
509 
510 	/* Set the VNIC's MAC in the client */
511 	if (!is_anchor) {
512 		mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh, mrp);
513 
514 		if (mrp != NULL) {
515 			if ((mrp->mrp_mask & MRP_RX_RINGS) != 0 ||
516 			    (mrp->mrp_mask & MRP_TX_RINGS) != 0) {
517 				req_hwgrp_flag = B_TRUE;
518 			}
519 			err = mac_client_set_resources(vnic->vn_mch, mrp);
520 			if (err != 0) {
521 				(void) mac_unregister(vnic->vn_mh);
522 				goto bail;
523 			}
524 		}
525 	}
526 
527 	err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp));
528 	if (err != 0) {
529 		VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
530 		    vnic->vn_margin) == 0);
531 		(void) mac_unregister(vnic->vn_mh);
532 		goto bail;
533 	}
534 
535 	/* add new VNIC to hash table */
536 	err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id),
537 	    (mod_hash_val_t)vnic);
538 	ASSERT(err == 0);
539 	vnic_count++;
540 
541 	vnic->vn_enabled = B_TRUE;
542 	rw_exit(&vnic_lock);
543 
544 	return (0);
545 
546 bail:
547 	rw_exit(&vnic_lock);
548 	if (!is_anchor) {
549 		if (vnic->vn_mnh != NULL)
550 			(void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
551 		if (vnic->vn_muh != NULL)
552 			(void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
553 		if (vnic->vn_mch != NULL)
554 			mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
555 		if (vnic->vn_lower_mh != NULL)
556 			mac_close(vnic->vn_lower_mh);
557 	}
558 
559 	kmem_cache_free(vnic_cache, vnic);
560 	return (err);
561 }
562 
563 /*
564  * Modify the properties of an existing VNIC.
565  */
566 /* ARGSUSED */
567 int
568 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask,
569     vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr,
570     uint_t mac_slot, mac_resource_props_t *mrp)
571 {
572 	vnic_t *vnic = NULL;
573 
574 	rw_enter(&vnic_lock, RW_WRITER);
575 
576 	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
577 	    (mod_hash_val_t *)&vnic) != 0) {
578 		rw_exit(&vnic_lock);
579 		return (ENOENT);
580 	}
581 
582 	rw_exit(&vnic_lock);
583 
584 	return (0);
585 }
586 
587 /* ARGSUSED */
588 int
589 vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags, cred_t *credp)
590 {
591 	vnic_t *vnic = NULL;
592 	mod_hash_val_t val;
593 	datalink_id_t tmpid;
594 	int rc;
595 
596 	rw_enter(&vnic_lock, RW_WRITER);
597 
598 	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
599 	    (mod_hash_val_t *)&vnic) != 0) {
600 		rw_exit(&vnic_lock);
601 		return (ENOENT);
602 	}
603 
604 	if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid, B_TRUE)) != 0) {
605 		rw_exit(&vnic_lock);
606 		return (rc);
607 	}
608 
609 	ASSERT(vnic_id == tmpid);
610 
611 	/*
612 	 * We cannot unregister the MAC yet. Unregistering would
613 	 * free up mac_impl_t which should not happen at this time.
614 	 * So disable mac_impl_t by calling mac_disable(). This will prevent
615 	 * any new claims on mac_impl_t.
616 	 */
617 	if ((rc = mac_disable(vnic->vn_mh)) != 0) {
618 		(void) dls_devnet_create(vnic->vn_mh, vnic_id,
619 		    crgetzoneid(credp));
620 		rw_exit(&vnic_lock);
621 		return (rc);
622 	}
623 
624 	vnic->vn_enabled = B_FALSE;
625 	(void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val);
626 	ASSERT(vnic == (vnic_t *)val);
627 	vnic_count--;
628 	rw_exit(&vnic_lock);
629 
630 	/*
631 	 * XXX-nicolas shouldn't have a void cast here, if it's
632 	 * expected that the function will never fail, then we should
633 	 * have an ASSERT().
634 	 */
635 	(void) mac_unregister(vnic->vn_mh);
636 
637 	if (vnic->vn_lower_mh != NULL) {
638 		/*
639 		 * Check if MAC address for the vnic was obtained from the
640 		 * factory MAC addresses. If yes, release it.
641 		 */
642 		if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
643 			(void) mac_addr_factory_release(vnic->vn_mch,
644 			    vnic->vn_slot_id);
645 		}
646 		(void) mac_margin_remove(vnic->vn_lower_mh, vnic->vn_margin);
647 		(void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
648 		(void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
649 		mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
650 		mac_close(vnic->vn_lower_mh);
651 	}
652 
653 	kmem_cache_free(vnic_cache, vnic);
654 	return (0);
655 }
656 
657 /* ARGSUSED */
658 mblk_t *
659 vnic_m_tx(void *arg, mblk_t *mp_chain)
660 {
661 	/*
662 	 * This function could be invoked for an anchor VNIC when sending
663 	 * broadcast and multicast packets, and unicast packets which did
664 	 * not match any local known destination.
665 	 */
666 	freemsgchain(mp_chain);
667 	return (NULL);
668 }
669 
670 /*ARGSUSED*/
671 static void
672 vnic_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
673 {
674 	miocnak(q, mp, 0, ENOTSUP);
675 }
676 
677 /*
678  * This entry point cannot be passed-through, since it is invoked
679  * for the per-VNIC kstats which must be exported independently
680  * of the existence of VNIC MAC clients.
681  */
682 static int
683 vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
684 {
685 	vnic_t *vnic = arg;
686 	int rval = 0;
687 
688 	if (vnic->vn_lower_mh == NULL) {
689 		/*
690 		 * It's an anchor VNIC, which does not have any
691 		 * statistics in itself.
692 		 */
693 		return (ENOTSUP);
694 	}
695 
696 	/*
697 	 * ENOTSUP must be reported for unsupported stats, the VNIC
698 	 * driver reports a subset of the stats that would
699 	 * be returned by a real piece of hardware.
700 	 */
701 
702 	switch (stat) {
703 	case MAC_STAT_LINK_STATE:
704 	case MAC_STAT_LINK_UP:
705 	case MAC_STAT_PROMISC:
706 	case MAC_STAT_IFSPEED:
707 	case MAC_STAT_MULTIRCV:
708 	case MAC_STAT_MULTIXMT:
709 	case MAC_STAT_BRDCSTRCV:
710 	case MAC_STAT_BRDCSTXMT:
711 	case MAC_STAT_OPACKETS:
712 	case MAC_STAT_OBYTES:
713 	case MAC_STAT_IERRORS:
714 	case MAC_STAT_OERRORS:
715 	case MAC_STAT_RBYTES:
716 	case MAC_STAT_IPACKETS:
717 		*val = mac_client_stat_get(vnic->vn_mch, stat);
718 		break;
719 	default:
720 		rval = ENOTSUP;
721 	}
722 
723 	return (rval);
724 }
725 
726 /*
727  * Invoked by the upper MAC to retrieve the lower MAC client handle
728  * corresponding to a VNIC. A pointer to this function is obtained
729  * by the upper MAC via capability query.
730  *
731  * XXX-nicolas Note: this currently causes all VNIC MAC clients to
732  * receive the same MAC client handle for the same VNIC. This is ok
733  * as long as we have only one VNIC MAC client which sends and
734  * receives data, but we don't currently enforce this at the MAC layer.
735  */
736 static void *
737 vnic_mac_client_handle(void *vnic_arg)
738 {
739 	vnic_t *vnic = vnic_arg;
740 
741 	return (vnic->vn_mch);
742 }
743 
744 
745 /*
746  * Return information about the specified capability.
747  */
748 /* ARGSUSED */
749 static boolean_t
750 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
751 {
752 	vnic_t *vnic = arg;
753 
754 	switch (cap) {
755 	case MAC_CAPAB_HCKSUM: {
756 		uint32_t *hcksum_txflags = cap_data;
757 
758 		*hcksum_txflags = vnic->vn_hcksum_txflags &
759 		    (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM |
760 		    HCKSUM_INET_PARTIAL);
761 		break;
762 	}
763 	case MAC_CAPAB_VNIC: {
764 		mac_capab_vnic_t *vnic_capab = cap_data;
765 
766 		if (vnic->vn_lower_mh == NULL) {
767 			/*
768 			 * It's an anchor VNIC, we don't have an underlying
769 			 * NIC and MAC client handle.
770 			 */
771 			return (B_FALSE);
772 		}
773 
774 		if (vnic_capab != NULL) {
775 			vnic_capab->mcv_arg = vnic;
776 			vnic_capab->mcv_mac_client_handle =
777 			    vnic_mac_client_handle;
778 		}
779 		break;
780 	}
781 	case MAC_CAPAB_ANCHOR_VNIC: {
782 		/* since it's an anchor VNIC we don't have lower mac handle */
783 		if (vnic->vn_lower_mh == NULL) {
784 			ASSERT(vnic->vn_link_id == 0);
785 			return (B_TRUE);
786 		}
787 		return (B_FALSE);
788 	}
789 	case MAC_CAPAB_NO_NATIVEVLAN:
790 		return (B_FALSE);
791 	case MAC_CAPAB_NO_ZCOPY:
792 		return (B_TRUE);
793 	case MAC_CAPAB_VRRP: {
794 		mac_capab_vrrp_t *vrrp_capab = cap_data;
795 
796 		if (vnic->vn_vrid != 0) {
797 			if (vrrp_capab != NULL)
798 				vrrp_capab->mcv_af = vnic->vn_af;
799 			return (B_TRUE);
800 		}
801 		return (B_FALSE);
802 	}
803 	default:
804 		return (B_FALSE);
805 	}
806 	return (B_TRUE);
807 }
808 
/*
 * Start entry point. There is nothing to do; success is always reported.
 */
/* ARGSUSED */
static int
vnic_m_start(void *arg)
{
	const int ok = 0;

	return (ok);
}
815 
/*
 * Stop entry point. Intentionally empty: there is nothing to undo.
 */
/* ARGSUSED */
static void
vnic_m_stop(void *arg)
{
	/* nothing to do */
}
821 
822 /* ARGSUSED */
823 static int
824 vnic_m_promisc(void *arg, boolean_t on)
825 {
826 	return (0);
827 }
828 
829 /* ARGSUSED */
830 static int
831 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
832 {
833 	return (0);
834 }
835 
836 static int
837 vnic_m_unicst(void *arg, const uint8_t *macaddr)
838 {
839 	vnic_t *vnic = arg;
840 
841 	return (mac_vnic_unicast_set(vnic->vn_mch, macaddr));
842 }
843 
844 /*
845  * Callback functions for set/get of properties
846  */
847 /*ARGSUSED*/
848 static int
849 vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
850     uint_t pr_valsize, const void *pr_val)
851 {
852 	int 		err = ENOTSUP;
853 	vnic_t		*vn = m_driver;
854 
855 	/* allow setting MTU only on an etherstub */
856 	if (vn->vn_link_id != DATALINK_INVALID_LINKID)
857 		return (err);
858 
859 	switch (pr_num) {
860 	case MAC_PROP_MTU: {
861 		uint32_t	mtu;
862 
863 		if (pr_valsize < sizeof (mtu)) {
864 			err = EINVAL;
865 			break;
866 		}
867 		bcopy(pr_val, &mtu, sizeof (mtu));
868 		if (mtu < ANCHOR_VNIC_MIN_MTU || mtu > ANCHOR_VNIC_MAX_MTU) {
869 			err = EINVAL;
870 			break;
871 		}
872 		err = mac_maxsdu_update(vn->vn_mh, mtu);
873 		break;
874 	}
875 	default:
876 		break;
877 	}
878 	return (err);
879 }
880 
881 /* ARGSUSED */
882 static void vnic_m_propinfo(void *m_driver, const char *pr_name,
883     mac_prop_id_t pr_num, mac_prop_info_handle_t prh)
884 {
885 	vnic_t		*vn = m_driver;
886 
887 	/* MTU setting allowed only on an etherstub */
888 	if (vn->vn_link_id != DATALINK_INVALID_LINKID)
889 		return;
890 
891 	switch (pr_num) {
892 	case MAC_PROP_MTU:
893 		mac_prop_info_set_range_uint32(prh,
894 		    ANCHOR_VNIC_MIN_MTU, ANCHOR_VNIC_MAX_MTU);
895 		break;
896 	}
897 }
898 
899 
900 int
901 vnic_info(vnic_info_t *info, cred_t *credp)
902 {
903 	vnic_t		*vnic;
904 	int		err;
905 
906 	/* Make sure that the VNIC link is visible from the caller's zone. */
907 	if (!dls_devnet_islinkvisible(info->vn_vnic_id, crgetzoneid(credp)))
908 		return (ENOENT);
909 
910 	rw_enter(&vnic_lock, RW_WRITER);
911 
912 	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(info->vn_vnic_id),
913 	    (mod_hash_val_t *)&vnic);
914 	if (err != 0) {
915 		rw_exit(&vnic_lock);
916 		return (ENOENT);
917 	}
918 
919 	info->vn_link_id = vnic->vn_link_id;
920 	info->vn_mac_addr_type = vnic->vn_addr_type;
921 	info->vn_mac_len = vnic->vn_addr_len;
922 	bcopy(vnic->vn_addr, info->vn_mac_addr, MAXMACADDRLEN);
923 	info->vn_mac_slot = vnic->vn_slot_id;
924 	info->vn_mac_prefix_len = 0;
925 	info->vn_vid = vnic->vn_vid;
926 	info->vn_force = vnic->vn_force;
927 	info->vn_vrid = vnic->vn_vrid;
928 	info->vn_af = vnic->vn_af;
929 
930 	bzero(&info->vn_resource_props, sizeof (mac_resource_props_t));
931 	if (vnic->vn_mch != NULL)
932 		mac_resource_ctl_get(vnic->vn_mch, &info->vn_resource_props);
933 
934 	rw_exit(&vnic_lock);
935 	return (0);
936 }
937 
938 static void
939 vnic_notify_cb(void *arg, mac_notify_type_t type)
940 {
941 	vnic_t *vnic = arg;
942 
943 	/*
944 	 * Do not deliver notifications if the vnic is not fully initialized
945 	 * or is in process of being torn down.
946 	 */
947 	if (!vnic->vn_enabled)
948 		return;
949 
950 	switch (type) {
951 	case MAC_NOTE_UNICST:
952 		/*
953 		 * Only the VLAN VNIC needs to be notified with primary MAC
954 		 * address change.
955 		 */
956 		if (vnic->vn_addr_type != VNIC_MAC_ADDR_TYPE_PRIMARY)
957 			return;
958 
959 		/*  the unicast MAC address value */
960 		mac_unicast_primary_get(vnic->vn_lower_mh, vnic->vn_addr);
961 
962 		/* notify its upper layer MAC about MAC address change */
963 		mac_unicst_update(vnic->vn_mh, (const uint8_t *)vnic->vn_addr);
964 		break;
965 
966 	case MAC_NOTE_LINK:
967 		mac_link_update(vnic->vn_mh,
968 		    mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE));
969 		break;
970 
971 	default:
972 		break;
973 	}
974 }
975