xref: /titanic_52/usr/src/uts/common/io/vnic/vnic_dev.c (revision 48bc00d6814e04ff3edb32cafe7d1bc580baff68)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/cred.h>
28 #include <sys/sysmacros.h>
29 #include <sys/conf.h>
30 #include <sys/cmn_err.h>
31 #include <sys/list.h>
32 #include <sys/ksynch.h>
33 #include <sys/kmem.h>
34 #include <sys/stream.h>
35 #include <sys/modctl.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/atomic.h>
39 #include <sys/stat.h>
40 #include <sys/modhash.h>
41 #include <sys/strsubr.h>
42 #include <sys/strsun.h>
43 #include <sys/dlpi.h>
44 #include <sys/mac.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_client.h>
47 #include <sys/mac_client_priv.h>
48 #include <sys/mac_ether.h>
49 #include <sys/dls.h>
50 #include <sys/pattr.h>
51 #include <sys/time.h>
52 #include <sys/vlan.h>
53 #include <sys/vnic.h>
54 #include <sys/vnic_impl.h>
55 #include <sys/mac_flow_impl.h>
56 #include <inet/ip_impl.h>
57 
58 /*
 * Note that for best performance, the VNIC is a passthrough design.
 * Each VNIC corresponds to a MAC client of the underlying MAC (lower MAC).
 * This MAC client is opened by the VNIC driver at VNIC creation,
 * and closed when the VNIC is deleted.
 * When a MAC client of the VNIC itself opens a VNIC, the MAC layer
 * (upper MAC) detects that the MAC being opened is a VNIC. Instead
 * of allocating a new MAC client, it asks the VNIC driver to return
 * the lower MAC client handle associated with the VNIC, and that handle
 * is returned to the upper MAC client directly. This gives upper MAC
 * clients of the VNIC direct access to the lower MAC client for both
 * the control path and the data path.
70  *
71  * Due to this passthrough, some of the entry points exported by the
72  * VNIC driver are never directly invoked. These entry points include
73  * vnic_m_start, vnic_m_stop, vnic_m_promisc, vnic_m_multicst, etc.
74  */
75 
76 static int vnic_m_start(void *);
77 static void vnic_m_stop(void *);
78 static int vnic_m_promisc(void *, boolean_t);
79 static int vnic_m_multicst(void *, boolean_t, const uint8_t *);
80 static int vnic_m_unicst(void *, const uint8_t *);
81 static int vnic_m_stat(void *, uint_t, uint64_t *);
82 static void vnic_m_ioctl(void *, queue_t *, mblk_t *);
83 static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
84     const void *);
85 static int vnic_m_getprop(void *, const char *, mac_prop_id_t, uint_t,
86     uint_t, void *, uint_t *);
87 static mblk_t *vnic_m_tx(void *, mblk_t *);
88 static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
89 static void vnic_notify_cb(void *, mac_notify_type_t);
90 
91 static kmem_cache_t	*vnic_cache;
92 static krwlock_t	vnic_lock;
93 static uint_t		vnic_count;
94 
95 #define	ANCHOR_VNIC_MIN_MTU	576
96 #define	ANCHOR_VNIC_MAX_MTU	9000
97 
98 /* hash of VNICs (vnic_t's), keyed by VNIC id */
99 static mod_hash_t	*vnic_hash;
100 #define	VNIC_HASHSZ	64
101 #define	VNIC_HASH_KEY(vnic_id)	((mod_hash_key_t)(uintptr_t)vnic_id)
102 
103 #define	VNIC_M_CALLBACK_FLAGS	\
104 	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP)
105 
106 static mac_callbacks_t vnic_m_callbacks = {
107 	VNIC_M_CALLBACK_FLAGS,
108 	vnic_m_stat,
109 	vnic_m_start,
110 	vnic_m_stop,
111 	vnic_m_promisc,
112 	vnic_m_multicst,
113 	vnic_m_unicst,
114 	vnic_m_tx,
115 	vnic_m_ioctl,
116 	vnic_m_capab_get,
117 	NULL,
118 	NULL,
119 	vnic_m_setprop,
120 	vnic_m_getprop
121 };
122 
123 void
124 vnic_dev_init(void)
125 {
126 	vnic_cache = kmem_cache_create("vnic_cache",
127 	    sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
128 
129 	vnic_hash = mod_hash_create_idhash("vnic_hash",
130 	    VNIC_HASHSZ, mod_hash_null_valdtor);
131 
132 	rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);
133 
134 	vnic_count = 0;
135 }
136 
137 void
138 vnic_dev_fini(void)
139 {
140 	ASSERT(vnic_count == 0);
141 
142 	rw_destroy(&vnic_lock);
143 	mod_hash_destroy_idhash(vnic_hash);
144 	kmem_cache_destroy(vnic_cache);
145 }
146 
147 uint_t
148 vnic_dev_count(void)
149 {
150 	return (vnic_count);
151 }
152 
153 static vnic_ioc_diag_t
154 vnic_mac2vnic_diag(mac_diag_t diag)
155 {
156 	switch (diag) {
157 	case MAC_DIAG_MACADDR_NIC:
158 		return (VNIC_IOC_DIAG_MACADDR_NIC);
159 	case MAC_DIAG_MACADDR_INUSE:
160 		return (VNIC_IOC_DIAG_MACADDR_INUSE);
161 	case MAC_DIAG_MACADDR_INVALID:
162 		return (VNIC_IOC_DIAG_MACADDR_INVALID);
163 	case MAC_DIAG_MACADDRLEN_INVALID:
164 		return (VNIC_IOC_DIAG_MACADDRLEN_INVALID);
165 	case MAC_DIAG_MACFACTORYSLOTINVALID:
166 		return (VNIC_IOC_DIAG_MACFACTORYSLOTINVALID);
167 	case MAC_DIAG_MACFACTORYSLOTUSED:
168 		return (VNIC_IOC_DIAG_MACFACTORYSLOTUSED);
169 	case MAC_DIAG_MACFACTORYSLOTALLUSED:
170 		return (VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED);
171 	case MAC_DIAG_MACFACTORYNOTSUP:
172 		return (VNIC_IOC_DIAG_MACFACTORYNOTSUP);
173 	case MAC_DIAG_MACPREFIX_INVALID:
174 		return (VNIC_IOC_DIAG_MACPREFIX_INVALID);
175 	case MAC_DIAG_MACPREFIXLEN_INVALID:
176 		return (VNIC_IOC_DIAG_MACPREFIXLEN_INVALID);
177 	case MAC_DIAG_MACNO_HWRINGS:
178 		return (VNIC_IOC_DIAG_NO_HWRINGS);
179 	default:
180 		return (VNIC_IOC_DIAG_NONE);
181 	}
182 }
183 
184 static int
185 vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type,
186     int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg,
187     uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag,
188     uint16_t vid)
189 {
190 	mac_diag_t mac_diag;
191 	uint16_t mac_flags = 0;
192 	int err;
193 	uint_t addr_len;
194 
195 	if (flags & VNIC_IOC_CREATE_NODUPCHECK)
196 		mac_flags |= MAC_UNICAST_NODUPCHECK;
197 
198 	switch (vnic_addr_type) {
199 	case VNIC_MAC_ADDR_TYPE_FIXED:
200 		/*
201 		 * The MAC address value to assign to the VNIC
202 		 * is already provided in mac_addr_arg. addr_len_ptr_arg
203 		 * already contains the MAC address length.
204 		 */
205 		break;
206 
207 	case VNIC_MAC_ADDR_TYPE_RANDOM:
208 		/*
209 		 * Random MAC address. There are two sub-cases:
210 		 *
211 		 * 1 - If mac_len == 0, a new MAC address is generated.
212 		 *	The length of the MAC address to generated depends
213 		 *	on the type of MAC used. The prefix to use for the MAC
214 		 *	address is stored in the most significant bytes
215 		 *	of the mac_addr argument, and its length is specified
216 		 *	by the mac_prefix_len argument. This prefix can
217 		 *	correspond to a IEEE OUI in the case of Ethernet,
218 		 *	for example.
219 		 *
220 		 * 2 - If mac_len > 0, the address was already picked
221 		 *	randomly, and is now passed back during VNIC
222 		 *	re-creation. The mac_addr argument contains the MAC
223 		 *	address that was generated. We distinguish this
224 		 *	case from the fixed MAC address case, since we
225 		 *	want the user consumers to know, when they query
226 		 *	the list of VNICs, that a VNIC was assigned a
227 		 *	random MAC address vs assigned a fixed address
228 		 *	specified by the user.
229 		 */
230 
231 		/*
232 		 * If it's a pre-generated address, we're done. mac_addr_arg
233 		 * and addr_len_ptr_arg already contain the MAC address
234 		 * value and length.
235 		 */
236 		if (*addr_len_ptr_arg > 0)
237 			break;
238 
239 		/* generate a new random MAC address */
240 		if ((err = mac_addr_random(vnic->vn_mch,
241 		    prefix_len, mac_addr_arg, &mac_diag)) != 0) {
242 			*diag = vnic_mac2vnic_diag(mac_diag);
243 			return (err);
244 		}
245 		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
246 		break;
247 
248 	case VNIC_MAC_ADDR_TYPE_FACTORY:
249 		err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
250 		if (err != 0) {
251 			if (err == EINVAL)
252 				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTINVALID;
253 			if (err == EBUSY)
254 				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTUSED;
255 			if (err == ENOSPC)
256 				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED;
257 			return (err);
258 		}
259 
260 		mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
261 		    mac_addr_arg, &addr_len, NULL, NULL);
262 		*addr_len_ptr_arg = addr_len;
263 		break;
264 
265 	case VNIC_MAC_ADDR_TYPE_AUTO:
266 		/* first try to allocate a factory MAC address */
267 		err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
268 		if (err == 0) {
269 			mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
270 			    mac_addr_arg, &addr_len, NULL, NULL);
271 			vnic_addr_type = VNIC_MAC_ADDR_TYPE_FACTORY;
272 			*addr_len_ptr_arg = addr_len;
273 			break;
274 		}
275 
276 		/*
277 		 * Allocating a factory MAC address failed, generate a
278 		 * random MAC address instead.
279 		 */
280 		if ((err = mac_addr_random(vnic->vn_mch,
281 		    prefix_len, mac_addr_arg, &mac_diag)) != 0) {
282 			*diag = vnic_mac2vnic_diag(mac_diag);
283 			return (err);
284 		}
285 		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
286 		vnic_addr_type = VNIC_MAC_ADDR_TYPE_RANDOM;
287 		break;
288 	case VNIC_MAC_ADDR_TYPE_PRIMARY:
289 		/*
290 		 * We get the address here since we copy it in the
291 		 * vnic's vn_addr.
292 		 */
293 		mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg);
294 		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
295 		mac_flags |= MAC_UNICAST_VNIC_PRIMARY;
296 		break;
297 	}
298 
299 	vnic->vn_addr_type = vnic_addr_type;
300 
301 	err = mac_unicast_add(vnic->vn_mch, mac_addr_arg, mac_flags,
302 	    &vnic->vn_muh, vid, &mac_diag);
303 	if (err != 0) {
304 		if (vnic_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
305 			/* release factory MAC address */
306 			mac_addr_factory_release(vnic->vn_mch, *addr_slot);
307 		}
308 		*diag = vnic_mac2vnic_diag(mac_diag);
309 	}
310 
311 	return (err);
312 }
313 
314 /*
315  * Create a new VNIC upon request from administrator.
316  * Returns 0 on success, an errno on failure.
317  */
318 /* ARGSUSED */
319 int
320 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid,
321     vnic_mac_addr_type_t *vnic_addr_type, int *mac_len, uchar_t *mac_addr,
322     int *mac_slot, uint_t mac_prefix_len, uint16_t vid,
323     mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag,
324     cred_t *credp)
325 {
326 	vnic_t *vnic;
327 	mac_register_t *mac;
328 	int err;
329 	boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0);
330 	char vnic_name[MAXNAMELEN];
331 	const mac_info_t *minfop;
332 	uint32_t req_hwgrp_flag = ((flags & VNIC_IOC_CREATE_REQ_HWRINGS) != 0) ?
333 	    MAC_OPEN_FLAGS_REQ_HWRINGS : 0;
334 
335 	*diag = VNIC_IOC_DIAG_NONE;
336 
337 	rw_enter(&vnic_lock, RW_WRITER);
338 
339 	/* does a VNIC with the same id already exist? */
340 	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
341 	    (mod_hash_val_t *)&vnic);
342 	if (err == 0) {
343 		rw_exit(&vnic_lock);
344 		return (EEXIST);
345 	}
346 
347 	vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP);
348 	if (vnic == NULL) {
349 		rw_exit(&vnic_lock);
350 		return (ENOMEM);
351 	}
352 
353 	bzero(vnic, sizeof (*vnic));
354 
355 	vnic->vn_id = vnic_id;
356 	vnic->vn_link_id = linkid;
357 
358 	if (!is_anchor) {
359 		if (linkid == DATALINK_INVALID_LINKID) {
360 			err = EINVAL;
361 			goto bail;
362 		}
363 
364 		/*
365 		 * Open the lower MAC and assign its initial bandwidth and
366 		 * MAC address. We do this here during VNIC creation and
367 		 * do not wait until the upper MAC client open so that we
368 		 * can validate the VNIC creation parameters (bandwidth,
369 		 * MAC address, etc) and reserve a factory MAC address if
370 		 * one was requested.
371 		 */
372 		err = mac_open_by_linkid(linkid, &vnic->vn_lower_mh);
373 		if (err != 0)
374 			goto bail;
375 
376 		/*
377 		 * VNIC(vlan) over VNICs(vlans) is not supported.
378 		 */
379 		if (mac_is_vnic(vnic->vn_lower_mh)) {
380 			err = EINVAL;
381 			goto bail;
382 		}
383 
384 		/* only ethernet support for now */
385 		minfop = mac_info(vnic->vn_lower_mh);
386 		if (minfop->mi_nativemedia != DL_ETHER) {
387 			err = ENOTSUP;
388 			goto bail;
389 		}
390 
391 		(void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL,
392 		    NULL);
393 		err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch,
394 		    vnic_name, MAC_OPEN_FLAGS_IS_VNIC | req_hwgrp_flag);
395 		if (err != 0)
396 			goto bail;
397 
398 		if (mrp != NULL) {
399 			err = mac_client_set_resources(vnic->vn_mch, mrp);
400 			if (err != 0)
401 				goto bail;
402 		}
403 		/* assign a MAC address to the VNIC */
404 
405 		err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot,
406 		    mac_prefix_len, mac_len, mac_addr, flags, diag, vid);
407 		if (err != 0) {
408 			vnic->vn_muh = NULL;
409 			if (diag != NULL && req_hwgrp_flag != 0)
410 				*diag = VNIC_IOC_DIAG_NO_HWRINGS;
411 			goto bail;
412 		}
413 
414 		/* register to receive notification from underlying MAC */
415 		vnic->vn_mnh = mac_notify_add(vnic->vn_lower_mh, vnic_notify_cb,
416 		    vnic);
417 
418 		*vnic_addr_type = vnic->vn_addr_type;
419 		vnic->vn_addr_len = *mac_len;
420 		vnic->vn_vid = vid;
421 
422 		bcopy(mac_addr, vnic->vn_addr, vnic->vn_addr_len);
423 
424 		if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY)
425 			vnic->vn_slot_id = *mac_slot;
426 
427 		/* set the initial VNIC capabilities */
428 		if (!mac_capab_get(vnic->vn_lower_mh, MAC_CAPAB_HCKSUM,
429 		    &vnic->vn_hcksum_txflags))
430 			vnic->vn_hcksum_txflags = 0;
431 	}
432 
433 	/* register with the MAC module */
434 	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
435 		goto bail;
436 
437 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
438 	mac->m_driver = vnic;
439 	mac->m_dip = vnic_get_dip();
440 	mac->m_instance = (uint_t)-1;
441 	mac->m_src_addr = vnic->vn_addr;
442 	mac->m_callbacks = &vnic_m_callbacks;
443 
444 	if (!is_anchor) {
445 		/*
446 		 * If this is a VNIC based VLAN, then we check for the
447 		 * margin unless it has been created with the force
448 		 * flag. If we are configuring a VLAN over an etherstub,
449 		 * we don't check the margin even if force is not set.
450 		 */
451 		if (vid == 0 || (flags & VNIC_IOC_CREATE_FORCE) != 0) {
452 			if (vid != VLAN_ID_NONE)
453 				vnic->vn_force = B_TRUE;
454 			/*
455 			 * As the current margin size of the underlying mac is
456 			 * used to determine the margin size of the VNIC
457 			 * itself, request the underlying mac not to change
458 			 * to a smaller margin size.
459 			 */
460 			err = mac_margin_add(vnic->vn_lower_mh,
461 			    &vnic->vn_margin, B_TRUE);
462 			ASSERT(err == 0);
463 		} else {
464 			vnic->vn_margin = VLAN_TAGSZ;
465 			err = mac_margin_add(vnic->vn_lower_mh,
466 			    &vnic->vn_margin, B_FALSE);
467 			if (err != 0) {
468 				mac_free(mac);
469 				if (diag != NULL)
470 					*diag = VNIC_IOC_DIAG_MACMARGIN_INVALID;
471 				goto bail;
472 			}
473 		}
474 
475 		mac_sdu_get(vnic->vn_lower_mh, &mac->m_min_sdu,
476 		    &mac->m_max_sdu);
477 	} else {
478 		vnic->vn_margin = VLAN_TAGSZ;
479 		mac->m_min_sdu = ANCHOR_VNIC_MIN_MTU;
480 		mac->m_max_sdu = ANCHOR_VNIC_MAX_MTU;
481 	}
482 
483 	mac->m_margin = vnic->vn_margin;
484 
485 	err = mac_register(mac, &vnic->vn_mh);
486 	mac_free(mac);
487 	if (err != 0) {
488 		VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
489 		    vnic->vn_margin) == 0);
490 		goto bail;
491 	}
492 
493 	/* Set the VNIC's MAC in the client */
494 	if (!is_anchor)
495 		mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh);
496 
497 	err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp));
498 	if (err != 0) {
499 		VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
500 		    vnic->vn_margin) == 0);
501 		(void) mac_unregister(vnic->vn_mh);
502 		goto bail;
503 	}
504 
505 	/* add new VNIC to hash table */
506 	err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id),
507 	    (mod_hash_val_t)vnic);
508 	ASSERT(err == 0);
509 	vnic_count++;
510 
511 	vnic->vn_enabled = B_TRUE;
512 	rw_exit(&vnic_lock);
513 
514 	return (0);
515 
516 bail:
517 	rw_exit(&vnic_lock);
518 	if (!is_anchor) {
519 		if (vnic->vn_mnh != NULL)
520 			(void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
521 		if (vnic->vn_muh != NULL)
522 			(void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
523 		if (vnic->vn_mch != NULL)
524 			mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
525 		if (vnic->vn_lower_mh != NULL)
526 			mac_close(vnic->vn_lower_mh);
527 	}
528 
529 	kmem_cache_free(vnic_cache, vnic);
530 	return (err);
531 }
532 
533 /*
534  * Modify the properties of an existing VNIC.
535  */
536 /* ARGSUSED */
537 int
538 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask,
539     vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr,
540     uint_t mac_slot, mac_resource_props_t *mrp)
541 {
542 	vnic_t *vnic = NULL;
543 
544 	rw_enter(&vnic_lock, RW_WRITER);
545 
546 	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
547 	    (mod_hash_val_t *)&vnic) != 0) {
548 		rw_exit(&vnic_lock);
549 		return (ENOENT);
550 	}
551 
552 	rw_exit(&vnic_lock);
553 
554 	return (0);
555 }
556 
557 /* ARGSUSED */
558 int
559 vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags, cred_t *credp)
560 {
561 	vnic_t *vnic = NULL;
562 	mod_hash_val_t val;
563 	datalink_id_t tmpid;
564 	int rc;
565 
566 	rw_enter(&vnic_lock, RW_WRITER);
567 
568 	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
569 	    (mod_hash_val_t *)&vnic) != 0) {
570 		rw_exit(&vnic_lock);
571 		return (ENOENT);
572 	}
573 
574 	if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid, B_TRUE)) != 0) {
575 		rw_exit(&vnic_lock);
576 		return (rc);
577 	}
578 
579 	ASSERT(vnic_id == tmpid);
580 
581 	/*
582 	 * We cannot unregister the MAC yet. Unregistering would
583 	 * free up mac_impl_t which should not happen at this time.
584 	 * So disable mac_impl_t by calling mac_disable(). This will prevent
585 	 * any new claims on mac_impl_t.
586 	 */
587 	if ((rc = mac_disable(vnic->vn_mh)) != 0) {
588 		(void) dls_devnet_create(vnic->vn_mh, vnic_id,
589 		    crgetzoneid(credp));
590 		rw_exit(&vnic_lock);
591 		return (rc);
592 	}
593 
594 	vnic->vn_enabled = B_FALSE;
595 	(void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val);
596 	ASSERT(vnic == (vnic_t *)val);
597 	vnic_count--;
598 	rw_exit(&vnic_lock);
599 
600 	/*
601 	 * XXX-nicolas shouldn't have a void cast here, if it's
602 	 * expected that the function will never fail, then we should
603 	 * have an ASSERT().
604 	 */
605 	(void) mac_unregister(vnic->vn_mh);
606 
607 	if (vnic->vn_lower_mh != NULL) {
608 		/*
609 		 * Check if MAC address for the vnic was obtained from the
610 		 * factory MAC addresses. If yes, release it.
611 		 */
612 		if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
613 			(void) mac_addr_factory_release(vnic->vn_mch,
614 			    vnic->vn_slot_id);
615 		}
616 		(void) mac_margin_remove(vnic->vn_lower_mh, vnic->vn_margin);
617 		(void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
618 		(void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
619 		mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
620 		mac_close(vnic->vn_lower_mh);
621 	}
622 
623 	kmem_cache_free(vnic_cache, vnic);
624 	return (0);
625 }
626 
627 /* ARGSUSED */
628 mblk_t *
629 vnic_m_tx(void *arg, mblk_t *mp_chain)
630 {
631 	/*
632 	 * This function could be invoked for an anchor VNIC when sending
633 	 * broadcast and multicast packets, and unicast packets which did
634 	 * not match any local known destination.
635 	 */
636 	freemsgchain(mp_chain);
637 	return (NULL);
638 }
639 
640 /*ARGSUSED*/
641 static void
642 vnic_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
643 {
644 	miocnak(q, mp, 0, ENOTSUP);
645 }
646 
647 /*
648  * This entry point cannot be passed-through, since it is invoked
649  * for the per-VNIC kstats which must be exported independently
650  * of the existence of VNIC MAC clients.
651  */
652 static int
653 vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
654 {
655 	vnic_t *vnic = arg;
656 	int rval = 0;
657 
658 	if (vnic->vn_lower_mh == NULL) {
659 		/*
660 		 * It's an anchor VNIC, which does not have any
661 		 * statistics in itself.
662 		 */
663 		return (ENOTSUP);
664 	}
665 
666 	/*
667 	 * ENOTSUP must be reported for unsupported stats, the VNIC
668 	 * driver reports a subset of the stats that would
669 	 * be returned by a real piece of hardware.
670 	 */
671 
672 	switch (stat) {
673 	case MAC_STAT_LINK_STATE:
674 	case MAC_STAT_LINK_UP:
675 	case MAC_STAT_PROMISC:
676 	case MAC_STAT_IFSPEED:
677 	case MAC_STAT_MULTIRCV:
678 	case MAC_STAT_MULTIXMT:
679 	case MAC_STAT_BRDCSTRCV:
680 	case MAC_STAT_BRDCSTXMT:
681 	case MAC_STAT_OPACKETS:
682 	case MAC_STAT_OBYTES:
683 	case MAC_STAT_IERRORS:
684 	case MAC_STAT_OERRORS:
685 	case MAC_STAT_RBYTES:
686 	case MAC_STAT_IPACKETS:
687 		*val = mac_client_stat_get(vnic->vn_mch, stat);
688 		break;
689 	default:
690 		rval = ENOTSUP;
691 	}
692 
693 	return (rval);
694 }
695 
696 /*
697  * Invoked by the upper MAC to retrieve the lower MAC client handle
698  * corresponding to a VNIC. A pointer to this function is obtained
699  * by the upper MAC via capability query.
700  *
701  * XXX-nicolas Note: this currently causes all VNIC MAC clients to
702  * receive the same MAC client handle for the same VNIC. This is ok
703  * as long as we have only one VNIC MAC client which sends and
704  * receives data, but we don't currently enforce this at the MAC layer.
705  */
706 static void *
707 vnic_mac_client_handle(void *vnic_arg)
708 {
709 	vnic_t *vnic = vnic_arg;
710 
711 	return (vnic->vn_mch);
712 }
713 
714 
715 /*
716  * Return information about the specified capability.
717  */
718 /* ARGSUSED */
719 static boolean_t
720 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
721 {
722 	vnic_t *vnic = arg;
723 
724 	switch (cap) {
725 	case MAC_CAPAB_HCKSUM: {
726 		uint32_t *hcksum_txflags = cap_data;
727 
728 		*hcksum_txflags = vnic->vn_hcksum_txflags &
729 		    (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM |
730 		    HCKSUM_INET_PARTIAL);
731 		break;
732 	}
733 	case MAC_CAPAB_VNIC: {
734 		mac_capab_vnic_t *vnic_capab = cap_data;
735 
736 		if (vnic->vn_lower_mh == NULL) {
737 			/*
738 			 * It's an anchor VNIC, we don't have an underlying
739 			 * NIC and MAC client handle.
740 			 */
741 			return (B_FALSE);
742 		}
743 
744 		if (vnic_capab != NULL) {
745 			vnic_capab->mcv_arg = vnic;
746 			vnic_capab->mcv_mac_client_handle =
747 			    vnic_mac_client_handle;
748 		}
749 		break;
750 	}
751 	case MAC_CAPAB_ANCHOR_VNIC: {
752 		/* since it's an anchor VNIC we don't have lower mac handle */
753 		if (vnic->vn_lower_mh == NULL) {
754 			ASSERT(vnic->vn_link_id == 0);
755 			return (B_TRUE);
756 		}
757 		return (B_FALSE);
758 	}
759 	case MAC_CAPAB_NO_NATIVEVLAN:
760 	case MAC_CAPAB_NO_ZCOPY:
761 		return (B_TRUE);
762 	default:
763 		return (B_FALSE);
764 	}
765 	return (B_TRUE);
766 }
767 
/*
 * Start entry point: nothing to do, always succeeds. Because of the
 * passthrough design this entry point is never directly invoked.
 */
/* ARGSUSED */
static int
vnic_m_start(void *arg)
{
	return (0);
}
774 
/*
 * Stop entry point: nothing to do. Because of the passthrough design
 * this entry point is never directly invoked.
 */
/* ARGSUSED */
static void
vnic_m_stop(void *arg)
{
}
780 
781 /* ARGSUSED */
782 static int
783 vnic_m_promisc(void *arg, boolean_t on)
784 {
785 	return (0);
786 }
787 
788 /* ARGSUSED */
789 static int
790 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
791 {
792 	return (0);
793 }
794 
795 static int
796 vnic_m_unicst(void *arg, const uint8_t *macaddr)
797 {
798 	vnic_t *vnic = arg;
799 
800 	return (mac_vnic_unicast_set(vnic->vn_mch, macaddr));
801 }
802 
803 /*
804  * Callback functions for set/get of properties
805  */
806 /*ARGSUSED*/
807 static int
808 vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
809     uint_t pr_valsize, const void *pr_val)
810 {
811 	int 		err = ENOTSUP;
812 	vnic_t		*vn = m_driver;
813 
814 	/* allow setting MTU only on an etherstub */
815 	if (vn->vn_link_id != DATALINK_INVALID_LINKID)
816 		return (err);
817 
818 	switch (pr_num) {
819 	case MAC_PROP_MTU: {
820 		uint32_t	mtu;
821 
822 		if (pr_valsize < sizeof (mtu)) {
823 			err = EINVAL;
824 			break;
825 		}
826 		bcopy(pr_val, &mtu, sizeof (mtu));
827 		if (mtu < ANCHOR_VNIC_MIN_MTU || mtu > ANCHOR_VNIC_MAX_MTU) {
828 			err = EINVAL;
829 			break;
830 		}
831 		err = mac_maxsdu_update(vn->vn_mh, mtu);
832 		break;
833 	}
834 	default:
835 		break;
836 	}
837 	return (err);
838 }
839 
840 /*ARGSUSED*/
841 static int
842 vnic_m_getprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
843     uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm)
844 {
845 	mac_propval_range_t 	range;
846 	vnic_t			*vn = m_driver;
847 	int 			err = ENOTSUP;
848 
849 	/* MTU setting allowed only on an etherstub */
850 	if (vn->vn_link_id != DATALINK_INVALID_LINKID)
851 		return (err);
852 
853 	switch (pr_num) {
854 	case MAC_PROP_MTU:
855 		if (!(pr_flags & MAC_PROP_POSSIBLE))
856 			return (ENOTSUP);
857 		if (pr_valsize < sizeof (mac_propval_range_t))
858 			return (EINVAL);
859 		range.mpr_count = 1;
860 		range.mpr_type = MAC_PROPVAL_UINT32;
861 		range.range_uint32[0].mpur_min = ANCHOR_VNIC_MIN_MTU;
862 		range.range_uint32[0].mpur_max = ANCHOR_VNIC_MAX_MTU;
863 		bcopy(&range, pr_val, sizeof (range));
864 		return (0);
865 	default:
866 		break;
867 	}
868 
869 	return (err);
870 }
871 
872 int
873 vnic_info(vnic_info_t *info, cred_t *credp)
874 {
875 	vnic_t		*vnic;
876 	int		err;
877 
878 	/* Make sure that the VNIC link is visible from the caller's zone. */
879 	if (!dls_devnet_islinkvisible(info->vn_vnic_id, crgetzoneid(credp)))
880 		return (ENOENT);
881 
882 	rw_enter(&vnic_lock, RW_WRITER);
883 
884 	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(info->vn_vnic_id),
885 	    (mod_hash_val_t *)&vnic);
886 	if (err != 0) {
887 		rw_exit(&vnic_lock);
888 		return (ENOENT);
889 	}
890 
891 	info->vn_link_id = vnic->vn_link_id;
892 	info->vn_mac_addr_type = vnic->vn_addr_type;
893 	info->vn_mac_len = vnic->vn_addr_len;
894 	bcopy(vnic->vn_addr, info->vn_mac_addr, MAXMACADDRLEN);
895 	info->vn_mac_slot = vnic->vn_slot_id;
896 	info->vn_mac_prefix_len = 0;
897 	info->vn_vid = vnic->vn_vid;
898 	info->vn_force = vnic->vn_force;
899 
900 	bzero(&info->vn_resource_props, sizeof (mac_resource_props_t));
901 	if (vnic->vn_mch != NULL)
902 		mac_resource_ctl_get(vnic->vn_mch, &info->vn_resource_props);
903 
904 	rw_exit(&vnic_lock);
905 	return (0);
906 }
907 
908 static void
909 vnic_notify_cb(void *arg, mac_notify_type_t type)
910 {
911 	vnic_t *vnic = arg;
912 
913 	/*
914 	 * Do not deliver notifications if the vnic is not fully initialized
915 	 * or is in process of being torn down.
916 	 */
917 	if (!vnic->vn_enabled)
918 		return;
919 
920 	switch (type) {
921 	case MAC_NOTE_UNICST:
922 		/*
923 		 * Only the VLAN VNIC needs to be notified with primary MAC
924 		 * address change.
925 		 */
926 		if (vnic->vn_addr_type != VNIC_MAC_ADDR_TYPE_PRIMARY)
927 			return;
928 
929 		/*  the unicast MAC address value */
930 		mac_unicast_primary_get(vnic->vn_lower_mh, vnic->vn_addr);
931 
932 		/* notify its upper layer MAC about MAC address change */
933 		mac_unicst_update(vnic->vn_mh, (const uint8_t *)vnic->vn_addr);
934 		break;
935 
936 	case MAC_NOTE_LINK:
937 		mac_link_update(vnic->vn_mh,
938 		    mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE));
939 		break;
940 
941 	default:
942 		break;
943 	}
944 }
945