xref: /illumos-gate/usr/src/uts/common/io/vnic/vnic_dev.c (revision c0dd49bdd68c0d758a67d56f07826f3b45cfc664)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/cred.h>
28 #include <sys/sysmacros.h>
29 #include <sys/conf.h>
30 #include <sys/cmn_err.h>
31 #include <sys/list.h>
32 #include <sys/ksynch.h>
33 #include <sys/kmem.h>
34 #include <sys/stream.h>
35 #include <sys/modctl.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/atomic.h>
39 #include <sys/stat.h>
40 #include <sys/modhash.h>
41 #include <sys/strsubr.h>
42 #include <sys/strsun.h>
43 #include <sys/dlpi.h>
44 #include <sys/mac.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_client.h>
47 #include <sys/mac_client_priv.h>
48 #include <sys/mac_ether.h>
49 #include <sys/dls.h>
50 #include <sys/pattr.h>
51 #include <sys/time.h>
52 #include <sys/vlan.h>
53 #include <sys/vnic.h>
54 #include <sys/vnic_impl.h>
55 #include <sys/mac_flow_impl.h>
56 #include <inet/ip_impl.h>
57 
/*
 * Note that for best performance, the VNIC is a passthrough design.
 * Each VNIC corresponds to a MAC client of the underlying MAC (lower MAC).
 * This MAC client is opened by the VNIC driver at VNIC creation,
 * and closed when the VNIC is deleted.
 * When a MAC client of the VNIC itself opens a VNIC, the MAC layer
 * (upper MAC) detects that the MAC being opened is a VNIC. Instead
 * of allocating a new MAC client, it asks the VNIC driver to return
 * the lower MAC client handle associated with the VNIC, and that handle
 * is returned to the upper MAC client directly. This gives upper MAC
 * clients of the VNIC direct access to the lower MAC client for both
 * the control path and the data path.
 *
 * Due to this passthrough, some of the entry points exported by the
 * VNIC driver are never directly invoked. These entry points include
 * vnic_m_start, vnic_m_stop, vnic_m_promisc, vnic_m_multicst, etc.
 */
75 
/*
 * Forward declarations of the MAC callback entry points referenced by
 * vnic_m_callbacks, and of the notification callback that
 * vnic_dev_create() registers on the lower MAC. All of them are
 * defined later in this file.
 */
static int vnic_m_start(void *);
static void vnic_m_stop(void *);
static int vnic_m_promisc(void *, boolean_t);
static int vnic_m_multicst(void *, boolean_t, const uint8_t *);
static int vnic_m_unicst(void *, const uint8_t *);
static int vnic_m_stat(void *, uint_t, uint64_t *);
static void vnic_m_ioctl(void *, queue_t *, mblk_t *);
static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
    const void *);
static void vnic_m_propinfo(void *, const char *, mac_prop_id_t,
    mac_prop_info_handle_t);
static mblk_t *vnic_m_tx(void *, mblk_t *);
static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
static void vnic_notify_cb(void *, mac_notify_type_t);
90 
/* allocation cache for vnic_t structures, created in vnic_dev_init() */
static kmem_cache_t	*vnic_cache;
/* held as writer around lookups and updates of vnic_hash and vnic_count */
static krwlock_t	vnic_lock;
/* number of VNICs currently inserted in vnic_hash */
static uint_t		vnic_count;

/*
 * MTU range used for anchor VNICs: the initial min/max SDU registered in
 * vnic_dev_create() and the bounds enforced by vnic_m_setprop().
 */
#define	ANCHOR_VNIC_MIN_MTU	576
#define	ANCHOR_VNIC_MAX_MTU	9000

/* hash of VNICs (vnic_t's), keyed by VNIC id */
static mod_hash_t	*vnic_hash;
/* size argument passed to mod_hash_create_idhash() for vnic_hash */
#define	VNIC_HASHSZ	64
/*
 * Convert a VNIC id into a vnic_hash key. The argument is parenthesized
 * so that the cast applies to the whole expression supplied by the
 * caller rather than to its first operand only.
 */
#define	VNIC_HASH_KEY(vnic_id)	((mod_hash_key_t)(uintptr_t)(vnic_id))
102 
/*
 * Flag word stored as the first member of vnic_m_callbacks. It names
 * the ioctl, capability, set-property and property-info callbacks.
 */
#define	VNIC_M_CALLBACK_FLAGS	\
	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)

/*
 * Callback vector handed to the MAC layer for every VNIC through
 * mac->m_callbacks in vnic_dev_create(). The initializer is positional;
 * a NULL entry is a slot for which this driver supplies no handler.
 */
static mac_callbacks_t vnic_m_callbacks = {
	VNIC_M_CALLBACK_FLAGS,
	vnic_m_stat,
	vnic_m_start,
	vnic_m_stop,
	vnic_m_promisc,
	vnic_m_multicst,
	vnic_m_unicst,
	vnic_m_tx,
	NULL,
	vnic_m_ioctl,
	vnic_m_capab_get,
	NULL,
	NULL,
	vnic_m_setprop,
	NULL,
	vnic_m_propinfo
};
124 
125 void
126 vnic_dev_init(void)
127 {
128 	vnic_cache = kmem_cache_create("vnic_cache",
129 	    sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
130 
131 	vnic_hash = mod_hash_create_idhash("vnic_hash",
132 	    VNIC_HASHSZ, mod_hash_null_valdtor);
133 
134 	rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);
135 
136 	vnic_count = 0;
137 }
138 
139 void
140 vnic_dev_fini(void)
141 {
142 	ASSERT(vnic_count == 0);
143 
144 	rw_destroy(&vnic_lock);
145 	mod_hash_destroy_idhash(vnic_hash);
146 	kmem_cache_destroy(vnic_cache);
147 }
148 
149 uint_t
150 vnic_dev_count(void)
151 {
152 	return (vnic_count);
153 }
154 
155 static vnic_ioc_diag_t
156 vnic_mac2vnic_diag(mac_diag_t diag)
157 {
158 	switch (diag) {
159 	case MAC_DIAG_MACADDR_NIC:
160 		return (VNIC_IOC_DIAG_MACADDR_NIC);
161 	case MAC_DIAG_MACADDR_INUSE:
162 		return (VNIC_IOC_DIAG_MACADDR_INUSE);
163 	case MAC_DIAG_MACADDR_INVALID:
164 		return (VNIC_IOC_DIAG_MACADDR_INVALID);
165 	case MAC_DIAG_MACADDRLEN_INVALID:
166 		return (VNIC_IOC_DIAG_MACADDRLEN_INVALID);
167 	case MAC_DIAG_MACFACTORYSLOTINVALID:
168 		return (VNIC_IOC_DIAG_MACFACTORYSLOTINVALID);
169 	case MAC_DIAG_MACFACTORYSLOTUSED:
170 		return (VNIC_IOC_DIAG_MACFACTORYSLOTUSED);
171 	case MAC_DIAG_MACFACTORYSLOTALLUSED:
172 		return (VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED);
173 	case MAC_DIAG_MACFACTORYNOTSUP:
174 		return (VNIC_IOC_DIAG_MACFACTORYNOTSUP);
175 	case MAC_DIAG_MACPREFIX_INVALID:
176 		return (VNIC_IOC_DIAG_MACPREFIX_INVALID);
177 	case MAC_DIAG_MACPREFIXLEN_INVALID:
178 		return (VNIC_IOC_DIAG_MACPREFIXLEN_INVALID);
179 	case MAC_DIAG_MACNO_HWRINGS:
180 		return (VNIC_IOC_DIAG_NO_HWRINGS);
181 	default:
182 		return (VNIC_IOC_DIAG_NONE);
183 	}
184 }
185 
186 static int
187 vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type,
188     int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg,
189     uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag,
190     uint16_t vid, boolean_t req_hwgrp_flag)
191 {
192 	mac_diag_t mac_diag;
193 	uint16_t mac_flags = 0;
194 	int err;
195 	uint_t addr_len;
196 
197 	if (flags & VNIC_IOC_CREATE_NODUPCHECK)
198 		mac_flags |= MAC_UNICAST_NODUPCHECK;
199 
200 	switch (vnic_addr_type) {
201 	case VNIC_MAC_ADDR_TYPE_FIXED:
202 	case VNIC_MAC_ADDR_TYPE_VRID:
203 		/*
204 		 * The MAC address value to assign to the VNIC
205 		 * is already provided in mac_addr_arg. addr_len_ptr_arg
206 		 * already contains the MAC address length.
207 		 */
208 		break;
209 
210 	case VNIC_MAC_ADDR_TYPE_RANDOM:
211 		/*
212 		 * Random MAC address. There are two sub-cases:
213 		 *
214 		 * 1 - If mac_len == 0, a new MAC address is generated.
215 		 *	The length of the MAC address to generated depends
216 		 *	on the type of MAC used. The prefix to use for the MAC
217 		 *	address is stored in the most significant bytes
218 		 *	of the mac_addr argument, and its length is specified
219 		 *	by the mac_prefix_len argument. This prefix can
220 		 *	correspond to a IEEE OUI in the case of Ethernet,
221 		 *	for example.
222 		 *
223 		 * 2 - If mac_len > 0, the address was already picked
224 		 *	randomly, and is now passed back during VNIC
225 		 *	re-creation. The mac_addr argument contains the MAC
226 		 *	address that was generated. We distinguish this
227 		 *	case from the fixed MAC address case, since we
228 		 *	want the user consumers to know, when they query
229 		 *	the list of VNICs, that a VNIC was assigned a
230 		 *	random MAC address vs assigned a fixed address
231 		 *	specified by the user.
232 		 */
233 
234 		/*
235 		 * If it's a pre-generated address, we're done. mac_addr_arg
236 		 * and addr_len_ptr_arg already contain the MAC address
237 		 * value and length.
238 		 */
239 		if (*addr_len_ptr_arg > 0)
240 			break;
241 
242 		/* generate a new random MAC address */
243 		if ((err = mac_addr_random(vnic->vn_mch,
244 		    prefix_len, mac_addr_arg, &mac_diag)) != 0) {
245 			*diag = vnic_mac2vnic_diag(mac_diag);
246 			return (err);
247 		}
248 		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
249 		break;
250 
251 	case VNIC_MAC_ADDR_TYPE_FACTORY:
252 		err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
253 		if (err != 0) {
254 			if (err == EINVAL)
255 				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTINVALID;
256 			if (err == EBUSY)
257 				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTUSED;
258 			if (err == ENOSPC)
259 				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED;
260 			return (err);
261 		}
262 
263 		mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
264 		    mac_addr_arg, &addr_len, NULL, NULL);
265 		*addr_len_ptr_arg = addr_len;
266 		break;
267 
268 	case VNIC_MAC_ADDR_TYPE_AUTO:
269 		/* first try to allocate a factory MAC address */
270 		err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
271 		if (err == 0) {
272 			mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
273 			    mac_addr_arg, &addr_len, NULL, NULL);
274 			vnic_addr_type = VNIC_MAC_ADDR_TYPE_FACTORY;
275 			*addr_len_ptr_arg = addr_len;
276 			break;
277 		}
278 
279 		/*
280 		 * Allocating a factory MAC address failed, generate a
281 		 * random MAC address instead.
282 		 */
283 		if ((err = mac_addr_random(vnic->vn_mch,
284 		    prefix_len, mac_addr_arg, &mac_diag)) != 0) {
285 			*diag = vnic_mac2vnic_diag(mac_diag);
286 			return (err);
287 		}
288 		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
289 		vnic_addr_type = VNIC_MAC_ADDR_TYPE_RANDOM;
290 		break;
291 	case VNIC_MAC_ADDR_TYPE_PRIMARY:
292 		/*
293 		 * We get the address here since we copy it in the
294 		 * vnic's vn_addr.
295 		 * We can't ask for hardware resources since we
296 		 * don't currently support hardware classification
297 		 * for these MAC clients.
298 		 */
299 		if (req_hwgrp_flag) {
300 			*diag = VNIC_IOC_DIAG_NO_HWRINGS;
301 			return (ENOTSUP);
302 		}
303 		mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg);
304 		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
305 		mac_flags |= MAC_UNICAST_VNIC_PRIMARY;
306 		break;
307 	}
308 
309 	vnic->vn_addr_type = vnic_addr_type;
310 
311 	err = mac_unicast_add(vnic->vn_mch, mac_addr_arg, mac_flags,
312 	    &vnic->vn_muh, vid, &mac_diag);
313 	if (err != 0) {
314 		if (vnic_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
315 			/* release factory MAC address */
316 			mac_addr_factory_release(vnic->vn_mch, *addr_slot);
317 		}
318 		*diag = vnic_mac2vnic_diag(mac_diag);
319 	}
320 
321 	return (err);
322 }
323 
324 /*
325  * Create a new VNIC upon request from administrator.
326  * Returns 0 on success, an errno on failure.
327  */
328 /* ARGSUSED */
329 int
330 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid,
331     vnic_mac_addr_type_t *vnic_addr_type, int *mac_len, uchar_t *mac_addr,
332     int *mac_slot, uint_t mac_prefix_len, uint16_t vid, vrid_t vrid,
333     int af, mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag,
334     cred_t *credp)
335 {
336 	vnic_t *vnic;
337 	mac_register_t *mac;
338 	int err;
339 	boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0);
340 	char vnic_name[MAXNAMELEN];
341 	const mac_info_t *minfop;
342 	uint32_t req_hwgrp_flag = B_FALSE;
343 
344 	*diag = VNIC_IOC_DIAG_NONE;
345 
346 	rw_enter(&vnic_lock, RW_WRITER);
347 
348 	/* does a VNIC with the same id already exist? */
349 	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
350 	    (mod_hash_val_t *)&vnic);
351 	if (err == 0) {
352 		rw_exit(&vnic_lock);
353 		return (EEXIST);
354 	}
355 
356 	vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP);
357 	if (vnic == NULL) {
358 		rw_exit(&vnic_lock);
359 		return (ENOMEM);
360 	}
361 
362 	bzero(vnic, sizeof (*vnic));
363 
364 	vnic->vn_id = vnic_id;
365 	vnic->vn_link_id = linkid;
366 	vnic->vn_vrid = vrid;
367 	vnic->vn_af = af;
368 
369 	if (!is_anchor) {
370 		if (linkid == DATALINK_INVALID_LINKID) {
371 			err = EINVAL;
372 			goto bail;
373 		}
374 
375 		/*
376 		 * Open the lower MAC and assign its initial bandwidth and
377 		 * MAC address. We do this here during VNIC creation and
378 		 * do not wait until the upper MAC client open so that we
379 		 * can validate the VNIC creation parameters (bandwidth,
380 		 * MAC address, etc) and reserve a factory MAC address if
381 		 * one was requested.
382 		 */
383 		err = mac_open_by_linkid(linkid, &vnic->vn_lower_mh);
384 		if (err != 0)
385 			goto bail;
386 
387 		/*
388 		 * VNIC(vlan) over VNICs(vlans) is not supported.
389 		 */
390 		if (mac_is_vnic(vnic->vn_lower_mh)) {
391 			err = EINVAL;
392 			goto bail;
393 		}
394 
395 		/* only ethernet support for now */
396 		minfop = mac_info(vnic->vn_lower_mh);
397 		if (minfop->mi_nativemedia != DL_ETHER) {
398 			err = ENOTSUP;
399 			goto bail;
400 		}
401 
402 		(void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL,
403 		    NULL);
404 		err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch,
405 		    vnic_name, MAC_OPEN_FLAGS_IS_VNIC);
406 		if (err != 0)
407 			goto bail;
408 
409 		if (mrp != NULL) {
410 			if ((mrp->mrp_mask & MRP_RX_RINGS) != 0 ||
411 			    (mrp->mrp_mask & MRP_TX_RINGS) != 0) {
412 				req_hwgrp_flag = B_TRUE;
413 			}
414 			err = mac_client_set_resources(vnic->vn_mch, mrp);
415 			if (err != 0)
416 				goto bail;
417 		}
418 		/* assign a MAC address to the VNIC */
419 
420 		err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot,
421 		    mac_prefix_len, mac_len, mac_addr, flags, diag, vid,
422 		    req_hwgrp_flag);
423 		if (err != 0) {
424 			vnic->vn_muh = NULL;
425 			if (diag != NULL && req_hwgrp_flag)
426 				*diag = VNIC_IOC_DIAG_NO_HWRINGS;
427 			goto bail;
428 		}
429 
430 		/* register to receive notification from underlying MAC */
431 		vnic->vn_mnh = mac_notify_add(vnic->vn_lower_mh, vnic_notify_cb,
432 		    vnic);
433 
434 		*vnic_addr_type = vnic->vn_addr_type;
435 		vnic->vn_addr_len = *mac_len;
436 		vnic->vn_vid = vid;
437 
438 		bcopy(mac_addr, vnic->vn_addr, vnic->vn_addr_len);
439 
440 		if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY)
441 			vnic->vn_slot_id = *mac_slot;
442 
443 		/* set the initial VNIC capabilities */
444 		if (!mac_capab_get(vnic->vn_lower_mh, MAC_CAPAB_HCKSUM,
445 		    &vnic->vn_hcksum_txflags))
446 			vnic->vn_hcksum_txflags = 0;
447 	}
448 
449 	/* register with the MAC module */
450 	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
451 		goto bail;
452 
453 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
454 	mac->m_driver = vnic;
455 	mac->m_dip = vnic_get_dip();
456 	mac->m_instance = (uint_t)-1;
457 	mac->m_src_addr = vnic->vn_addr;
458 	mac->m_callbacks = &vnic_m_callbacks;
459 
460 	if (!is_anchor) {
461 		/*
462 		 * If this is a VNIC based VLAN, then we check for the
463 		 * margin unless it has been created with the force
464 		 * flag. If we are configuring a VLAN over an etherstub,
465 		 * we don't check the margin even if force is not set.
466 		 */
467 		if (vid == 0 || (flags & VNIC_IOC_CREATE_FORCE) != 0) {
468 			if (vid != VLAN_ID_NONE)
469 				vnic->vn_force = B_TRUE;
470 			/*
471 			 * As the current margin size of the underlying mac is
472 			 * used to determine the margin size of the VNIC
473 			 * itself, request the underlying mac not to change
474 			 * to a smaller margin size.
475 			 */
476 			err = mac_margin_add(vnic->vn_lower_mh,
477 			    &vnic->vn_margin, B_TRUE);
478 			ASSERT(err == 0);
479 		} else {
480 			vnic->vn_margin = VLAN_TAGSZ;
481 			err = mac_margin_add(vnic->vn_lower_mh,
482 			    &vnic->vn_margin, B_FALSE);
483 			if (err != 0) {
484 				mac_free(mac);
485 				if (diag != NULL)
486 					*diag = VNIC_IOC_DIAG_MACMARGIN_INVALID;
487 				goto bail;
488 			}
489 		}
490 
491 		mac_sdu_get(vnic->vn_lower_mh, &mac->m_min_sdu,
492 		    &mac->m_max_sdu);
493 	} else {
494 		vnic->vn_margin = VLAN_TAGSZ;
495 		mac->m_min_sdu = ANCHOR_VNIC_MIN_MTU;
496 		mac->m_max_sdu = ANCHOR_VNIC_MAX_MTU;
497 	}
498 
499 	mac->m_margin = vnic->vn_margin;
500 
501 	err = mac_register(mac, &vnic->vn_mh);
502 	mac_free(mac);
503 	if (err != 0) {
504 		VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
505 		    vnic->vn_margin) == 0);
506 		goto bail;
507 	}
508 
509 	/* Set the VNIC's MAC in the client */
510 	if (!is_anchor)
511 		mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh, mrp);
512 
513 	err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp));
514 	if (err != 0) {
515 		VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
516 		    vnic->vn_margin) == 0);
517 		(void) mac_unregister(vnic->vn_mh);
518 		goto bail;
519 	}
520 
521 	/* add new VNIC to hash table */
522 	err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id),
523 	    (mod_hash_val_t)vnic);
524 	ASSERT(err == 0);
525 	vnic_count++;
526 
527 	vnic->vn_enabled = B_TRUE;
528 	rw_exit(&vnic_lock);
529 
530 	return (0);
531 
532 bail:
533 	rw_exit(&vnic_lock);
534 	if (!is_anchor) {
535 		if (vnic->vn_mnh != NULL)
536 			(void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
537 		if (vnic->vn_muh != NULL)
538 			(void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
539 		if (vnic->vn_mch != NULL)
540 			mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
541 		if (vnic->vn_lower_mh != NULL)
542 			mac_close(vnic->vn_lower_mh);
543 	}
544 
545 	kmem_cache_free(vnic_cache, vnic);
546 	return (err);
547 }
548 
549 /*
550  * Modify the properties of an existing VNIC.
551  */
552 /* ARGSUSED */
553 int
554 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask,
555     vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr,
556     uint_t mac_slot, mac_resource_props_t *mrp)
557 {
558 	vnic_t *vnic = NULL;
559 
560 	rw_enter(&vnic_lock, RW_WRITER);
561 
562 	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
563 	    (mod_hash_val_t *)&vnic) != 0) {
564 		rw_exit(&vnic_lock);
565 		return (ENOENT);
566 	}
567 
568 	rw_exit(&vnic_lock);
569 
570 	return (0);
571 }
572 
573 /* ARGSUSED */
574 int
575 vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags, cred_t *credp)
576 {
577 	vnic_t *vnic = NULL;
578 	mod_hash_val_t val;
579 	datalink_id_t tmpid;
580 	int rc;
581 
582 	rw_enter(&vnic_lock, RW_WRITER);
583 
584 	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
585 	    (mod_hash_val_t *)&vnic) != 0) {
586 		rw_exit(&vnic_lock);
587 		return (ENOENT);
588 	}
589 
590 	if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid, B_TRUE)) != 0) {
591 		rw_exit(&vnic_lock);
592 		return (rc);
593 	}
594 
595 	ASSERT(vnic_id == tmpid);
596 
597 	/*
598 	 * We cannot unregister the MAC yet. Unregistering would
599 	 * free up mac_impl_t which should not happen at this time.
600 	 * So disable mac_impl_t by calling mac_disable(). This will prevent
601 	 * any new claims on mac_impl_t.
602 	 */
603 	if ((rc = mac_disable(vnic->vn_mh)) != 0) {
604 		(void) dls_devnet_create(vnic->vn_mh, vnic_id,
605 		    crgetzoneid(credp));
606 		rw_exit(&vnic_lock);
607 		return (rc);
608 	}
609 
610 	vnic->vn_enabled = B_FALSE;
611 	(void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val);
612 	ASSERT(vnic == (vnic_t *)val);
613 	vnic_count--;
614 	rw_exit(&vnic_lock);
615 
616 	/*
617 	 * XXX-nicolas shouldn't have a void cast here, if it's
618 	 * expected that the function will never fail, then we should
619 	 * have an ASSERT().
620 	 */
621 	(void) mac_unregister(vnic->vn_mh);
622 
623 	if (vnic->vn_lower_mh != NULL) {
624 		/*
625 		 * Check if MAC address for the vnic was obtained from the
626 		 * factory MAC addresses. If yes, release it.
627 		 */
628 		if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
629 			(void) mac_addr_factory_release(vnic->vn_mch,
630 			    vnic->vn_slot_id);
631 		}
632 		(void) mac_margin_remove(vnic->vn_lower_mh, vnic->vn_margin);
633 		(void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
634 		(void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
635 		mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
636 		mac_close(vnic->vn_lower_mh);
637 	}
638 
639 	kmem_cache_free(vnic_cache, vnic);
640 	return (0);
641 }
642 
643 /* ARGSUSED */
644 mblk_t *
645 vnic_m_tx(void *arg, mblk_t *mp_chain)
646 {
647 	/*
648 	 * This function could be invoked for an anchor VNIC when sending
649 	 * broadcast and multicast packets, and unicast packets which did
650 	 * not match any local known destination.
651 	 */
652 	freemsgchain(mp_chain);
653 	return (NULL);
654 }
655 
656 /*ARGSUSED*/
657 static void
658 vnic_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
659 {
660 	miocnak(q, mp, 0, ENOTSUP);
661 }
662 
663 /*
664  * This entry point cannot be passed-through, since it is invoked
665  * for the per-VNIC kstats which must be exported independently
666  * of the existence of VNIC MAC clients.
667  */
668 static int
669 vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
670 {
671 	vnic_t *vnic = arg;
672 	int rval = 0;
673 
674 	if (vnic->vn_lower_mh == NULL) {
675 		/*
676 		 * It's an anchor VNIC, which does not have any
677 		 * statistics in itself.
678 		 */
679 		return (ENOTSUP);
680 	}
681 
682 	/*
683 	 * ENOTSUP must be reported for unsupported stats, the VNIC
684 	 * driver reports a subset of the stats that would
685 	 * be returned by a real piece of hardware.
686 	 */
687 
688 	switch (stat) {
689 	case MAC_STAT_LINK_STATE:
690 	case MAC_STAT_LINK_UP:
691 	case MAC_STAT_PROMISC:
692 	case MAC_STAT_IFSPEED:
693 	case MAC_STAT_MULTIRCV:
694 	case MAC_STAT_MULTIXMT:
695 	case MAC_STAT_BRDCSTRCV:
696 	case MAC_STAT_BRDCSTXMT:
697 	case MAC_STAT_OPACKETS:
698 	case MAC_STAT_OBYTES:
699 	case MAC_STAT_IERRORS:
700 	case MAC_STAT_OERRORS:
701 	case MAC_STAT_RBYTES:
702 	case MAC_STAT_IPACKETS:
703 		*val = mac_client_stat_get(vnic->vn_mch, stat);
704 		break;
705 	default:
706 		rval = ENOTSUP;
707 	}
708 
709 	return (rval);
710 }
711 
712 /*
713  * Invoked by the upper MAC to retrieve the lower MAC client handle
714  * corresponding to a VNIC. A pointer to this function is obtained
715  * by the upper MAC via capability query.
716  *
717  * XXX-nicolas Note: this currently causes all VNIC MAC clients to
718  * receive the same MAC client handle for the same VNIC. This is ok
719  * as long as we have only one VNIC MAC client which sends and
720  * receives data, but we don't currently enforce this at the MAC layer.
721  */
722 static void *
723 vnic_mac_client_handle(void *vnic_arg)
724 {
725 	vnic_t *vnic = vnic_arg;
726 
727 	return (vnic->vn_mch);
728 }
729 
730 
731 /*
732  * Return information about the specified capability.
733  */
734 /* ARGSUSED */
735 static boolean_t
736 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
737 {
738 	vnic_t *vnic = arg;
739 
740 	switch (cap) {
741 	case MAC_CAPAB_HCKSUM: {
742 		uint32_t *hcksum_txflags = cap_data;
743 
744 		*hcksum_txflags = vnic->vn_hcksum_txflags &
745 		    (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM |
746 		    HCKSUM_INET_PARTIAL);
747 		break;
748 	}
749 	case MAC_CAPAB_VNIC: {
750 		mac_capab_vnic_t *vnic_capab = cap_data;
751 
752 		if (vnic->vn_lower_mh == NULL) {
753 			/*
754 			 * It's an anchor VNIC, we don't have an underlying
755 			 * NIC and MAC client handle.
756 			 */
757 			return (B_FALSE);
758 		}
759 
760 		if (vnic_capab != NULL) {
761 			vnic_capab->mcv_arg = vnic;
762 			vnic_capab->mcv_mac_client_handle =
763 			    vnic_mac_client_handle;
764 		}
765 		break;
766 	}
767 	case MAC_CAPAB_ANCHOR_VNIC: {
768 		/* since it's an anchor VNIC we don't have lower mac handle */
769 		if (vnic->vn_lower_mh == NULL) {
770 			ASSERT(vnic->vn_link_id == 0);
771 			return (B_TRUE);
772 		}
773 		return (B_FALSE);
774 	}
775 	case MAC_CAPAB_NO_NATIVEVLAN:
776 	case MAC_CAPAB_NO_ZCOPY:
777 		return (B_TRUE);
778 	case MAC_CAPAB_VRRP: {
779 		mac_capab_vrrp_t *vrrp_capab = cap_data;
780 
781 		if (vnic->vn_vrid != 0) {
782 			if (vrrp_capab != NULL)
783 				vrrp_capab->mcv_af = vnic->vn_af;
784 			return (B_TRUE);
785 		}
786 		return (B_FALSE);
787 	}
788 	default:
789 		return (B_FALSE);
790 	}
791 	return (B_TRUE);
792 }
793 
/*
 * Start entry point. There is nothing to do here; success is always
 * reported.
 */
/* ARGSUSED */
static int
vnic_m_start(void *arg)
{
	return (0);
}
800 
/*
 * Stop entry point. Intentionally empty.
 */
/* ARGSUSED */
static void
vnic_m_stop(void *arg)
{
}
806 
807 /* ARGSUSED */
808 static int
809 vnic_m_promisc(void *arg, boolean_t on)
810 {
811 	return (0);
812 }
813 
814 /* ARGSUSED */
815 static int
816 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
817 {
818 	return (0);
819 }
820 
821 static int
822 vnic_m_unicst(void *arg, const uint8_t *macaddr)
823 {
824 	vnic_t *vnic = arg;
825 
826 	return (mac_vnic_unicast_set(vnic->vn_mch, macaddr));
827 }
828 
829 /*
830  * Callback functions for set/get of properties
831  */
832 /*ARGSUSED*/
833 static int
834 vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
835     uint_t pr_valsize, const void *pr_val)
836 {
837 	int 		err = ENOTSUP;
838 	vnic_t		*vn = m_driver;
839 
840 	/* allow setting MTU only on an etherstub */
841 	if (vn->vn_link_id != DATALINK_INVALID_LINKID)
842 		return (err);
843 
844 	switch (pr_num) {
845 	case MAC_PROP_MTU: {
846 		uint32_t	mtu;
847 
848 		if (pr_valsize < sizeof (mtu)) {
849 			err = EINVAL;
850 			break;
851 		}
852 		bcopy(pr_val, &mtu, sizeof (mtu));
853 		if (mtu < ANCHOR_VNIC_MIN_MTU || mtu > ANCHOR_VNIC_MAX_MTU) {
854 			err = EINVAL;
855 			break;
856 		}
857 		err = mac_maxsdu_update(vn->vn_mh, mtu);
858 		break;
859 	}
860 	default:
861 		break;
862 	}
863 	return (err);
864 }
865 
866 /* ARGSUSED */
867 static void vnic_m_propinfo(void *m_driver, const char *pr_name,
868     mac_prop_id_t pr_num, mac_prop_info_handle_t prh)
869 {
870 	vnic_t		*vn = m_driver;
871 
872 	/* MTU setting allowed only on an etherstub */
873 	if (vn->vn_link_id != DATALINK_INVALID_LINKID)
874 		return;
875 
876 	switch (pr_num) {
877 	case MAC_PROP_MTU:
878 		mac_prop_info_set_range_uint32(prh,
879 		    ANCHOR_VNIC_MIN_MTU, ANCHOR_VNIC_MAX_MTU);
880 		break;
881 	}
882 }
883 
884 
885 int
886 vnic_info(vnic_info_t *info, cred_t *credp)
887 {
888 	vnic_t		*vnic;
889 	int		err;
890 
891 	/* Make sure that the VNIC link is visible from the caller's zone. */
892 	if (!dls_devnet_islinkvisible(info->vn_vnic_id, crgetzoneid(credp)))
893 		return (ENOENT);
894 
895 	rw_enter(&vnic_lock, RW_WRITER);
896 
897 	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(info->vn_vnic_id),
898 	    (mod_hash_val_t *)&vnic);
899 	if (err != 0) {
900 		rw_exit(&vnic_lock);
901 		return (ENOENT);
902 	}
903 
904 	info->vn_link_id = vnic->vn_link_id;
905 	info->vn_mac_addr_type = vnic->vn_addr_type;
906 	info->vn_mac_len = vnic->vn_addr_len;
907 	bcopy(vnic->vn_addr, info->vn_mac_addr, MAXMACADDRLEN);
908 	info->vn_mac_slot = vnic->vn_slot_id;
909 	info->vn_mac_prefix_len = 0;
910 	info->vn_vid = vnic->vn_vid;
911 	info->vn_force = vnic->vn_force;
912 	info->vn_vrid = vnic->vn_vrid;
913 	info->vn_af = vnic->vn_af;
914 
915 	bzero(&info->vn_resource_props, sizeof (mac_resource_props_t));
916 	if (vnic->vn_mch != NULL)
917 		mac_resource_ctl_get(vnic->vn_mch, &info->vn_resource_props);
918 
919 	rw_exit(&vnic_lock);
920 	return (0);
921 }
922 
923 static void
924 vnic_notify_cb(void *arg, mac_notify_type_t type)
925 {
926 	vnic_t *vnic = arg;
927 
928 	/*
929 	 * Do not deliver notifications if the vnic is not fully initialized
930 	 * or is in process of being torn down.
931 	 */
932 	if (!vnic->vn_enabled)
933 		return;
934 
935 	switch (type) {
936 	case MAC_NOTE_UNICST:
937 		/*
938 		 * Only the VLAN VNIC needs to be notified with primary MAC
939 		 * address change.
940 		 */
941 		if (vnic->vn_addr_type != VNIC_MAC_ADDR_TYPE_PRIMARY)
942 			return;
943 
944 		/*  the unicast MAC address value */
945 		mac_unicast_primary_get(vnic->vn_lower_mh, vnic->vn_addr);
946 
947 		/* notify its upper layer MAC about MAC address change */
948 		mac_unicst_update(vnic->vn_mh, (const uint8_t *)vnic->vn_addr);
949 		break;
950 
951 	case MAC_NOTE_LINK:
952 		mac_link_update(vnic->vn_mh,
953 		    mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE));
954 		break;
955 
956 	default:
957 		break;
958 	}
959 }
960