xref: /titanic_50/usr/src/uts/common/io/vnic/vnic_dev.c (revision 6e1fa242609208de48dfe1939b8814d4dff455a5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/sysmacros.h>
28 #include <sys/conf.h>
29 #include <sys/cmn_err.h>
30 #include <sys/list.h>
31 #include <sys/ksynch.h>
32 #include <sys/kmem.h>
33 #include <sys/stream.h>
34 #include <sys/modctl.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/atomic.h>
38 #include <sys/stat.h>
39 #include <sys/modhash.h>
40 #include <sys/strsubr.h>
41 #include <sys/strsun.h>
42 #include <sys/dlpi.h>
43 #include <sys/mac.h>
44 #include <sys/mac_provider.h>
45 #include <sys/mac_client.h>
46 #include <sys/mac_client_priv.h>
47 #include <sys/mac_ether.h>
48 #include <sys/dls.h>
49 #include <sys/pattr.h>
50 #include <sys/time.h>
51 #include <sys/vlan.h>
52 #include <sys/vnic.h>
53 #include <sys/vnic_impl.h>
54 #include <sys/mac_flow_impl.h>
55 #include <inet/ip_impl.h>
56 
57 /*
58  * Note that for best performance, the VNIC is a passthrough design.
59  * For each VNIC corresponds a MAC client of the underlying MAC (lower MAC).
60  * This MAC client is opened by the VNIC driver at VNIC creation,
61  * and closed when the VNIC is deleted.
62  * When a MAC client of the VNIC itself opens a VNIC, the MAC layer
63  * (upper MAC) detects that the MAC being opened is a VNIC. Instead
64  * of allocating a new MAC client, it asks the VNIC driver to return
65  * the lower MAC client handle associated with the VNIC, and that handle
66  * is returned to the upper MAC client directly. This allows access
67  * by upper MAC clients of the VNIC to have direct access to the lower
68  * MAC client for the control path and data path.
69  *
70  * Due to this passthrough, some of the entry points exported by the
71  * VNIC driver are never directly invoked. These entry points include
72  * vnic_m_start, vnic_m_stop, vnic_m_promisc, vnic_m_multicst, etc.
73  */
74 
/*
 * Forward declarations of the MAC provider entry points referenced by
 * vnic_m_callbacks below, and of the notification callback that
 * vnic_dev_create() registers on the lower MAC.
 */
static int vnic_m_start(void *);
static void vnic_m_stop(void *);
static int vnic_m_promisc(void *, boolean_t);
static int vnic_m_multicst(void *, boolean_t, const uint8_t *);
static int vnic_m_unicst(void *, const uint8_t *);
static int vnic_m_stat(void *, uint_t, uint64_t *);
static void vnic_m_ioctl(void *, queue_t *, mblk_t *);
static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
    const void *);
static int vnic_m_getprop(void *, const char *, mac_prop_id_t, uint_t,
    uint_t, void *, uint_t *);
static mblk_t *vnic_m_tx(void *, mblk_t *);
static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
static void vnic_notify_cb(void *, mac_notify_type_t);
89 
90 static kmem_cache_t	*vnic_cache;
91 static krwlock_t	vnic_lock;
92 static uint_t		vnic_count;
93 
94 #define	ANCHOR_VNIC_MIN_MTU	576
95 #define	ANCHOR_VNIC_MAX_MTU	9000
96 
97 /* hash of VNICs (vnic_t's), keyed by VNIC id */
98 static mod_hash_t	*vnic_hash;
99 #define	VNIC_HASHSZ	64
100 #define	VNIC_HASH_KEY(vnic_id)	((mod_hash_key_t)(uintptr_t)vnic_id)
101 
102 #define	VNIC_M_CALLBACK_FLAGS	\
103 	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP)
104 
/*
 * MAC callback vector for every VNIC; vnic_dev_create() installs it in
 * the mac_register_t passed to mac_register(). The initializer is
 * positional, so the order of the entries is significant. The two NULL
 * slots are callbacks this driver does not provide.
 */
static mac_callbacks_t vnic_m_callbacks = {
	VNIC_M_CALLBACK_FLAGS,
	vnic_m_stat,
	vnic_m_start,
	vnic_m_stop,
	vnic_m_promisc,
	vnic_m_multicst,
	vnic_m_unicst,
	vnic_m_tx,
	vnic_m_ioctl,
	vnic_m_capab_get,
	NULL,
	NULL,
	vnic_m_setprop,
	vnic_m_getprop
};
121 
/*
 * Set up the driver-wide state: the vnic_t kmem cache, the id-keyed
 * hash of VNICs, the lock protecting that hash, and the VNIC counter.
 * vnic_dev_fini() undoes this in the reverse order.
 */
void
vnic_dev_init(void)
{
	/* no constructor, destructor or reclaim callback is supplied */
	vnic_cache = kmem_cache_create("vnic_cache",
	    sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	/* values are vnic_t pointers freed explicitly, hence the null dtor */
	vnic_hash = mod_hash_create_idhash("vnic_hash",
	    VNIC_HASHSZ, mod_hash_null_valdtor);

	rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);

	vnic_count = 0;
}
135 
/*
 * Release the driver-wide state created by vnic_dev_init(). All VNICs
 * must have been deleted beforehand (asserted via vnic_count).
 */
void
vnic_dev_fini(void)
{
	ASSERT(vnic_count == 0);

	/* tear down in the reverse order of vnic_dev_init() */
	rw_destroy(&vnic_lock);
	mod_hash_destroy_idhash(vnic_hash);
	kmem_cache_destroy(vnic_cache);
}
145 
/*
 * Return the number of VNICs currently registered in vnic_hash.
 * The counter is read without taking vnic_lock.
 */
uint_t
vnic_dev_count(void)
{
	return (vnic_count);
}
151 
152 static vnic_ioc_diag_t
153 vnic_mac2vnic_diag(mac_diag_t diag)
154 {
155 	switch (diag) {
156 	case MAC_DIAG_MACADDR_NIC:
157 		return (VNIC_IOC_DIAG_MACADDR_NIC);
158 	case MAC_DIAG_MACADDR_INUSE:
159 		return (VNIC_IOC_DIAG_MACADDR_INUSE);
160 	case MAC_DIAG_MACADDR_INVALID:
161 		return (VNIC_IOC_DIAG_MACADDR_INVALID);
162 	case MAC_DIAG_MACADDRLEN_INVALID:
163 		return (VNIC_IOC_DIAG_MACADDRLEN_INVALID);
164 	case MAC_DIAG_MACFACTORYSLOTINVALID:
165 		return (VNIC_IOC_DIAG_MACFACTORYSLOTINVALID);
166 	case MAC_DIAG_MACFACTORYSLOTUSED:
167 		return (VNIC_IOC_DIAG_MACFACTORYSLOTUSED);
168 	case MAC_DIAG_MACFACTORYSLOTALLUSED:
169 		return (VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED);
170 	case MAC_DIAG_MACFACTORYNOTSUP:
171 		return (VNIC_IOC_DIAG_MACFACTORYNOTSUP);
172 	case MAC_DIAG_MACPREFIX_INVALID:
173 		return (VNIC_IOC_DIAG_MACPREFIX_INVALID);
174 	case MAC_DIAG_MACPREFIXLEN_INVALID:
175 		return (VNIC_IOC_DIAG_MACPREFIXLEN_INVALID);
176 	case MAC_DIAG_MACNO_HWRINGS:
177 		return (VNIC_IOC_DIAG_NO_HWRINGS);
178 	default:
179 		return (VNIC_IOC_DIAG_NONE);
180 	}
181 }
182 
/*
 * Pick the unicast MAC address of a VNIC according to vnic_addr_type
 * and add it to the VNIC's lower MAC client (vnic->vn_mch).
 *
 *   vnic             - VNIC being created; vn_mch and vn_lower_mh must
 *                      already be open. vn_addr_type is set here, and
 *                      &vn_muh is handed to mac_unicast_add().
 *   vnic_addr_type   - requested address type; AUTO is resolved to
 *                      FACTORY or RANDOM before being stored.
 *   addr_slot        - factory slot, passed by reference to
 *                      mac_addr_factory_reserve() (FACTORY and AUTO).
 *   prefix_len       - prefix length handed to mac_addr_random().
 *   addr_len_ptr_arg - in: address length for FIXED and for an already
 *                      generated RANDOM address; out: length of the
 *                      address chosen in every other case.
 *   mac_addr_arg     - in/out buffer holding the MAC address.
 *   flags            - VNIC_IOC_CREATE_* flags; only
 *                      VNIC_IOC_CREATE_NODUPCHECK is examined here.
 *   diag             - out: diagnostic code, written on failure.
 *   vid              - VLAN id passed through to mac_unicast_add().
 *
 * Returns 0 on success or the errno of the failing MAC call. If
 * mac_unicast_add() fails, a factory address reserved here is released.
 */
static int
vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type,
    int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg,
    uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag,
    uint16_t vid)
{
	mac_diag_t mac_diag;
	uint16_t mac_flags = 0;
	int err;
	uint_t addr_len;

	if (flags & VNIC_IOC_CREATE_NODUPCHECK)
		mac_flags |= MAC_UNICAST_NODUPCHECK;

	switch (vnic_addr_type) {
	case VNIC_MAC_ADDR_TYPE_FIXED:
		/*
		 * The MAC address value to assign to the VNIC
		 * is already provided in mac_addr_arg. addr_len_ptr_arg
		 * already contains the MAC address length.
		 */
		break;

	case VNIC_MAC_ADDR_TYPE_RANDOM:
		/*
		 * Random MAC address. There are two sub-cases:
		 *
		 * 1 - If *addr_len_ptr_arg == 0, a new MAC address is
		 *	generated. The length of the MAC address to generate
		 *	depends on the type of MAC used. The prefix to use
		 *	for the MAC address is stored in the most significant
		 *	bytes of mac_addr_arg, and its length is specified
		 *	by the prefix_len argument. This prefix can
		 *	correspond to an IEEE OUI in the case of Ethernet,
		 *	for example.
		 *
		 * 2 - If *addr_len_ptr_arg > 0, the address was already
		 *	picked randomly, and is now passed back during VNIC
		 *	re-creation. mac_addr_arg contains the MAC
		 *	address that was generated. We distinguish this
		 *	case from the fixed MAC address case, since we
		 *	want the user consumers to know, when they query
		 *	the list of VNICs, that a VNIC was assigned a
		 *	random MAC address vs assigned a fixed address
		 *	specified by the user.
		 */

		/*
		 * If it's a pre-generated address, we're done. mac_addr_arg
		 * and addr_len_ptr_arg already contain the MAC address
		 * value and length.
		 */
		if (*addr_len_ptr_arg > 0)
			break;

		/* generate a new random MAC address */
		if ((err = mac_addr_random(vnic->vn_mch,
		    prefix_len, mac_addr_arg, &mac_diag)) != 0) {
			*diag = vnic_mac2vnic_diag(mac_diag);
			return (err);
		}
		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
		break;

	case VNIC_MAC_ADDR_TYPE_FACTORY:
		err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
		if (err != 0) {
			/* any other errno leaves *diag as set by the caller */
			if (err == EINVAL)
				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTINVALID;
			if (err == EBUSY)
				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTUSED;
			if (err == ENOSPC)
				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED;
			return (err);
		}

		/* fetch the value and length of the reserved factory address */
		mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
		    mac_addr_arg, &addr_len, NULL, NULL);
		*addr_len_ptr_arg = addr_len;
		break;

	case VNIC_MAC_ADDR_TYPE_AUTO:
		/* first try to allocate a factory MAC address */
		err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
		if (err == 0) {
			mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
			    mac_addr_arg, &addr_len, NULL, NULL);
			vnic_addr_type = VNIC_MAC_ADDR_TYPE_FACTORY;
			*addr_len_ptr_arg = addr_len;
			break;
		}

		/*
		 * Allocating a factory MAC address failed, generate a
		 * random MAC address instead.
		 */
		if ((err = mac_addr_random(vnic->vn_mch,
		    prefix_len, mac_addr_arg, &mac_diag)) != 0) {
			*diag = vnic_mac2vnic_diag(mac_diag);
			return (err);
		}
		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
		vnic_addr_type = VNIC_MAC_ADDR_TYPE_RANDOM;
		break;
	case VNIC_MAC_ADDR_TYPE_PRIMARY:
		/*
		 * We get the address here since we copy it in the
		 * vnic's vn_addr.
		 */
		mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg);
		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
		mac_flags |= MAC_UNICAST_VNIC_PRIMARY;
		break;
	}

	/* record the resolved type (AUTO has become FACTORY or RANDOM) */
	vnic->vn_addr_type = vnic_addr_type;

	err = mac_unicast_add(vnic->vn_mch, mac_addr_arg, mac_flags,
	    &vnic->vn_muh, vid, &mac_diag);
	if (err != 0) {
		if (vnic_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
			/* release factory MAC address */
			mac_addr_factory_release(vnic->vn_mch, *addr_slot);
		}
		*diag = vnic_mac2vnic_diag(mac_diag);
	}

	return (err);
}
312 
313 /*
314  * Create a new VNIC upon request from administrator.
315  * Returns 0 on success, an errno on failure.
316  */
317 /* ARGSUSED */
318 int
319 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid,
320     vnic_mac_addr_type_t *vnic_addr_type, int *mac_len, uchar_t *mac_addr,
321     int *mac_slot, uint_t mac_prefix_len, uint16_t vid,
322     mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag)
323 {
324 	vnic_t *vnic;
325 	mac_register_t *mac;
326 	int err;
327 	boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0);
328 	char vnic_name[MAXNAMELEN];
329 	const mac_info_t *minfop;
330 	uint32_t req_hwgrp_flag = ((flags & VNIC_IOC_CREATE_REQ_HWRINGS) != 0) ?
331 	    MAC_OPEN_FLAGS_REQ_HWRINGS : 0;
332 
333 	*diag = VNIC_IOC_DIAG_NONE;
334 
335 	rw_enter(&vnic_lock, RW_WRITER);
336 
337 	/* does a VNIC with the same id already exist? */
338 	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
339 	    (mod_hash_val_t *)&vnic);
340 	if (err == 0) {
341 		rw_exit(&vnic_lock);
342 		return (EEXIST);
343 	}
344 
345 	vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP);
346 	if (vnic == NULL) {
347 		rw_exit(&vnic_lock);
348 		return (ENOMEM);
349 	}
350 
351 	bzero(vnic, sizeof (*vnic));
352 
353 	vnic->vn_id = vnic_id;
354 	vnic->vn_link_id = linkid;
355 
356 	if (!is_anchor) {
357 		if (linkid == DATALINK_INVALID_LINKID) {
358 			err = EINVAL;
359 			goto bail;
360 		}
361 
362 		/*
363 		 * Open the lower MAC and assign its initial bandwidth and
364 		 * MAC address. We do this here during VNIC creation and
365 		 * do not wait until the upper MAC client open so that we
366 		 * can validate the VNIC creation parameters (bandwidth,
367 		 * MAC address, etc) and reserve a factory MAC address if
368 		 * one was requested.
369 		 */
370 		err = mac_open_by_linkid(linkid, &vnic->vn_lower_mh);
371 		if (err != 0)
372 			goto bail;
373 
374 		/*
375 		 * VNIC(vlan) over VNICs(vlans) is not supported.
376 		 */
377 		if (mac_is_vnic(vnic->vn_lower_mh)) {
378 			err = EINVAL;
379 			goto bail;
380 		}
381 
382 		/* only ethernet support for now */
383 		minfop = mac_info(vnic->vn_lower_mh);
384 		if (minfop->mi_nativemedia != DL_ETHER) {
385 			err = ENOTSUP;
386 			goto bail;
387 		}
388 
389 		(void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL,
390 		    NULL);
391 		err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch,
392 		    vnic_name, MAC_OPEN_FLAGS_IS_VNIC | req_hwgrp_flag);
393 		if (err != 0)
394 			goto bail;
395 
396 		if (mrp != NULL) {
397 			err = mac_client_set_resources(vnic->vn_mch, mrp);
398 			if (err != 0)
399 				goto bail;
400 		}
401 		/* assign a MAC address to the VNIC */
402 
403 		err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot,
404 		    mac_prefix_len, mac_len, mac_addr, flags, diag, vid);
405 		if (err != 0) {
406 			vnic->vn_muh = NULL;
407 			if (diag != NULL && req_hwgrp_flag != 0)
408 				*diag = VNIC_IOC_DIAG_NO_HWRINGS;
409 			goto bail;
410 		}
411 
412 		/* register to receive notification from underlying MAC */
413 		vnic->vn_mnh = mac_notify_add(vnic->vn_lower_mh, vnic_notify_cb,
414 		    vnic);
415 
416 		*vnic_addr_type = vnic->vn_addr_type;
417 		vnic->vn_addr_len = *mac_len;
418 		vnic->vn_vid = vid;
419 
420 		bcopy(mac_addr, vnic->vn_addr, vnic->vn_addr_len);
421 
422 		if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY)
423 			vnic->vn_slot_id = *mac_slot;
424 
425 		/* set the initial VNIC capabilities */
426 		if (!mac_capab_get(vnic->vn_lower_mh, MAC_CAPAB_HCKSUM,
427 		    &vnic->vn_hcksum_txflags))
428 			vnic->vn_hcksum_txflags = 0;
429 	}
430 
431 	/* register with the MAC module */
432 	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
433 		goto bail;
434 
435 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
436 	mac->m_driver = vnic;
437 	mac->m_dip = vnic_get_dip();
438 	mac->m_instance = (uint_t)-1;
439 	mac->m_src_addr = vnic->vn_addr;
440 	mac->m_callbacks = &vnic_m_callbacks;
441 
442 	if (!is_anchor) {
443 		/*
444 		 * If this is a VNIC based VLAN, then we check for the
445 		 * margin unless it has been created with the force
446 		 * flag. If we are configuring a VLAN over an etherstub,
447 		 * we don't check the margin even if force is not set.
448 		 */
449 		if (vid == 0 || (flags & VNIC_IOC_CREATE_FORCE) != 0) {
450 			if (vid != VLAN_ID_NONE)
451 				vnic->vn_force = B_TRUE;
452 			/*
453 			 * As the current margin size of the underlying mac is
454 			 * used to determine the margin size of the VNIC
455 			 * itself, request the underlying mac not to change
456 			 * to a smaller margin size.
457 			 */
458 			err = mac_margin_add(vnic->vn_lower_mh,
459 			    &vnic->vn_margin, B_TRUE);
460 			ASSERT(err == 0);
461 		} else {
462 			vnic->vn_margin = VLAN_TAGSZ;
463 			err = mac_margin_add(vnic->vn_lower_mh,
464 			    &vnic->vn_margin, B_FALSE);
465 			if (err != 0) {
466 				mac_free(mac);
467 				if (diag != NULL)
468 					*diag = VNIC_IOC_DIAG_MACMARGIN_INVALID;
469 				goto bail;
470 			}
471 		}
472 
473 		mac_sdu_get(vnic->vn_lower_mh, &mac->m_min_sdu,
474 		    &mac->m_max_sdu);
475 	} else {
476 		vnic->vn_margin = VLAN_TAGSZ;
477 		mac->m_min_sdu = ANCHOR_VNIC_MIN_MTU;
478 		mac->m_max_sdu = ANCHOR_VNIC_MAX_MTU;
479 	}
480 
481 	mac->m_margin = vnic->vn_margin;
482 
483 	err = mac_register(mac, &vnic->vn_mh);
484 	mac_free(mac);
485 	if (err != 0) {
486 		VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
487 		    vnic->vn_margin) == 0);
488 		goto bail;
489 	}
490 
491 	/* Set the VNIC's MAC in the client */
492 	if (!is_anchor)
493 		mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh);
494 
495 	if ((err = dls_devnet_create(vnic->vn_mh, vnic->vn_id)) != 0) {
496 		VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
497 		    vnic->vn_margin) == 0);
498 		(void) mac_unregister(vnic->vn_mh);
499 		goto bail;
500 	}
501 
502 	/* add new VNIC to hash table */
503 	err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id),
504 	    (mod_hash_val_t)vnic);
505 	ASSERT(err == 0);
506 	vnic_count++;
507 
508 	vnic->vn_enabled = B_TRUE;
509 	rw_exit(&vnic_lock);
510 
511 	return (0);
512 
513 bail:
514 	rw_exit(&vnic_lock);
515 	if (!is_anchor) {
516 		if (vnic->vn_mnh != NULL)
517 			(void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
518 		if (vnic->vn_muh != NULL)
519 			(void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
520 		if (vnic->vn_mch != NULL)
521 			mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
522 		if (vnic->vn_lower_mh != NULL)
523 			mac_close(vnic->vn_lower_mh);
524 	}
525 
526 	kmem_cache_free(vnic_cache, vnic);
527 	return (err);
528 }
529 
530 /*
531  * Modify the properties of an existing VNIC.
532  */
533 /* ARGSUSED */
534 int
535 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask,
536     vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr,
537     uint_t mac_slot, mac_resource_props_t *mrp)
538 {
539 	vnic_t *vnic = NULL;
540 
541 	rw_enter(&vnic_lock, RW_WRITER);
542 
543 	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
544 	    (mod_hash_val_t *)&vnic) != 0) {
545 		rw_exit(&vnic_lock);
546 		return (ENOENT);
547 	}
548 
549 	rw_exit(&vnic_lock);
550 
551 	return (0);
552 }
553 
/*
 * Delete the VNIC identified by vnic_id. The flags argument is unused.
 *
 * Returns 0 on success, ENOENT if no such VNIC exists, or the errno
 * returned by dls_devnet_destroy() or mac_disable(), in which case the
 * VNIC is left in place.
 */
/* ARGSUSED */
int
vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags)
{
	vnic_t *vnic = NULL;
	mod_hash_val_t val;
	datalink_id_t tmpid;
	int rc;

	rw_enter(&vnic_lock, RW_WRITER);

	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
	    (mod_hash_val_t *)&vnic) != 0) {
		rw_exit(&vnic_lock);
		return (ENOENT);
	}

	if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid, B_TRUE)) != 0) {
		rw_exit(&vnic_lock);
		return (rc);
	}

	ASSERT(vnic_id == tmpid);

	/*
	 * We cannot unregister the MAC yet. Unregistering would
	 * free up mac_impl_t which should not happen at this time.
	 * So disable mac_impl_t by calling mac_disable(). This will prevent
	 * any new claims on mac_impl_t.
	 */
	if ((rc = mac_disable(vnic->vn_mh)) != 0) {
		/* put back the devnet destroyed above, VNIC stays in place */
		(void) dls_devnet_create(vnic->vn_mh, vnic_id);
		rw_exit(&vnic_lock);
		return (rc);
	}

	/* stop vnic_notify_cb() from delivering notifications */
	vnic->vn_enabled = B_FALSE;
	(void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val);
	ASSERT(vnic == (vnic_t *)val);
	vnic_count--;
	rw_exit(&vnic_lock);

	/*
	 * XXX-nicolas shouldn't have a void cast here, if it's
	 * expected that the function will never fail, then we should
	 * have an ASSERT().
	 */
	(void) mac_unregister(vnic->vn_mh);

	/* anchor VNICs have no lower MAC and nothing further to release */
	if (vnic->vn_lower_mh != NULL) {
		/*
		 * Check if MAC address for the vnic was obtained from the
		 * factory MAC addresses. If yes, release it.
		 */
		if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
			(void) mac_addr_factory_release(vnic->vn_mch,
			    vnic->vn_slot_id);
		}
		(void) mac_margin_remove(vnic->vn_lower_mh, vnic->vn_margin);
		(void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
		(void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
		mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
		mac_close(vnic->vn_lower_mh);
	}

	kmem_cache_free(vnic_cache, vnic);
	return (0);
}
622 
/*
 * Transmit entry point. Every message in mp_chain is freed and NULL is
 * returned, i.e. no message is handed back to the caller.
 */
/* ARGSUSED */
mblk_t *
vnic_m_tx(void *arg, mblk_t *mp_chain)
{
	/*
	 * This function could be invoked for an anchor VNIC when sending
	 * broadcast and multicast packets, and unicast packets which did
	 * not match any local known destination.
	 */
	freemsgchain(mp_chain);
	return (NULL);
}
635 
/*
 * Ioctl entry point. No ioctl is supported: every request is
 * negatively acknowledged with ENOTSUP.
 */
/*ARGSUSED*/
static void
vnic_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	miocnak(q, mp, 0, ENOTSUP);
}
642 
643 /*
644  * This entry point cannot be passed-through, since it is invoked
645  * for the per-VNIC kstats which must be exported independently
646  * of the existence of VNIC MAC clients.
647  */
648 static int
649 vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
650 {
651 	vnic_t *vnic = arg;
652 	int rval = 0;
653 
654 	if (vnic->vn_lower_mh == NULL) {
655 		/*
656 		 * It's an anchor VNIC, which does not have any
657 		 * statistics in itself.
658 		 */
659 		return (ENOTSUP);
660 	}
661 
662 	/*
663 	 * ENOTSUP must be reported for unsupported stats, the VNIC
664 	 * driver reports a subset of the stats that would
665 	 * be returned by a real piece of hardware.
666 	 */
667 
668 	switch (stat) {
669 	case MAC_STAT_LINK_STATE:
670 	case MAC_STAT_LINK_UP:
671 	case MAC_STAT_PROMISC:
672 	case MAC_STAT_IFSPEED:
673 	case MAC_STAT_MULTIRCV:
674 	case MAC_STAT_MULTIXMT:
675 	case MAC_STAT_BRDCSTRCV:
676 	case MAC_STAT_BRDCSTXMT:
677 	case MAC_STAT_OPACKETS:
678 	case MAC_STAT_OBYTES:
679 	case MAC_STAT_IERRORS:
680 	case MAC_STAT_OERRORS:
681 	case MAC_STAT_RBYTES:
682 	case MAC_STAT_IPACKETS:
683 		*val = mac_client_stat_get(vnic->vn_mch, stat);
684 		break;
685 	default:
686 		rval = ENOTSUP;
687 	}
688 
689 	return (rval);
690 }
691 
692 /*
693  * Invoked by the upper MAC to retrieve the lower MAC client handle
694  * corresponding to a VNIC. A pointer to this function is obtained
695  * by the upper MAC via capability query.
696  *
697  * XXX-nicolas Note: this currently causes all VNIC MAC clients to
698  * receive the same MAC client handle for the same VNIC. This is ok
699  * as long as we have only one VNIC MAC client which sends and
700  * receives data, but we don't currently enforce this at the MAC layer.
701  */
702 static void *
703 vnic_mac_client_handle(void *vnic_arg)
704 {
705 	vnic_t *vnic = vnic_arg;
706 
707 	return (vnic->vn_mch);
708 }
709 
710 
711 /*
712  * Return information about the specified capability.
713  */
714 /* ARGSUSED */
715 static boolean_t
716 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
717 {
718 	vnic_t *vnic = arg;
719 
720 	switch (cap) {
721 	case MAC_CAPAB_HCKSUM: {
722 		uint32_t *hcksum_txflags = cap_data;
723 
724 		*hcksum_txflags = vnic->vn_hcksum_txflags &
725 		    (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM |
726 		    HCKSUM_INET_PARTIAL);
727 		break;
728 	}
729 	case MAC_CAPAB_VNIC: {
730 		mac_capab_vnic_t *vnic_capab = cap_data;
731 
732 		if (vnic->vn_lower_mh == NULL) {
733 			/*
734 			 * It's an anchor VNIC, we don't have an underlying
735 			 * NIC and MAC client handle.
736 			 */
737 			return (B_FALSE);
738 		}
739 
740 		if (vnic_capab != NULL) {
741 			vnic_capab->mcv_arg = vnic;
742 			vnic_capab->mcv_mac_client_handle =
743 			    vnic_mac_client_handle;
744 		}
745 		break;
746 	}
747 	case MAC_CAPAB_ANCHOR_VNIC: {
748 		/* since it's an anchor VNIC we don't have lower mac handle */
749 		if (vnic->vn_lower_mh == NULL) {
750 			ASSERT(vnic->vn_link_id == 0);
751 			return (B_TRUE);
752 		}
753 		return (B_FALSE);
754 	}
755 	case MAC_CAPAB_NO_NATIVEVLAN:
756 	case MAC_CAPAB_NO_ZCOPY:
757 		return (B_TRUE);
758 	default:
759 		return (B_FALSE);
760 	}
761 	return (B_TRUE);
762 }
763 
/*
 * Start entry point. Nothing to do; always succeeds. Per the
 * passthrough note at the top of this file, this entry point is never
 * invoked directly.
 */
/* ARGSUSED */
static int
vnic_m_start(void *arg)
{
	return (0);
}
770 
/*
 * Stop entry point. Intentionally empty.
 */
/* ARGSUSED */
static void
vnic_m_stop(void *arg)
{
}
776 
/*
 * Promiscuous mode entry point. Nothing to do; always succeeds.
 */
/* ARGSUSED */
static int
vnic_m_promisc(void *arg, boolean_t on)
{
	return (0);
}
783 
/*
 * Multicast add/remove entry point. Nothing to do; always succeeds.
 */
/* ARGSUSED */
static int
vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
{
	return (0);
}
790 
791 static int
792 vnic_m_unicst(void *arg, const uint8_t *macaddr)
793 {
794 	vnic_t *vnic = arg;
795 
796 	return (mac_vnic_unicast_set(vnic->vn_mch, macaddr));
797 }
798 
799 /*
800  * Callback functions for set/get of properties
801  */
802 /*ARGSUSED*/
803 static int
804 vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
805     uint_t pr_valsize, const void *pr_val)
806 {
807 	int 		err = ENOTSUP;
808 	vnic_t		*vn = m_driver;
809 
810 	/* allow setting MTU only on an etherstub */
811 	if (vn->vn_link_id != DATALINK_INVALID_LINKID)
812 		return (err);
813 
814 	switch (pr_num) {
815 	case MAC_PROP_MTU: {
816 		uint32_t	mtu;
817 
818 		if (pr_valsize < sizeof (mtu)) {
819 			err = EINVAL;
820 			break;
821 		}
822 		bcopy(pr_val, &mtu, sizeof (mtu));
823 		if (mtu < ANCHOR_VNIC_MIN_MTU || mtu > ANCHOR_VNIC_MAX_MTU) {
824 			err = EINVAL;
825 			break;
826 		}
827 		err = mac_maxsdu_update(vn->vn_mh, mtu);
828 		break;
829 	}
830 	default:
831 		break;
832 	}
833 	return (err);
834 }
835 
836 /*ARGSUSED*/
837 static int
838 vnic_m_getprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
839     uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm)
840 {
841 	mac_propval_range_t 	range;
842 	vnic_t			*vn = m_driver;
843 	int 			err = ENOTSUP;
844 
845 	/* MTU setting allowed only on an etherstub */
846 	if (vn->vn_link_id != DATALINK_INVALID_LINKID)
847 		return (err);
848 
849 	switch (pr_num) {
850 	case MAC_PROP_MTU:
851 		if (!(pr_flags & MAC_PROP_POSSIBLE))
852 			return (ENOTSUP);
853 		if (pr_valsize < sizeof (mac_propval_range_t))
854 			return (EINVAL);
855 		range.mpr_count = 1;
856 		range.mpr_type = MAC_PROPVAL_UINT32;
857 		range.range_uint32[0].mpur_min = ANCHOR_VNIC_MIN_MTU;
858 		range.range_uint32[0].mpur_max = ANCHOR_VNIC_MAX_MTU;
859 		bcopy(&range, pr_val, sizeof (range));
860 		return (0);
861 	default:
862 		break;
863 	}
864 
865 	return (err);
866 }
867 
868 int
869 vnic_info(vnic_info_t *info)
870 {
871 	vnic_t		*vnic;
872 	int		err;
873 
874 	rw_enter(&vnic_lock, RW_WRITER);
875 
876 	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(info->vn_vnic_id),
877 	    (mod_hash_val_t *)&vnic);
878 	if (err != 0) {
879 		rw_exit(&vnic_lock);
880 		return (ENOENT);
881 	}
882 
883 	info->vn_link_id = vnic->vn_link_id;
884 	info->vn_mac_addr_type = vnic->vn_addr_type;
885 	info->vn_mac_len = vnic->vn_addr_len;
886 	bcopy(vnic->vn_addr, info->vn_mac_addr, MAXMACADDRLEN);
887 	info->vn_mac_slot = vnic->vn_slot_id;
888 	info->vn_mac_prefix_len = 0;
889 	info->vn_vid = vnic->vn_vid;
890 	info->vn_force = vnic->vn_force;
891 
892 	bzero(&info->vn_resource_props, sizeof (mac_resource_props_t));
893 	if (vnic->vn_mch != NULL)
894 		mac_resource_ctl_get(vnic->vn_mch, &info->vn_resource_props);
895 
896 	rw_exit(&vnic_lock);
897 	return (0);
898 }
899 
900 static void
901 vnic_notify_cb(void *arg, mac_notify_type_t type)
902 {
903 	vnic_t *vnic = arg;
904 
905 	/*
906 	 * Do not deliver notifications if the vnic is not fully initialized
907 	 * or is in process of being torn down.
908 	 */
909 	if (!vnic->vn_enabled)
910 		return;
911 
912 	switch (type) {
913 	case MAC_NOTE_UNICST:
914 		/*
915 		 * Only the VLAN VNIC needs to be notified with primary MAC
916 		 * address change.
917 		 */
918 		if (vnic->vn_addr_type != VNIC_MAC_ADDR_TYPE_PRIMARY)
919 			return;
920 
921 		/*  the unicast MAC address value */
922 		mac_unicast_primary_get(vnic->vn_lower_mh, vnic->vn_addr);
923 
924 		/* notify its upper layer MAC about MAC address change */
925 		mac_unicst_update(vnic->vn_mh, (const uint8_t *)vnic->vn_addr);
926 		break;
927 
928 	case MAC_NOTE_LINK:
929 		mac_link_update(vnic->vn_mh,
930 		    mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE));
931 		break;
932 
933 	default:
934 		break;
935 	}
936 }
937