xref: /titanic_50/usr/src/uts/common/io/vnic/vnic_dev.c (revision 551bc2a66868b5cb5be6b70ab9f55515e77a39a9)
1  /*
2   * CDDL HEADER START
3   *
4   * The contents of this file are subject to the terms of the
5   * Common Development and Distribution License (the "License").
6   * You may not use this file except in compliance with the License.
7   *
8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9   * or http://www.opensolaris.org/os/licensing.
10   * See the License for the specific language governing permissions
11   * and limitations under the License.
12   *
13   * When distributing Covered Code, include this CDDL HEADER in each
14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15   * If applicable, add the following below this CDDL HEADER, with the
16   * fields enclosed by brackets "[]" replaced with your own identifying
17   * information: Portions Copyright [yyyy] [name of copyright owner]
18   *
19   * CDDL HEADER END
20   */
21  /*
22   * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23   * Use is subject to license terms.
24   */
25  
26  #pragma ident	"%Z%%M%	%I%	%E% SMI"
27  
28  #include <sys/types.h>
29  #include <sys/sysmacros.h>
30  #include <sys/conf.h>
31  #include <sys/cmn_err.h>
32  #include <sys/list.h>
33  #include <sys/ksynch.h>
34  #include <sys/kmem.h>
35  #include <sys/stream.h>
36  #include <sys/modctl.h>
37  #include <sys/ddi.h>
38  #include <sys/sunddi.h>
39  #include <sys/atomic.h>
40  #include <sys/stat.h>
41  #include <sys/modhash.h>
42  #include <sys/strsubr.h>
43  #include <sys/strsun.h>
44  #include <sys/dlpi.h>
45  #include <sys/mac.h>
46  #include <sys/mac_ether.h>
47  #include <sys/pattr.h>
48  #if 0
49  #include <sys/vlan.h>
50  #endif
51  #include <sys/vnic.h>
52  #include <sys/vnic_impl.h>
53  #include <sys/gld.h>
54  #include <inet/ip.h>
55  #include <inet/ip_impl.h>
56  
57  static int vnic_m_start(void *);
58  static void vnic_m_stop(void *);
59  static int vnic_m_promisc(void *, boolean_t);
60  static int vnic_m_multicst(void *, boolean_t, const uint8_t *);
61  static int vnic_m_unicst(void *, const uint8_t *);
62  static int vnic_m_stat(void *, uint_t, uint64_t *);
63  static void vnic_m_resources(void *);
64  static mblk_t *vnic_m_tx(void *, mblk_t *);
65  static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
66  static void vnic_mac_free(vnic_mac_t *);
67  static uint_t vnic_info_walker(mod_hash_key_t, mod_hash_val_t *, void *);
68  static void vnic_notify_cb(void *, mac_notify_type_t);
69  static int vnic_modify_mac_addr(vnic_t *, uint_t, uchar_t *);
70  static mblk_t *vnic_active_tx(void *, mblk_t *);
71  static int vnic_promisc_set(vnic_t *, boolean_t);
72  
73  static kmem_cache_t	*vnic_cache;
74  static kmem_cache_t	*vnic_mac_cache;
75  static krwlock_t	vnic_lock;
76  static kmutex_t		vnic_mac_lock;
77  static uint_t		vnic_count;
78  
79  /* hash of VNICs (vnic_t's), keyed by VNIC id */
80  static mod_hash_t	*vnic_hash;
81  #define	VNIC_HASHSZ	64
82  #define	VNIC_HASH_KEY(vnic_id)	((mod_hash_key_t)(uintptr_t)vnic_id)
83  
84  /*
85   * Hash of underlying open MACs (vnic_mac_t's), keyed by the string
86   * "<device name><instance number>/<port number>".
87   */
88  static mod_hash_t	*vnic_mac_hash;
89  #define	VNIC_MAC_HASHSZ	64
90  
91  #define	VNIC_MAC_REFHOLD(va) {			\
92  	ASSERT(MUTEX_HELD(&vnic_mac_lock));	\
93  	(va)->va_refs++;			\
94  	ASSERT((va)->va_refs != 0);		\
95  }
96  
97  #define	VNIC_MAC_REFRELE(va) {			\
98  	ASSERT(MUTEX_HELD(&vnic_mac_lock));	\
99  	ASSERT((va)->va_refs != 0);		\
100  	if (--((va)->va_refs) == 0)		\
101  		vnic_mac_free(va);		\
102  }
103  
104  static uchar_t vnic_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
105  
106  /* used by vnic_walker */
107  typedef struct vnic_info_state {
108  	uint32_t	vs_vnic_id;
109  	char		vs_dev_name[MAXNAMELEN];
110  	boolean_t	vs_vnic_found;
111  	vnic_info_new_vnic_fn_t	vs_new_vnic_fn;
112  	void		*vs_fn_arg;
113  	int		vs_rc;
114  } vnic_info_state_t;
115  
116  #define	VNIC_M_CALLBACK_FLAGS	(MC_RESOURCES | MC_GETCAPAB)
117  
118  static mac_callbacks_t vnic_m_callbacks = {
119  	VNIC_M_CALLBACK_FLAGS,
120  	vnic_m_stat,
121  	vnic_m_start,
122  	vnic_m_stop,
123  	vnic_m_promisc,
124  	vnic_m_multicst,
125  	vnic_m_unicst,
126  	vnic_m_tx,
127  	vnic_m_resources,
128  	NULL,			/* m_ioctl */
129  	vnic_m_capab_get
130  };
131  
132  /* ARGSUSED */
133  static int
134  vnic_mac_ctor(void *buf, void *arg, int kmflag)
135  {
136  	vnic_mac_t *vnic_mac = buf;
137  
138  	bzero(vnic_mac, sizeof (vnic_mac_t));
139  	rw_init(&vnic_mac->va_bcast_grp_lock, NULL, RW_DRIVER, NULL);
140  	rw_init(&vnic_mac->va_promisc_lock, NULL, RW_DRIVER, NULL);
141  
142  	return (0);
143  }
144  
145  /* ARGSUSED */
146  static void
147  vnic_mac_dtor(void *buf, void *arg)
148  {
149  	vnic_mac_t *vnic_mac = buf;
150  
151  	rw_destroy(&vnic_mac->va_promisc_lock);
152  	rw_destroy(&vnic_mac->va_bcast_grp_lock);
153  }
154  
155  void
156  vnic_dev_init(void)
157  {
158  	vnic_cache = kmem_cache_create("vnic_cache",
159  	    sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
160  
161  	vnic_mac_cache = kmem_cache_create("vnic_mac_cache",
162  	    sizeof (vnic_mac_t), 0, vnic_mac_ctor, vnic_mac_dtor,
163  	    NULL, NULL, NULL, 0);
164  
165  	vnic_hash = mod_hash_create_idhash("vnic_hash",
166  	    VNIC_HASHSZ, mod_hash_null_valdtor);
167  
168  	vnic_mac_hash = mod_hash_create_strhash("vnic_mac_hash",
169  	    VNIC_MAC_HASHSZ, mod_hash_null_valdtor);
170  
171  	rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);
172  
173  	mutex_init(&vnic_mac_lock, NULL, MUTEX_DEFAULT, NULL);
174  
175  	vnic_count = 0;
176  }
177  
178  void
179  vnic_dev_fini(void)
180  {
181  	ASSERT(vnic_count == 0);
182  
183  	mutex_destroy(&vnic_mac_lock);
184  	rw_destroy(&vnic_lock);
185  	mod_hash_destroy_strhash(vnic_mac_hash);
186  	mod_hash_destroy_idhash(vnic_hash);
187  	kmem_cache_destroy(vnic_mac_cache);
188  	kmem_cache_destroy(vnic_cache);
189  }
190  
191  uint_t
192  vnic_dev_count(void)
193  {
194  	return (vnic_count);
195  }
196  
197  static int
198  vnic_mac_open(const char *dev_name, vnic_mac_t **vmp)
199  {
200  	char *str_key;
201  	int err;
202  	vnic_mac_t *vnic_mac = NULL;
203  	const mac_info_t *mip;
204  
205  	*vmp = NULL;
206  
207  	mutex_enter(&vnic_mac_lock);
208  
209  	err = mod_hash_find(vnic_mac_hash, (mod_hash_key_t)dev_name,
210  	    (mod_hash_val_t *)&vnic_mac);
211  	if (err == 0) {
212  		/* this MAC is already opened, increment reference count */
213  		VNIC_MAC_REFHOLD(vnic_mac);
214  		mutex_exit(&vnic_mac_lock);
215  		*vmp = vnic_mac;
216  		return (0);
217  	}
218  
219  	vnic_mac = kmem_cache_alloc(vnic_mac_cache, KM_SLEEP);
220  
221  	if ((err = mac_open(dev_name, &vnic_mac->va_mh)) != 0) {
222  		vnic_mac->va_mh = NULL;
223  		goto bail;
224  	}
225  
226  	/* only ethernet support, for now */
227  	mip = mac_info(vnic_mac->va_mh);
228  	if (mip->mi_media != DL_ETHER) {
229  		err = ENOTSUP;
230  		goto bail;
231  	}
232  	if (mip->mi_media != mip->mi_nativemedia) {
233  		err = ENOTSUP;
234  		goto bail;
235  	}
236  
237  	(void) strcpy(vnic_mac->va_dev_name, dev_name);
238  
239  	/* add entry to hash table */
240  	str_key = kmem_alloc(strlen(dev_name) + 1, KM_SLEEP);
241  	(void) strcpy(str_key, dev_name);
242  	err = mod_hash_insert(vnic_mac_hash, (mod_hash_key_t)str_key,
243  	    (mod_hash_val_t)vnic_mac);
244  	ASSERT(err == 0);
245  
246  	/* initialize the flow table associated with lower MAC */
247  	vnic_mac->va_addr_len = ETHERADDRL;
248  	(void) vnic_classifier_flow_tab_init(vnic_mac, vnic_mac->va_addr_len,
249  	    KM_SLEEP);
250  
251  	vnic_mac->va_txinfo = mac_vnic_tx_get(vnic_mac->va_mh);
252  	vnic_mac->va_notify_hdl = mac_notify_add(vnic_mac->va_mh,
253  	    vnic_notify_cb, vnic_mac);
254  
255  	VNIC_MAC_REFHOLD(vnic_mac);
256  	*vmp = vnic_mac;
257  	mutex_exit(&vnic_mac_lock);
258  	return (0);
259  
260  bail:
261  	if (vnic_mac != NULL) {
262  		if (vnic_mac->va_mh != NULL)
263  			mac_close(vnic_mac->va_mh);
264  		kmem_cache_free(vnic_mac_cache, vnic_mac);
265  	}
266  	mutex_exit(&vnic_mac_lock);
267  	return (err);
268  }
269  
270  /*
271   * Create a new flow for the active MAC client sharing the NIC
272   * with the VNICs. This allows the unicast packets for that NIC
273   * to be classified and passed up to the active MAC client. It
274   * also allows packets sent from a VNIC to the active link to
275   * be classified by the VNIC transmit function and delivered via
276   * the MAC module locally. Returns B_TRUE on success, B_FALSE on
277   * failure.
278   */
279  static int
280  vnic_init_active_rx(vnic_mac_t *vnic_mac)
281  {
282  	uchar_t nic_mac_addr[MAXMACADDRLEN];
283  
284  	if (vnic_mac->va_active_flow != NULL)
285  		return (B_TRUE);
286  
287  	mac_unicst_get(vnic_mac->va_mh, nic_mac_addr);
288  
289  	vnic_mac->va_active_flow = vnic_classifier_flow_create(
290  	    vnic_mac->va_addr_len, nic_mac_addr, NULL, B_TRUE, KM_SLEEP);
291  
292  	vnic_classifier_flow_add(vnic_mac, vnic_mac->va_active_flow,
293  	    (vnic_rx_fn_t)mac_active_rx, vnic_mac->va_mh, NULL);
294  	return (B_TRUE);
295  }
296  
297  static void
298  vnic_fini_active_rx(vnic_mac_t *vnic_mac)
299  {
300  	if (vnic_mac->va_active_flow == NULL)
301  		return;
302  
303  	vnic_classifier_flow_remove(vnic_mac, vnic_mac->va_active_flow);
304  	vnic_classifier_flow_destroy(vnic_mac->va_active_flow);
305  	vnic_mac->va_active_flow = NULL;
306  }
307  
308  static void
309  vnic_update_active_rx(vnic_mac_t *vnic_mac)
310  {
311  	if (vnic_mac->va_active_flow == NULL)
312  		return;
313  
314  	vnic_fini_active_rx(vnic_mac);
315  	(void) vnic_init_active_rx(vnic_mac);
316  }
317  
318  /*
319   * Copy an mblk, preserving its hardware checksum flags.
320   */
321  mblk_t *
322  vnic_copymsg_cksum(mblk_t *mp)
323  {
324  	mblk_t *mp1;
325  	uint32_t start, stuff, end, value, flags;
326  
327  	mp1 = copymsg(mp);
328  	if (mp1 == NULL)
329  		return (NULL);
330  
331  	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags);
332  	(void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value,
333  	    flags, KM_NOSLEEP);
334  
335  	return (mp1);
336  }
337  
338  /*
339   * Copy an mblk chain, presenting the hardware checksum flags of the
340   * individual mblks.
341   */
342  mblk_t *
343  vnic_copymsgchain_cksum(mblk_t *mp)
344  {
345  	mblk_t *nmp = NULL;
346  	mblk_t **nmpp = &nmp;
347  
348  	for (; mp != NULL; mp = mp->b_next) {
349  		if ((*nmpp = vnic_copymsg_cksum(mp)) == NULL) {
350  			freemsgchain(nmp);
351  			return (NULL);
352  		}
353  
354  		nmpp = &((*nmpp)->b_next);
355  	}
356  
357  	return (nmp);
358  }
359  
360  
361  /*
362   * Process the specified mblk chain for proper handling of hardware
363   * checksum offload. This routine is invoked for loopback VNIC traffic.
364   * The function handles a NULL mblk chain passed as argument.
365   */
366  mblk_t *
367  vnic_fix_cksum(mblk_t *mp_chain)
368  {
369  	mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1;
370  	uint32_t flags, start, stuff, end, value;
371  
372  	for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) {
373  		uint16_t len;
374  		uint32_t offset;
375  		struct ether_header *ehp;
376  		uint16_t sap;
377  
378  		hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value,
379  		    &flags);
380  		if (flags == 0)
381  			continue;
382  
383  		/*
384  		 * Since the processing of checksum offload for loopback
385  		 * traffic requires modification of the packet contents,
386  		 * ensure sure that we are always modifying our own copy.
387  		 */
388  		if (DB_REF(mp) > 1) {
389  			mp1 = copymsg(mp);
390  			if (mp1 == NULL)
391  				continue;
392  			mp1->b_next = mp->b_next;
393  			mp->b_next = NULL;
394  			freemsg(mp);
395  			if (prev != NULL)
396  				prev->b_next = mp1;
397  			else
398  				new_chain = mp1;
399  			mp = mp1;
400  		}
401  
402  		/*
403  		 * Ethernet, and optionally VLAN header.
404  		 */
405  		/*LINTED*/
406  		ehp = (struct ether_header *)mp->b_rptr;
407  		if (ntohs(ehp->ether_type) == VLAN_TPID) {
408  			struct ether_vlan_header *evhp;
409  
410  			ASSERT(MBLKL(mp) >=
411  			    sizeof (struct ether_vlan_header));
412  			/*LINTED*/
413  			evhp = (struct ether_vlan_header *)mp->b_rptr;
414  			sap = ntohs(evhp->ether_type);
415  			offset = sizeof (struct ether_vlan_header);
416  		} else {
417  			sap = ntohs(ehp->ether_type);
418  			offset = sizeof (struct ether_header);
419  		}
420  
421  		if (MBLKL(mp) <= offset) {
422  			offset -= MBLKL(mp);
423  			if (mp->b_cont == NULL) {
424  				/* corrupted packet, skip it */
425  				if (prev != NULL)
426  					prev->b_next = mp->b_next;
427  				else
428  					new_chain = mp->b_next;
429  				mp1 = mp->b_next;
430  				mp->b_next = NULL;
431  				freemsg(mp);
432  				mp = mp1;
433  				continue;
434  			}
435  			mp = mp->b_cont;
436  		}
437  
438  		if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) {
439  			ipha_t *ipha = NULL;
440  
441  			/*
442  			 * In order to compute the full and header
443  			 * checksums, we need to find and parse
444  			 * the IP and/or ULP headers.
445  			 */
446  
447  			sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;
448  
449  			/*
450  			 * IP header.
451  			 */
452  			if (sap != ETHERTYPE_IP)
453  				continue;
454  
455  			ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t));
456  			/*LINTED*/
457  			ipha = (ipha_t *)(mp->b_rptr + offset);
458  
459  			if (flags & HCK_FULLCKSUM) {
460  				ipaddr_t src, dst;
461  				uint32_t cksum;
462  				uint16_t *up;
463  				uint8_t proto;
464  
465  				/*
466  				 * Pointer to checksum field in ULP header.
467  				 */
468  				proto = ipha->ipha_protocol;
469  				ASSERT(ipha->ipha_version_and_hdr_length ==
470  				    IP_SIMPLE_HDR_VERSION);
471  				if (proto == IPPROTO_TCP) {
472  					/*LINTED*/
473  					up = IPH_TCPH_CHECKSUMP(ipha,
474  					    IP_SIMPLE_HDR_LENGTH);
475  				} else {
476  					ASSERT(proto == IPPROTO_UDP);
477  					/*LINTED*/
478  					up = IPH_UDPH_CHECKSUMP(ipha,
479  					    IP_SIMPLE_HDR_LENGTH);
480  				}
481  
482  				/*
483  				 * Pseudo-header checksum.
484  				 */
485  				src = ipha->ipha_src;
486  				dst = ipha->ipha_dst;
487  				len = ntohs(ipha->ipha_length) -
488  				    IP_SIMPLE_HDR_LENGTH;
489  
490  				cksum = (dst >> 16) + (dst & 0xFFFF) +
491  				    (src >> 16) + (src & 0xFFFF);
492  				cksum += htons(len);
493  
494  				/*
495  				 * The checksum value stored in the packet needs
496  				 * to be correct. Compute it here.
497  				 */
498  				*up = 0;
499  				cksum += (((proto) == IPPROTO_UDP) ?
500  				    IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP);
501  				cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH +
502  				    offset, cksum);
503  				*(up) = (uint16_t)(cksum ? cksum : ~cksum);
504  
505  				flags |= HCK_FULLCKSUM_OK;
506  				value = 0xffff;
507  			}
508  
509  			if (flags & HCK_IPV4_HDRCKSUM) {
510  				ASSERT(ipha != NULL);
511  				ipha->ipha_hdr_checksum =
512  				    (uint16_t)ip_csum_hdr(ipha);
513  			}
514  		}
515  
516  		if (flags & HCK_PARTIALCKSUM) {
517  			uint16_t *up, partial, cksum;
518  			uchar_t *ipp; /* ptr to beginning of IP header */
519  
520  			if (mp->b_cont != NULL) {
521  				mblk_t *mp1;
522  
523  				mp1 = msgpullup(mp, offset + end);
524  				if (mp1 == NULL)
525  					continue;
526  				mp1->b_next = mp->b_next;
527  				mp->b_next = NULL;
528  				freemsg(mp);
529  				if (prev != NULL)
530  					prev->b_next = mp1;
531  				else
532  					new_chain = mp1;
533  				mp = mp1;
534  			}
535  
536  			ipp = mp->b_rptr + offset;
537  			/*LINTED*/
538  			up = (uint16_t *)((uchar_t *)ipp + stuff);
539  			partial = *up;
540  			*up = 0;
541  
542  			cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start,
543  			    end - start, partial);
544  			cksum = ~cksum;
545  			*up = cksum ? cksum : ~cksum;
546  
547  			/*
548  			 * Since we already computed the whole checksum,
549  			 * indicate to the stack that it has already
550  			 * been verified by the hardware.
551  			 */
552  			flags &= ~HCK_PARTIALCKSUM;
553  			flags |= (HCK_FULLCKSUM | HCK_FULLCKSUM_OK);
554  			value = 0xffff;
555  		}
556  
557  		(void) hcksum_assoc(mp, NULL, NULL, start, stuff, end,
558  		    value, flags, KM_NOSLEEP);
559  	}
560  
561  	return (new_chain);
562  }
563  
564  static void
565  vnic_mac_close(vnic_mac_t *vnic_mac)
566  {
567  	mutex_enter(&vnic_mac_lock);
568  	VNIC_MAC_REFRELE(vnic_mac);
569  	mutex_exit(&vnic_mac_lock);
570  }
571  
572  static void
573  vnic_mac_free(vnic_mac_t *vnic_mac)
574  {
575  	mod_hash_val_t val;
576  
577  	ASSERT(MUTEX_HELD(&vnic_mac_lock));
578  	vnic_fini_active_rx(vnic_mac);
579  	mac_notify_remove(vnic_mac->va_mh, vnic_mac->va_notify_hdl);
580  	if (vnic_mac->va_mac_set) {
581  		vnic_mac->va_mac_set = B_FALSE;
582  		mac_vnic_clear(vnic_mac->va_mh);
583  	}
584  	vnic_classifier_flow_tab_fini(vnic_mac);
585  	mac_close(vnic_mac->va_mh);
586  
587  	(void) mod_hash_remove(vnic_mac_hash,
588  	    (mod_hash_key_t)vnic_mac->va_dev_name, &val);
589  	ASSERT(vnic_mac == (vnic_mac_t *)val);
590  
591  	kmem_cache_free(vnic_mac_cache, vnic_mac);
592  }
593  
594  /*
595   * Initial VNIC receive routine. Invoked for packets that are steered
596   * to a VNIC but the VNIC has not been started yet.
597   */
598  /* ARGSUSED */
599  static void
600  vnic_rx_initial(void *arg1, void *arg2, mblk_t *mp_chain)
601  {
602  	vnic_t *vnic = arg1;
603  	mblk_t *mp;
604  
605  	/* update stats */
606  	for (mp = mp_chain; mp != NULL; mp = mp->b_next)
607  		vnic->vn_stat_ierrors++;
608  	freemsgchain(mp_chain);
609  }
610  
611  /*
612   * VNIC receive routine invoked after the classifier for the VNIC
613   * has been initialized and the VNIC has been started.
614   */
615  /* ARGSUSED */
616  void
617  vnic_rx(void *arg1, void *arg2, mblk_t *mp_chain)
618  {
619  	vnic_t *vnic = arg1;
620  	mblk_t *mp;
621  
622  	/* update stats */
623  	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
624  		vnic->vn_stat_ipackets++;
625  		vnic->vn_stat_rbytes += msgdsize(mp);
626  	}
627  
628  	/* pass packet up */
629  	mac_rx(vnic->vn_mh, NULL, mp_chain);
630  }
631  
632  /*
633   * Routine to create a MAC-based VNIC. Adds the passed MAC address
634   * to an unused slot in the NIC if one is available. Otherwise it
635   * sets the NIC in promiscuous mode and assigns the MAC address to
636   * a Rx ring if available or a soft ring.
637   */
638  static int
639  vnic_add_unicstaddr(vnic_t *vnic, mac_multi_addr_t *maddr)
640  {
641  	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
642  	int err;
643  
644  	if (mac_unicst_verify(vnic_mac->va_mh, maddr->mma_addr,
645  	    maddr->mma_addrlen) == B_FALSE)
646  		return (EINVAL);
647  
648  	if (mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_MULTIADDRESS,
649  	    &(vnic->vn_mma_capab))) {
650  		if (vnic->vn_maddr_naddrfree == 0) {
651  			/*
652  			 * No free address slots available.
653  			 * Enable promiscuous mode.
654  			 */
655  			goto set_promisc;
656  		}
657  
658  		err = vnic->vn_maddr_add(vnic->vn_maddr_handle, maddr);
659  		if (err != 0) {
660  			if (err == ENOSPC) {
661  				/*
662  				 * There was a race to add addresses
663  				 * with other multiple address consumers,
664  				 * and we lost out. Use promisc mode.
665  				 */
666  				goto set_promisc;
667  			}
668  
669  			return (err);
670  		}
671  
672  		vnic->vn_slot_id = maddr->mma_slot;
673  		vnic->vn_multi_mac = B_TRUE;
674  	} else {
675  		/*
676  		 * Either multiple MAC address support is not
677  		 * available or all available addresses have
678  		 * been used up.
679  		 */
680  	set_promisc:
681  		err = mac_promisc_set(vnic_mac->va_mh, B_TRUE, MAC_DEVPROMISC);
682  		if (err != 0) {
683  			return (err);
684  		}
685  
686  		vnic->vn_promisc_mac = B_TRUE;
687  	}
688  	return (err);
689  }
690  
691  /*
692   * VNIC is getting deleted. Remove the MAC address from the slot.
693   * If promiscuous mode was being used, then unset the promiscuous mode.
694   */
695  static int
696  vnic_remove_unicstaddr(vnic_t *vnic)
697  {
698  	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
699  	int err;
700  
701  	if (vnic->vn_multi_mac) {
702  		ASSERT(vnic->vn_promisc_mac == B_FALSE);
703  		err = vnic->vn_maddr_remove(vnic->vn_maddr_handle,
704  		    vnic->vn_slot_id);
705  		vnic->vn_multi_mac = B_FALSE;
706  	}
707  
708  	if (vnic->vn_promisc_mac) {
709  		ASSERT(vnic->vn_multi_mac == B_FALSE);
710  		err = mac_promisc_set(vnic_mac->va_mh, B_FALSE, MAC_DEVPROMISC);
711  		vnic->vn_promisc_mac = B_FALSE;
712  	}
713  
714  	return (err);
715  }
716  
717  /*
718   * Create a new VNIC upon request from administrator.
719   * Returns 0 on success, an errno on failure.
720   */
721  int
722  vnic_dev_create(uint_t vnic_id, char *dev_name, int mac_len, uchar_t *mac_addr)
723  {
724  	vnic_t *vnic = NULL;
725  	mac_register_t *mac;
726  	int err;
727  	vnic_mac_t *vnic_mac;
728  	const mac_info_t *lower_mac_info;
729  	mac_multi_addr_t maddr;
730  	mac_txinfo_t tx_info;
731  
732  	if (mac_len != ETHERADDRL) {
733  		/* currently only ethernet NICs are supported */
734  		return (EINVAL);
735  	}
736  
737  	rw_enter(&vnic_lock, RW_WRITER);
738  
739  	/* does a VNIC with the same id already exist? */
740  	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
741  	    (mod_hash_val_t *)&vnic);
742  	if (err == 0) {
743  		rw_exit(&vnic_lock);
744  		return (EEXIST);
745  	}
746  
747  	vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP);
748  	if (vnic == NULL) {
749  		rw_exit(&vnic_lock);
750  		return (ENOMEM);
751  	}
752  
753  	/* open underlying MAC */
754  	err = vnic_mac_open(dev_name, &vnic_mac);
755  	if (err != 0) {
756  		kmem_cache_free(vnic_cache, vnic);
757  		rw_exit(&vnic_lock);
758  		return (err);
759  	}
760  
761  	bzero(vnic, sizeof (*vnic));
762  	vnic->vn_id = vnic_id;
763  	vnic->vn_vnic_mac = vnic_mac;
764  
765  	vnic->vn_started = B_FALSE;
766  	vnic->vn_promisc = B_FALSE;
767  	vnic->vn_multi_mac = B_FALSE;
768  	vnic->vn_bcast_grp = B_FALSE;
769  
770  	/* set the VNIC MAC address */
771  	maddr.mma_addrlen = mac_len;
772  	maddr.mma_slot = 0;
773  	maddr.mma_flags = 0;
774  	bcopy(mac_addr, maddr.mma_addr, mac_len);
775  	if ((err = vnic_add_unicstaddr(vnic, &maddr)) != 0)
776  		goto bail;
777  	bcopy(mac_addr, vnic->vn_addr, mac_len);
778  
779  	/* set the initial VNIC capabilities */
780  	if (!mac_vnic_capab_get(vnic_mac->va_mh, MAC_CAPAB_HCKSUM,
781  	    &vnic->vn_hcksum_txflags))
782  		vnic->vn_hcksum_txflags = 0;
783  
784  	/* register with the MAC module */
785  	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
786  		goto bail;
787  
788  	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
789  	mac->m_driver = vnic;
790  	mac->m_dip = vnic_get_dip();
791  	mac->m_instance = vnic_id;
792  	mac->m_src_addr = vnic->vn_addr;
793  	mac->m_callbacks = &vnic_m_callbacks;
794  
795  	lower_mac_info = mac_info(vnic_mac->va_mh);
796  	mac->m_min_sdu = lower_mac_info->mi_sdu_min;
797  	mac->m_max_sdu = lower_mac_info->mi_sdu_max;
798  
799  	err = mac_register(mac, &vnic->vn_mh);
800  	mac_free(mac);
801  	if (err != 0)
802  		goto bail;
803  
804  	/* add new VNIC to hash table */
805  	err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id),
806  	    (mod_hash_val_t)vnic);
807  	ASSERT(err == 0);
808  	vnic_count++;
809  
810  	rw_exit(&vnic_lock);
811  
812  	/* Create a flow, initialized with the MAC address of the VNIC */
813  	if ((vnic->vn_flow_ent = vnic_classifier_flow_create(mac_len, mac_addr,
814  	    NULL, B_FALSE, KM_SLEEP)) == NULL) {
815  		(void) vnic_dev_delete(vnic_id);
816  		vnic = NULL;
817  		err = ENOMEM;
818  		goto bail_unlocked;
819  	}
820  
821  	vnic_classifier_flow_add(vnic_mac, vnic->vn_flow_ent, vnic_rx_initial,
822  	    vnic, vnic);
823  
824  	/* setup VNIC to receive broadcast packets */
825  	err = vnic_bcast_add(vnic, vnic_brdcst_mac, MAC_ADDRTYPE_BROADCAST);
826  	if (err != 0) {
827  		(void) vnic_dev_delete(vnic_id);
828  		vnic = NULL;
829  		goto bail_unlocked;
830  	}
831  	vnic->vn_bcast_grp = B_TRUE;
832  
833  	mutex_enter(&vnic_mac_lock);
834  	if (!vnic_mac->va_mac_set) {
835  		/*
836  		 * We want to MAC layer to call the VNIC tx outbound
837  		 * routine, so that local broadcast packets sent by
838  		 * the active interface sharing the underlying NIC (if
839  		 * any), can be broadcast to every VNIC.
840  		 */
841  		tx_info.mt_fn = vnic_active_tx;
842  		tx_info.mt_arg = vnic_mac;
843  		if (!mac_vnic_set(vnic_mac->va_mh, &tx_info,
844  		    vnic_m_capab_get, vnic)) {
845  			mutex_exit(&vnic_mac_lock);
846  			(void) vnic_dev_delete(vnic_id);
847  			vnic = NULL;
848  			err = EBUSY;
849  			goto bail_unlocked;
850  		}
851  		vnic_mac->va_mac_set = B_TRUE;
852  	}
853  	mutex_exit(&vnic_mac_lock);
854  
855  	/* allow passing packets to NIC's active MAC client */
856  	if (!vnic_init_active_rx(vnic_mac)) {
857  		(void) vnic_dev_delete(vnic_id);
858  		vnic = NULL;
859  		err = ENOMEM;
860  		goto bail_unlocked;
861  	}
862  
863  	return (0);
864  
865  bail:
866  	(void) vnic_remove_unicstaddr(vnic);
867  	vnic_mac_close(vnic_mac);
868  	rw_exit(&vnic_lock);
869  
870  bail_unlocked:
871  	if (vnic != NULL) {
872  		kmem_cache_free(vnic_cache, vnic);
873  	}
874  
875  	return (err);
876  }
877  
878  /*
879   * Modify the properties of an existing VNIC.
880   */
881  /* ARGSUSED */
882  int
883  vnic_dev_modify(uint_t vnic_id, uint_t modify_mask,
884      vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr)
885  {
886  	vnic_t *vnic = NULL;
887  	int rv = 0;
888  	boolean_t notify_mac_addr = B_FALSE;
889  
890  	rw_enter(&vnic_lock, RW_WRITER);
891  
892  	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
893  	    (mod_hash_val_t *)&vnic) != 0) {
894  		rw_exit(&vnic_lock);
895  		return (ENOENT);
896  	}
897  
898  	if (modify_mask & VNIC_IOC_MODIFY_ADDR) {
899  		rv = vnic_modify_mac_addr(vnic, mac_len, mac_addr);
900  		if (rv == 0)
901  			notify_mac_addr = B_TRUE;
902  	}
903  
904  	rw_exit(&vnic_lock);
905  
906  	if (notify_mac_addr)
907  		mac_unicst_update(vnic->vn_mh, mac_addr);
908  
909  	return (rv);
910  }
911  
912  int
913  vnic_dev_delete(uint_t vnic_id)
914  {
915  	vnic_t *vnic = NULL;
916  	mod_hash_val_t val;
917  	vnic_flow_t *flent;
918  	int rc;
919  	vnic_mac_t *vnic_mac;
920  
921  	rw_enter(&vnic_lock, RW_WRITER);
922  
923  	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
924  	    (mod_hash_val_t *)&vnic) != 0) {
925  		rw_exit(&vnic_lock);
926  		return (ENOENT);
927  	}
928  
929  	/*
930  	 * We cannot unregister the MAC yet. Unregistering would
931  	 * free up mac_impl_t which should not happen at this time.
932  	 * Packets could be entering vnic_rx() through the
933  	 * flow entry and so mac_impl_t cannot be NULL. So disable
934  	 * mac_impl_t by calling mac_disable(). This will prevent any
935  	 * new claims on mac_impl_t.
936  	 */
937  	if (mac_disable(vnic->vn_mh) != 0) {
938  		rw_exit(&vnic_lock);
939  		return (EBUSY);
940  	}
941  
942  	(void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val);
943  	ASSERT(vnic == (vnic_t *)val);
944  
945  	if (vnic->vn_bcast_grp)
946  		(void) vnic_bcast_delete(vnic, vnic_brdcst_mac);
947  
948  	flent = vnic->vn_flow_ent;
949  	if (flent != NULL) {
950  		/*
951  		 * vnic_classifier_flow_destroy() ensures that the
952  		 * flow is no longer used.
953  		 */
954  		vnic_classifier_flow_remove(vnic->vn_vnic_mac, flent);
955  		vnic_classifier_flow_destroy(flent);
956  	}
957  
958  	rc = mac_unregister(vnic->vn_mh);
959  	ASSERT(rc == 0);
960  	(void) vnic_remove_unicstaddr(vnic);
961  	vnic_mac = vnic->vn_vnic_mac;
962  	kmem_cache_free(vnic_cache, vnic);
963  	vnic_count--;
964  	rw_exit(&vnic_lock);
965  	vnic_mac_close(vnic_mac);
966  	return (0);
967  }
968  
969  /*
970   * For the specified packet chain, return a sub-chain to be sent
971   * and the transmit function to be used to send the packet. Also
972   * return a pointer to the sub-chain of packets that should
973   * be re-classified. If the function returns NULL, the packet
974   * should be sent using the underlying NIC.
975   */
976  static vnic_flow_t *
977  vnic_classify(vnic_mac_t *vnic_mac, mblk_t *mp, mblk_t **mp_chain_rest)
978  {
979  	vnic_flow_t *flow_ent;
980  
981  	/* one packet at a time */
982  	*mp_chain_rest = mp->b_next;
983  	mp->b_next = NULL;
984  
985  	/* do classification on the packet */
986  	flow_ent = vnic_classifier_get_flow(vnic_mac, mp);
987  
988  	return (flow_ent);
989  }
990  
991  /*
992   * Send a packet chain to a local VNIC or an active MAC client.
993   */
994  static void
995  vnic_local_tx(vnic_mac_t *vnic_mac, vnic_flow_t *flow_ent, mblk_t *mp_chain)
996  {
997  	mblk_t *mp1;
998  	const vnic_flow_fn_info_t *fn_info;
999  	vnic_t *vnic;
1000  
1001  	if (!vnic_classifier_is_active(flow_ent) &&
1002  	    mac_promisc_get(vnic_mac->va_mh, MAC_PROMISC)) {
1003  		/*
1004  		 * If the MAC is in promiscous mode,
1005  		 * send a copy of the active client.
1006  		 */
1007  		if ((mp1 = vnic_copymsgchain_cksum(mp_chain)) == NULL)
1008  			goto sendit;
1009  		if ((mp1 = vnic_fix_cksum(mp1)) == NULL)
1010  			goto sendit;
1011  		mac_active_rx(vnic_mac->va_mh, NULL, mp1);
1012  	}
1013  sendit:
1014  	fn_info = vnic_classifier_get_fn_info(flow_ent);
1015  	/*
1016  	 * If the vnic to which we would deliver this packet is in
1017  	 * promiscuous mode then it already received the packet via
1018  	 * vnic_promisc_rx().
1019  	 *
1020  	 * XXX assumes that ff_arg2 is a vnic_t pointer if it is
1021  	 * non-NULL (currently always true).
1022  	 */
1023  	vnic = (vnic_t *)fn_info->ff_arg2;
1024  	if ((vnic != NULL) && vnic->vn_promisc)
1025  		freemsg(mp_chain);
1026  	else if ((mp1 = vnic_fix_cksum(mp_chain)) != NULL)
1027  		(fn_info->ff_fn)(fn_info->ff_arg1, fn_info->ff_arg2, mp1);
1028  }
1029  
1030  /*
1031   * This function is invoked when a MAC client needs to send a packet
1032   * to a NIC which is shared by VNICs. It is passed to the MAC layer
1033   * by a call to mac_vnic_set() when the NIC is opened, and is returned
1034   * to MAC clients by mac_tx_get() when VNICs are present.
1035   */
1036  mblk_t *
1037  vnic_active_tx(void *arg, mblk_t *mp_chain)
1038  {
1039  	vnic_mac_t *vnic_mac = arg;
1040  	mblk_t *mp, *extra_mp = NULL;
1041  	vnic_flow_t *flow_ent;
1042  	void *flow_cookie;
1043  	const mac_txinfo_t *mtp = vnic_mac->va_txinfo;
1044  
1045  	for (mp = mp_chain; mp != NULL; mp = extra_mp) {
1046  		mblk_t *next;
1047  
1048  		next = mp->b_next;
1049  		mp->b_next = NULL;
1050  
1051  		vnic_promisc_rx(vnic_mac, (vnic_t *)-1, mp);
1052  
1053  		flow_ent = vnic_classify(vnic_mac, mp, &extra_mp);
1054  		ASSERT(extra_mp == NULL);
1055  		extra_mp = next;
1056  
1057  		if (flow_ent != NULL) {
1058  			flow_cookie = vnic_classifier_get_client_cookie(
1059  			    flow_ent);
1060  			if (flow_cookie != NULL) {
1061  				/*
1062  				 * Send a copy to every VNIC defined on the
1063  				 * interface, as well as the underlying MAC.
1064  				 */
1065  				vnic_bcast_send(flow_cookie, (vnic_t *)-1, mp);
1066  			} else {
1067  				/*
1068  				 * loopback the packet to a local VNIC or
1069  				 * an active MAC client.
1070  				 */
1071  				vnic_local_tx(vnic_mac, flow_ent, mp);
1072  			}
1073  			VNIC_FLOW_REFRELE(flow_ent);
1074  			mp_chain = NULL;
1075  		} else {
1076  			/*
1077  			 * Non-VNIC destination, send via the underlying
1078  			 * NIC. In order to avoid a recursive call
1079  			 * to this function, we ensured that mtp points
1080  			 * to the unerlying NIC transmit function
1081  			 * by inilizating through mac_vnic_tx_get().
1082  			 */
1083  			mp_chain = mtp->mt_fn(mtp->mt_arg, mp);
1084  			if (mp_chain != NULL)
1085  				break;
1086  		}
1087  	}
1088  
1089  	if ((mp_chain != NULL) && (extra_mp != NULL)) {
1090  		ASSERT(mp_chain->b_next == NULL);
1091  		mp_chain->b_next = extra_mp;
1092  	}
1093  	return (mp_chain);
1094  }
1095  
1096  /*
1097   * VNIC transmit function.
1098   */
1099  mblk_t *
1100  vnic_m_tx(void *arg, mblk_t *mp_chain)
1101  {
1102  	vnic_t *vnic = arg;
1103  	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
1104  	mblk_t *mp, *extra_mp = NULL;
1105  	vnic_flow_t *flow_ent;
1106  	void *flow_cookie;
1107  
1108  	/*
1109  	 * Update stats.
1110  	 */
1111  	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
1112  		vnic->vn_stat_opackets++;
1113  		vnic->vn_stat_obytes += msgdsize(mp);
1114  	}
1115  
1116  	for (mp = mp_chain; mp != NULL; mp = extra_mp) {
1117  		mblk_t *next;
1118  
1119  		next = mp->b_next;
1120  		mp->b_next = NULL;
1121  
1122  		vnic_promisc_rx(vnic->vn_vnic_mac, vnic, mp);
1123  
1124  		flow_ent = vnic_classify(vnic->vn_vnic_mac, mp, &extra_mp);
1125  		ASSERT(extra_mp == NULL);
1126  		extra_mp = next;
1127  
1128  		if (flow_ent != NULL) {
1129  			flow_cookie = vnic_classifier_get_client_cookie(
1130  			    flow_ent);
1131  			if (flow_cookie != NULL) {
1132  				/*
1133  				 * The vnic_bcast_send function expects
1134  				 * to receive the sender VNIC as value
1135  				 * for arg2.
1136  				 */
1137  				vnic_bcast_send(flow_cookie, vnic, mp);
1138  			} else {
1139  				/*
1140  				 * loopback the packet to a local VNIC or
1141  				 * an active MAC client.
1142  				 */
1143  				vnic_local_tx(vnic_mac, flow_ent, mp);
1144  			}
1145  			VNIC_FLOW_REFRELE(flow_ent);
1146  			mp_chain = NULL;
1147  		} else {
1148  			/*
1149  			 * Non-local destination, send via the underlying
1150  			 * NIC.
1151  			 */
1152  			const mac_txinfo_t *mtp = vnic->vn_txinfo;
1153  			mp_chain = mtp->mt_fn(mtp->mt_arg, mp);
1154  			if (mp_chain != NULL)
1155  				break;
1156  		}
1157  	}
1158  
1159  	/* update stats to account for unsent packets */
1160  	for (mp = mp_chain; mp != NULL; mp = mp->b_next) {
1161  		vnic->vn_stat_opackets--;
1162  		vnic->vn_stat_obytes -= msgdsize(mp);
1163  		vnic->vn_stat_oerrors++;
1164  		/*
1165  		 * link back in the last portion not counted due to bandwidth
1166  		 * control.
1167  		 */
1168  		if (mp->b_next == NULL) {
1169  			mp->b_next = extra_mp;
1170  			break;
1171  		}
1172  	}
1173  
1174  	return (mp_chain);
1175  }
1176  
1177  /* ARGSUSED */
1178  static void
1179  vnic_m_resources(void *arg)
1180  {
1181  	/* no resources to advertise */
1182  }
1183  
1184  static int
1185  vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
1186  {
1187  	vnic_t *vnic = arg;
1188  	int rval = 0;
1189  
1190  	rw_enter(&vnic_lock, RW_READER);
1191  
1192  	switch (stat) {
1193  	case ETHER_STAT_LINK_DUPLEX:
1194  		*val = mac_stat_get(vnic->vn_vnic_mac->va_mh,
1195  		    ETHER_STAT_LINK_DUPLEX);
1196  		break;
1197  	case MAC_STAT_IFSPEED:
1198  		*val = mac_stat_get(vnic->vn_vnic_mac->va_mh,
1199  		    MAC_STAT_IFSPEED);
1200  		break;
1201  	case MAC_STAT_MULTIRCV:
1202  		*val = vnic->vn_stat_multircv;
1203  		break;
1204  	case MAC_STAT_BRDCSTRCV:
1205  		*val = vnic->vn_stat_brdcstrcv;
1206  		break;
1207  	case MAC_STAT_MULTIXMT:
1208  		*val = vnic->vn_stat_multixmt;
1209  		break;
1210  	case MAC_STAT_BRDCSTXMT:
1211  		*val = vnic->vn_stat_brdcstxmt;
1212  		break;
1213  	case MAC_STAT_IERRORS:
1214  		*val = vnic->vn_stat_ierrors;
1215  		break;
1216  	case MAC_STAT_OERRORS:
1217  		*val = vnic->vn_stat_oerrors;
1218  		break;
1219  	case MAC_STAT_RBYTES:
1220  		*val = vnic->vn_stat_rbytes;
1221  		break;
1222  	case MAC_STAT_IPACKETS:
1223  		*val = vnic->vn_stat_ipackets;
1224  		break;
1225  	case MAC_STAT_OBYTES:
1226  		*val = vnic->vn_stat_obytes;
1227  		break;
1228  	case MAC_STAT_OPACKETS:
1229  		*val = vnic->vn_stat_opackets;
1230  		break;
1231  	default:
1232  		rval = ENOTSUP;
1233  	}
1234  
1235  	rw_exit(&vnic_lock);
1236  	return (rval);
1237  }
1238  
1239  /*
1240   * Return information about the specified capability.
1241   */
1242  /* ARGSUSED */
1243  static boolean_t
1244  vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
1245  {
1246  	vnic_t *vnic = arg;
1247  
1248  	switch (cap) {
1249  	case MAC_CAPAB_POLL:
1250  		return (B_TRUE);
1251  	case MAC_CAPAB_HCKSUM: {
1252  		uint32_t *hcksum_txflags = cap_data;
1253  
1254  		*hcksum_txflags = vnic->vn_hcksum_txflags &
1255  		    (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM |
1256  		    HCKSUM_INET_PARTIAL);
1257  		break;
1258  	}
1259  	default:
1260  		return (B_FALSE);
1261  	}
1262  	return (B_TRUE);
1263  }
1264  
1265  static int
1266  vnic_m_start(void *arg)
1267  {
1268  	vnic_t *vnic = arg;
1269  	mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh;
1270  	int rc;
1271  
1272  	rc = mac_start(lower_mh);
1273  	if (rc != 0)
1274  		return (rc);
1275  
1276  	vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx, vnic, vnic);
1277  	return (0);
1278  }
1279  
1280  static void
1281  vnic_m_stop(void *arg)
1282  {
1283  	vnic_t *vnic = arg;
1284  	mac_handle_t lower_mh = vnic->vn_vnic_mac->va_mh;
1285  
1286  	vnic_classifier_flow_update_fn(vnic->vn_flow_ent, vnic_rx_initial,
1287  	    vnic, vnic);
1288  	mac_stop(lower_mh);
1289  }
1290  
1291  /* ARGSUSED */
1292  static int
1293  vnic_m_promisc(void *arg, boolean_t on)
1294  {
1295  	vnic_t *vnic = arg;
1296  
1297  	return (vnic_promisc_set(vnic, on));
1298  }
1299  
1300  static int
1301  vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
1302  {
1303  	vnic_t *vnic = arg;
1304  	int rc = 0;
1305  
1306  	if (add)
1307  		rc = vnic_bcast_add(vnic, addrp, MAC_ADDRTYPE_MULTICAST);
1308  	else
1309  		vnic_bcast_delete(vnic, addrp);
1310  
1311  	return (rc);
1312  }
1313  
1314  static int
1315  vnic_m_unicst(void *arg, const uint8_t *mac_addr)
1316  {
1317  	vnic_t *vnic = arg;
1318  	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
1319  	int rv;
1320  
1321  	rw_enter(&vnic_lock, RW_WRITER);
1322  	rv = vnic_modify_mac_addr(vnic, vnic_mac->va_addr_len,
1323  	    (uchar_t *)mac_addr);
1324  	rw_exit(&vnic_lock);
1325  
1326  	if (rv == 0)
1327  		mac_unicst_update(vnic->vn_mh, mac_addr);
1328  	return (0);
1329  }
1330  
1331  int
1332  vnic_info(uint_t *nvnics, uint32_t vnic_id, char *dev_name, void *fn_arg,
1333      vnic_info_new_vnic_fn_t new_vnic_fn)
1334  {
1335  	vnic_info_state_t state;
1336  	int rc = 0;
1337  
1338  	rw_enter(&vnic_lock, RW_READER);
1339  
1340  	*nvnics = vnic_count;
1341  
1342  	bzero(&state, sizeof (state));
1343  	state.vs_vnic_id = vnic_id;
1344  	bcopy(state.vs_dev_name, dev_name, MAXNAMELEN);
1345  	state.vs_new_vnic_fn = new_vnic_fn;
1346  	state.vs_fn_arg = fn_arg;
1347  
1348  	mod_hash_walk(vnic_hash, vnic_info_walker, &state);
1349  
1350  	if ((rc = state.vs_rc) == 0 && vnic_id != 0 &&
1351  	    !state.vs_vnic_found)
1352  		rc = ENOENT;
1353  
1354  	rw_exit(&vnic_lock);
1355  	return (rc);
1356  }
1357  
1358  /*
1359   * Walker invoked when building a list of vnics that must be passed
1360   * up to user space.
1361   */
1362  /*ARGSUSED*/
1363  static uint_t
1364  vnic_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
1365  {
1366  	vnic_t *vnic;
1367  	vnic_info_state_t *state = arg;
1368  
1369  	if (state->vs_rc != 0)
1370  		return (MH_WALK_TERMINATE);	/* terminate walk */
1371  
1372  	vnic = (vnic_t *)val;
1373  
1374  	if (state->vs_vnic_id != 0 && vnic->vn_id != state->vs_vnic_id)
1375  		goto bail;
1376  
1377  	state->vs_vnic_found = B_TRUE;
1378  
1379  	state->vs_rc = state->vs_new_vnic_fn(state->vs_fn_arg,
1380  	    vnic->vn_id, vnic->vn_addr_type, vnic->vn_vnic_mac->va_addr_len,
1381  	    vnic->vn_addr, vnic->vn_vnic_mac->va_dev_name);
1382  bail:
1383  	return ((state->vs_rc == 0) ? MH_WALK_CONTINUE : MH_WALK_TERMINATE);
1384  }
1385  
1386  /*
1387   * vnic_notify_cb() and vnic_notify_walker() below are used to
1388   * process events received from an underlying NIC and, if needed,
1389   * forward these events to the VNICs defined on top of that NIC.
1390   */
1391  
1392  typedef struct vnic_notify_state {
1393  	mac_notify_type_t	vo_type;
1394  	vnic_mac_t		*vo_vnic_mac;
1395  } vnic_notify_state_t;
1396  
1397  /* ARGSUSED */
1398  static uint_t
1399  vnic_notify_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
1400  {
1401  	vnic_t *vnic = (vnic_t *)val;
1402  	vnic_notify_state_t *state = arg;
1403  
1404  	/* ignore VNICs that don't use the specified underlying MAC */
1405  	if (vnic->vn_vnic_mac != state->vo_vnic_mac)
1406  		return (MH_WALK_CONTINUE);
1407  
1408  	switch (state->vo_type) {
1409  	case MAC_NOTE_TX:
1410  		mac_tx_update(vnic->vn_mh);
1411  		break;
1412  	case MAC_NOTE_LINK:
1413  		/*
1414  		 * The VNIC link state must be up regardless of
1415  		 * the link state of the underlying NIC to maintain
1416  		 * connectivity between VNICs on the same host.
1417  		 */
1418  		mac_link_update(vnic->vn_mh, LINK_STATE_UP);
1419  		break;
1420  	case MAC_NOTE_UNICST:
1421  		vnic_update_active_rx(vnic->vn_vnic_mac);
1422  		break;
1423  	case MAC_NOTE_VNIC:
1424  		/* only for clients which share a NIC with a VNIC */
1425  		break;
1426  	case MAC_NOTE_PROMISC:
1427  		mutex_enter(&vnic_mac_lock);
1428  		vnic->vn_vnic_mac->va_txinfo = mac_vnic_tx_get(
1429  		    vnic->vn_vnic_mac->va_mh);
1430  		mutex_exit(&vnic_mac_lock);
1431  		break;
1432  	}
1433  
1434  	return (MH_WALK_CONTINUE);
1435  }
1436  
1437  static void
1438  vnic_notify_cb(void *arg, mac_notify_type_t type)
1439  {
1440  	vnic_mac_t *vnic = arg;
1441  	vnic_notify_state_t state;
1442  
1443  	state.vo_type = type;
1444  	state.vo_vnic_mac = vnic;
1445  
1446  	rw_enter(&vnic_lock, RW_READER);
1447  	mod_hash_walk(vnic_hash, vnic_notify_walker, &state);
1448  	rw_exit(&vnic_lock);
1449  }
1450  
1451  static int
1452  vnic_modify_mac_addr(vnic_t *vnic, uint_t mac_len, uchar_t *mac_addr)
1453  {
1454  	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
1455  	vnic_flow_t *vnic_flow = vnic->vn_flow_ent;
1456  
1457  	ASSERT(RW_WRITE_HELD(&vnic_lock));
1458  
1459  	if (mac_len != vnic_mac->va_addr_len)
1460  		return (EINVAL);
1461  
1462  	vnic_classifier_flow_update_addr(vnic_flow, mac_addr);
1463  	return (0);
1464  }
1465  
1466  static int
1467  vnic_promisc_set(vnic_t *vnic, boolean_t on)
1468  {
1469  	vnic_mac_t *vnic_mac = vnic->vn_vnic_mac;
1470  	int r = -1;
1471  
1472  	if (vnic->vn_promisc == on)
1473  		return (0);
1474  
1475  	if (on) {
1476  		r = mac_promisc_set(vnic_mac->va_mh, B_TRUE, MAC_DEVPROMISC);
1477  		if (r != 0)
1478  			return (r);
1479  
1480  		rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER);
1481  		vnic->vn_promisc_next = vnic_mac->va_promisc;
1482  		vnic_mac->va_promisc = vnic;
1483  		vnic_mac->va_promisc_gen++;
1484  
1485  		vnic->vn_promisc = B_TRUE;
1486  		rw_exit(&vnic_mac->va_promisc_lock);
1487  
1488  		return (0);
1489  	} else {
1490  		vnic_t *loop, *prev = NULL;
1491  
1492  		rw_enter(&vnic_mac->va_promisc_lock, RW_WRITER);
1493  		loop = vnic_mac->va_promisc;
1494  
1495  		while ((loop != NULL) && (loop != vnic)) {
1496  			prev = loop;
1497  			loop = loop->vn_promisc_next;
1498  		}
1499  
1500  		if ((loop != NULL) &&
1501  		    ((r = mac_promisc_set(vnic_mac->va_mh, B_FALSE,
1502  		    MAC_DEVPROMISC)) == 0)) {
1503  			if (prev != NULL)
1504  				prev->vn_promisc_next = loop->vn_promisc_next;
1505  			else
1506  				vnic_mac->va_promisc = loop->vn_promisc_next;
1507  			vnic_mac->va_promisc_gen++;
1508  
1509  			vnic->vn_promisc = B_FALSE;
1510  		}
1511  		rw_exit(&vnic_mac->va_promisc_lock);
1512  
1513  		return (r);
1514  	}
1515  }
1516  
1517  void
1518  vnic_promisc_rx(vnic_mac_t *vnic_mac, vnic_t *sender, mblk_t *mp)
1519  {
1520  	vnic_t *loop;
1521  	vnic_flow_t *flow;
1522  	const vnic_flow_fn_info_t *fn_info;
1523  	mac_header_info_t hdr_info;
1524  	boolean_t dst_must_match = B_TRUE;
1525  
1526  	ASSERT(mp->b_next == NULL);
1527  
1528  	rw_enter(&vnic_mac->va_promisc_lock, RW_READER);
1529  	if (vnic_mac->va_promisc == NULL)
1530  		goto done;
1531  
1532  	if (mac_header_info(vnic_mac->va_mh, mp, &hdr_info) != 0)
1533  		goto done;
1534  
1535  	/*
1536  	 * If this is broadcast or multicast then the destination
1537  	 * address need not match for us to deliver it.
1538  	 */
1539  	if ((hdr_info.mhi_dsttype == MAC_ADDRTYPE_BROADCAST) ||
1540  	    (hdr_info.mhi_dsttype == MAC_ADDRTYPE_MULTICAST))
1541  		dst_must_match = B_FALSE;
1542  
1543  	for (loop = vnic_mac->va_promisc;
1544  	    loop != NULL;
1545  	    loop = loop->vn_promisc_next) {
1546  		if (loop == sender)
1547  			continue;
1548  
1549  		if (dst_must_match &&
1550  		    (bcmp(hdr_info.mhi_daddr, loop->vn_addr,
1551  		    sizeof (loop->vn_addr)) != 0))
1552  			continue;
1553  
1554  		flow = loop->vn_flow_ent;
1555  		ASSERT(flow != NULL);
1556  
1557  		if (!flow->vf_is_active) {
1558  			mblk_t *copy;
1559  			uint64_t gen;
1560  
1561  			if ((copy = vnic_copymsg_cksum(mp)) == NULL)
1562  				break;
1563  			if ((sender != NULL) &&
1564  			    ((copy = vnic_fix_cksum(copy)) == NULL))
1565  				break;
1566  
1567  			VNIC_FLOW_REFHOLD(flow);
1568  			gen = vnic_mac->va_promisc_gen;
1569  			rw_exit(&vnic_mac->va_promisc_lock);
1570  
1571  			fn_info = vnic_classifier_get_fn_info(flow);
1572  			(fn_info->ff_fn)(fn_info->ff_arg1,
1573  			    fn_info->ff_arg2, copy);
1574  
1575  			VNIC_FLOW_REFRELE(flow);
1576  			rw_enter(&vnic_mac->va_promisc_lock, RW_READER);
1577  			if (vnic_mac->va_promisc_gen != gen)
1578  				break;
1579  		}
1580  	}
1581  done:
1582  	rw_exit(&vnic_mac->va_promisc_lock);
1583  }
1584