1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <sys/types.h>
27 #include <sys/errno.h>
28 #include <sys/debug.h>
29 #include <sys/time.h>
30 #include <sys/sysmacros.h>
31 #include <sys/systm.h>
32 #include <sys/user.h>
33 #include <sys/stropts.h>
34 #include <sys/stream.h>
35 #include <sys/strlog.h>
36 #include <sys/strsubr.h>
37 #include <sys/cmn_err.h>
38 #include <sys/cpu.h>
39 #include <sys/kmem.h>
40 #include <sys/conf.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/ksynch.h>
44 #include <sys/stat.h>
45 #include <sys/kstat.h>
46 #include <sys/vtrace.h>
47 #include <sys/strsun.h>
48 #include <sys/dlpi.h>
49 #include <sys/ethernet.h>
50 #include <net/if.h>
51 #include <sys/varargs.h>
52 #include <sys/machsystm.h>
53 #include <sys/modctl.h>
54 #include <sys/modhash.h>
55 #include <sys/mac.h>
56 #include <sys/mac_ether.h>
57 #include <sys/taskq.h>
58 #include <sys/note.h>
59 #include <sys/mach_descrip.h>
60 #include <sys/mdeg.h>
61 #include <sys/ldc.h>
62 #include <sys/vsw_fdb.h>
63 #include <sys/vsw.h>
64 #include <sys/vio_mailbox.h>
65 #include <sys/vnet_mailbox.h>
66 #include <sys/vnet_common.h>
67 #include <sys/vio_util.h>
68 #include <sys/sdt.h>
69 #include <sys/atomic.h>
70 #include <sys/vlan.h>
71
72 /* Switching setup routines */
73 void vsw_setup_switching_thread(void *arg);
74 int vsw_setup_switching_start(vsw_t *vswp);
75 void vsw_setup_switching_stop(vsw_t *vswp);
76 int vsw_setup_switching(vsw_t *);
77 void vsw_setup_switching_post_process(vsw_t *vswp);
78 void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
79 vsw_port_t *port, mac_resource_handle_t mrh);
80 static int vsw_setup_layer2(vsw_t *);
81 static int vsw_setup_layer3(vsw_t *);
82
83 /* Switching/data transmit routines */
84 static void vsw_switch_l2_frame_mac_client(vsw_t *vswp, mblk_t *mp, int caller,
85 vsw_port_t *port, mac_resource_handle_t);
86 static void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
87 vsw_port_t *port, mac_resource_handle_t);
88 static void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
89 vsw_port_t *port, mac_resource_handle_t);
90 static int vsw_forward_all(vsw_t *vswp, mblk_t *mp,
91 int caller, vsw_port_t *port);
92 static int vsw_forward_grp(vsw_t *vswp, mblk_t *mp,
93 int caller, vsw_port_t *port);
94
95 /* VLAN routines */
96 void vsw_create_vlans(void *arg, int type);
97 void vsw_destroy_vlans(void *arg, int type);
98 void vsw_vlan_add_ids(void *arg, int type);
99 void vsw_vlan_remove_ids(void *arg, int type);
100 static void vsw_vlan_create_hash(void *arg, int type);
101 static void vsw_vlan_destroy_hash(void *arg, int type);
102 boolean_t vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp,
103 uint16_t *vidp);
104 mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
105 uint32_t vsw_vlan_frames_untag(void *arg, int type, mblk_t **np, mblk_t **npt);
106 boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
107
108 /* Forwarding database (FDB) routines */
109 void vsw_fdbe_add(vsw_t *vswp, void *port);
110 void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr);
111 static vsw_fdbe_t *vsw_fdbe_find(vsw_t *vswp, struct ether_addr *);
112 static void vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
113
114 int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
115 int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
116 int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
117 void vsw_del_mcst_vsw(vsw_t *);
118
119 /* Support functions */
120 static mblk_t *vsw_dupmsgchain(mblk_t *mp);
121 static mblk_t *vsw_get_same_dest_list(struct ether_header *ehp, mblk_t **mpp);
122
123
124 /*
125 * Functions imported from other files.
126 */
127 extern mblk_t *vsw_tx_msg(vsw_t *, mblk_t *, int, vsw_port_t *);
128 extern mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t);
129 extern int vsw_mac_open(vsw_t *vswp);
130 extern void vsw_mac_close(vsw_t *vswp);
131 extern void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
132 mblk_t *mp, vsw_macrx_flags_t flags);
133 extern void vsw_set_addrs(vsw_t *vswp);
134 extern int vsw_portsend(vsw_port_t *port, mblk_t *mp);
135 extern void vsw_hio_init(vsw_t *vswp);
136 extern void vsw_hio_start_ports(vsw_t *vswp);
137 extern int vsw_mac_multicast_add(vsw_t *vswp, vsw_port_t *port,
138 mcst_addr_t *mcst_p, int type);
139 extern void vsw_mac_multicast_remove(vsw_t *vswp, vsw_port_t *port,
140 mcst_addr_t *mcst_p, int type);
141 extern void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state);
142 extern void vsw_physlink_update_ports(vsw_t *vswp);
143
144 /*
145 * Tunables used in this file.
146 */
147 extern int vsw_setup_switching_delay;
148 extern uint32_t vsw_vlan_nchains;
149 extern uint32_t vsw_fdbe_refcnt_delay;
150
/*
 * Take a reference on an FDB entry.
 *
 * The count is incremented atomically; the assertion that follows trips if
 * the counter reads zero right after the increment (i.e. it wrapped).
 *
 * The body is wrapped in do/while(0) so that the macro expands to exactly
 * one statement and can be used, followed by a semicolon, as the body of an
 * unbraced if/else.
 */
#define	VSW_FDBE_REFHOLD(p)			\
	do {					\
		atomic_inc_32(&(p)->refcnt);	\
		ASSERT((p)->refcnt != 0);	\
	} while (0)
156
/*
 * Drop a reference on an FDB entry.
 *
 * The assertion checks that a reference is actually held before the count
 * is decremented atomically.
 *
 * The body is wrapped in do/while(0) so that the macro expands to exactly
 * one statement and can be used, followed by a semicolon, as the body of an
 * unbraced if/else.
 */
#define	VSW_FDBE_REFRELE(p)			\
	do {					\
		ASSERT((p)->refcnt != 0);	\
		atomic_dec_32(&(p)->refcnt);	\
	} while (0)
162
163 /*
164 * Thread to setup switching mode. This thread is created during vsw_attach()
165 * initially. It invokes vsw_setup_switching() and keeps retrying while the
166 * returned value is EAGAIN. The thread exits when the switching mode setup is
167 * done successfully or when the error returned is not EAGAIN. This thread may
168 * also get created from vsw_update_md_prop() if the switching mode needs to be
169 * updated.
170 */
171 void
vsw_setup_switching_thread(void * arg)172 vsw_setup_switching_thread(void *arg)
173 {
174 callb_cpr_t cprinfo;
175 vsw_t *vswp = (vsw_t *)arg;
176 clock_t wait_time;
177 clock_t xwait;
178 clock_t wait_rv;
179 int rv;
180
181 /* wait time used on successive retries */
182 xwait = drv_usectohz(vsw_setup_switching_delay * MICROSEC);
183
184 CALLB_CPR_INIT(&cprinfo, &vswp->sw_thr_lock, callb_generic_cpr,
185 "vsw_setup_sw_thread");
186
187 mutex_enter(&vswp->sw_thr_lock);
188
189 while ((vswp->sw_thr_flags & VSW_SWTHR_STOP) == 0) {
190
191 CALLB_CPR_SAFE_BEGIN(&cprinfo);
192
193 /* Wait for sometime before (re)trying setup_switching() */
194 wait_time = ddi_get_lbolt() + xwait;
195 while ((vswp->sw_thr_flags & VSW_SWTHR_STOP) == 0) {
196 wait_rv = cv_timedwait(&vswp->sw_thr_cv,
197 &vswp->sw_thr_lock, wait_time);
198 if (wait_rv == -1) { /* timed out */
199 break;
200 }
201 }
202
203 CALLB_CPR_SAFE_END(&cprinfo, &vswp->sw_thr_lock)
204
205 if ((vswp->sw_thr_flags & VSW_SWTHR_STOP) != 0) {
206 /*
207 * If there is a stop request, process that first and
208 * exit the loop. Continue to hold the mutex which gets
209 * released in CALLB_CPR_EXIT().
210 */
211 break;
212 }
213
214 mutex_exit(&vswp->sw_thr_lock);
215 rv = vsw_setup_switching(vswp);
216 if (rv == 0) {
217 vsw_setup_switching_post_process(vswp);
218 }
219 mutex_enter(&vswp->sw_thr_lock);
220 if (rv != EAGAIN) {
221 break;
222 }
223
224 }
225
226 vswp->sw_thr_flags &= ~VSW_SWTHR_STOP;
227 vswp->sw_thread = NULL;
228 CALLB_CPR_EXIT(&cprinfo);
229 thread_exit();
230 }
231
232 /*
233 * Create a thread to setup the switching mode.
234 * Returns 0 on success; 1 on failure.
235 */
236 int
vsw_setup_switching_start(vsw_t * vswp)237 vsw_setup_switching_start(vsw_t *vswp)
238 {
239 mutex_enter(&vswp->sw_thr_lock);
240
241 vswp->sw_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
242 vsw_setup_switching_thread, vswp, 0, &p0, TS_RUN, minclsyspri);
243
244 if (vswp->sw_thread == NULL) {
245 mutex_exit(&vswp->sw_thr_lock);
246 return (1);
247 }
248
249 mutex_exit(&vswp->sw_thr_lock);
250 return (0);
251 }
252
253 /*
254 * Stop the thread to setup switching mode.
255 */
256 void
vsw_setup_switching_stop(vsw_t * vswp)257 vsw_setup_switching_stop(vsw_t *vswp)
258 {
259 kt_did_t tid = 0;
260
261 /*
262 * Signal the setup_switching thread to stop and wait until it stops.
263 */
264 mutex_enter(&vswp->sw_thr_lock);
265
266 if (vswp->sw_thread != NULL) {
267 tid = vswp->sw_thread->t_did;
268 vswp->sw_thr_flags |= VSW_SWTHR_STOP;
269 cv_signal(&vswp->sw_thr_cv);
270 }
271
272 mutex_exit(&vswp->sw_thr_lock);
273
274 if (tid != 0)
275 thread_join(tid);
276
277 (void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE);
278
279 vswp->mac_open_retries = 0;
280 }
281
282 /*
283 * Setup the required switching mode.
284 * Returns:
285 * 0 on success.
286 * EAGAIN if retry is needed.
287 * 1 on all other failures.
288 */
289 int
vsw_setup_switching(vsw_t * vswp)290 vsw_setup_switching(vsw_t *vswp)
291 {
292 int rv = 1;
293
294 D1(vswp, "%s: enter", __func__);
295
296 /*
297 * Select best switching mode.
298 * This is done as this routine can be called from the timeout
299 * handler to retry setting up a specific mode. Currently only
300 * the function which sets up layer2/promisc mode returns EAGAIN
301 * if the underlying network device is not available yet, causing
302 * retries.
303 */
304 if (vswp->smode & VSW_LAYER2) {
305 rv = vsw_setup_layer2(vswp);
306 } else if (vswp->smode & VSW_LAYER3) {
307 rv = vsw_setup_layer3(vswp);
308 } else {
309 DERR(vswp, "unknown switch mode");
310 rv = 1;
311 }
312
313 if (rv && (rv != EAGAIN)) {
314 cmn_err(CE_WARN, "!vsw%d: Unable to setup specified "
315 "switching mode", vswp->instance);
316 } else if (rv == 0) {
317 (void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE);
318 }
319
320 D2(vswp, "%s: Operating in mode %d", __func__,
321 vswp->smode);
322
323 D1(vswp, "%s: exit", __func__);
324
325 return (rv);
326 }
327
328 /*
329 * Setup for layer 2 switching.
330 *
331 * Returns:
332 * 0 on success.
333 * EAGAIN if retry is needed.
334 * EIO on all other failures.
335 */
336 static int
vsw_setup_layer2(vsw_t * vswp)337 vsw_setup_layer2(vsw_t *vswp)
338 {
339 int rv;
340
341 D1(vswp, "%s: enter", __func__);
342
343 /*
344 * Until the network device is successfully opened,
345 * set the switching to use vsw_switch_l2_frame.
346 */
347 vswp->vsw_switch_frame = vsw_switch_l2_frame;
348 vswp->mac_cl_switching = B_FALSE;
349
350 rv = strlen(vswp->physname);
351 if (rv == 0) {
352 /*
353 * Physical device name is NULL, which is
354 * required for layer 2.
355 */
356 cmn_err(CE_WARN, "!vsw%d: no network device name specified",
357 vswp->instance);
358 return (EIO);
359 }
360
361 mutex_enter(&vswp->mac_lock);
362
363 rv = vsw_mac_open(vswp);
364 if (rv != 0) {
365 if (rv != EAGAIN) {
366 cmn_err(CE_WARN, "!vsw%d: Unable to open network "
367 "device: %s\n", vswp->instance, vswp->physname);
368 }
369 mutex_exit(&vswp->mac_lock);
370 return (rv);
371 }
372
373 /*
374 * Now we can use the mac client switching, so set the switching
375 * function to use vsw_switch_l2_frame_mac_client(), which simply
376 * sends the packets to MAC layer for switching.
377 */
378 vswp->vsw_switch_frame = vsw_switch_l2_frame_mac_client;
379 vswp->mac_cl_switching = B_TRUE;
380
381 D1(vswp, "%s: exit", __func__);
382
383 /* Initialize HybridIO related stuff */
384 vsw_hio_init(vswp);
385
386 mutex_exit(&vswp->mac_lock);
387 return (0);
388
389 exit_error:
390 vsw_mac_close(vswp);
391 mutex_exit(&vswp->mac_lock);
392 return (EIO);
393 }
394
395 static int
vsw_setup_layer3(vsw_t * vswp)396 vsw_setup_layer3(vsw_t *vswp)
397 {
398 D1(vswp, "%s: enter", __func__);
399
400 D2(vswp, "%s: operating in layer 3 mode", __func__);
401 vswp->vsw_switch_frame = vsw_switch_l3_frame;
402
403 D1(vswp, "%s: exit", __func__);
404
405 return (0);
406 }
407
408 /* ARGSUSED */
409 void
vsw_switch_frame_nop(vsw_t * vswp,mblk_t * mp,int caller,vsw_port_t * port,mac_resource_handle_t mrh)410 vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *port,
411 mac_resource_handle_t mrh)
412 {
413 freemsgchain(mp);
414 }
415
416 /*
417 * Use mac client for layer 2 switching .
418 */
419 static void
vsw_switch_l2_frame_mac_client(vsw_t * vswp,mblk_t * mp,int caller,vsw_port_t * port,mac_resource_handle_t mrh)420 vsw_switch_l2_frame_mac_client(vsw_t *vswp, mblk_t *mp, int caller,
421 vsw_port_t *port, mac_resource_handle_t mrh)
422 {
423 _NOTE(ARGUNUSED(mrh))
424
425 mblk_t *ret_m;
426
427 /*
428 * This switching function is expected to be called by
429 * the ports or the interface only. The packets from
430 * physical interface already switched.
431 */
432 ASSERT((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV));
433
434 if ((ret_m = vsw_tx_msg(vswp, mp, caller, port)) != NULL) {
435 DERR(vswp, "%s: drop mblks to "
436 "phys dev", __func__);
437 freemsgchain(ret_m);
438 }
439 }
440
441 /*
442 * Switch the given ethernet frame when operating in layer 2 mode.
443 *
444 * vswp: pointer to the vsw instance
445 * mp: pointer to chain of ethernet frame(s) to be switched
446 * caller: identifies the source of this frame as:
447 * 1. VSW_VNETPORT - a vsw port (connected to a vnet).
448 * 2. VSW_PHYSDEV - the physical ethernet device
449 * 3. VSW_LOCALDEV - vsw configured as a virtual interface
450 * arg: argument provided by the caller.
451 * 1. for VNETPORT - pointer to the corresponding vsw_port_t.
452 * 2. for PHYSDEV - NULL
453 * 3. for LOCALDEV - pointer to to this vsw_t(self)
454 */
455 void
vsw_switch_l2_frame(vsw_t * vswp,mblk_t * mp,int caller,vsw_port_t * arg,mac_resource_handle_t mrh)456 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
457 vsw_port_t *arg, mac_resource_handle_t mrh)
458 {
459 struct ether_header *ehp;
460 mblk_t *bp, *ret_m;
461 vsw_fdbe_t *fp;
462
463 D1(vswp, "%s: enter (caller %d)", __func__, caller);
464
465 /*
466 * PERF: rather than breaking up the chain here, scan it
467 * to find all mblks heading to same destination and then
468 * pass that sub-chain to the lower transmit functions.
469 */
470
471 /* process the chain of packets */
472 bp = mp;
473 while (bp) {
474 ehp = (struct ether_header *)bp->b_rptr;
475 mp = vsw_get_same_dest_list(ehp, &bp);
476 ASSERT(mp != NULL);
477
478 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
479 __func__, MBLKSIZE(mp), MBLKL(mp));
480
481 if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) {
482 /*
483 * If destination is VSW_LOCALDEV (vsw as an eth
484 * interface) and if the device is up & running,
485 * send the packet up the stack on this host.
486 * If the virtual interface is down, drop the packet.
487 */
488 if (caller != VSW_LOCALDEV) {
489 vsw_mac_rx(vswp, mrh, mp, VSW_MACRX_FREEMSG);
490 } else {
491 freemsgchain(mp);
492 }
493 continue;
494 }
495
496 /*
497 * Find fdb entry for the destination
498 * and hold a reference to it.
499 */
500 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost);
501 if (fp != NULL) {
502
503 /*
504 * If plumbed and in promisc mode then copy msg
505 * and send up the stack.
506 */
507 vsw_mac_rx(vswp, mrh, mp,
508 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG);
509
510 /*
511 * If the destination is in FDB, the packet
512 * should be forwarded to the correponding
513 * vsw_port (connected to a vnet device -
514 * VSW_VNETPORT)
515 */
516 (void) vsw_portsend(fp->portp, mp);
517
518 /* Release the reference on the fdb entry */
519 VSW_FDBE_REFRELE(fp);
520 } else {
521 /*
522 * Destination not in FDB.
523 *
524 * If the destination is broadcast or
525 * multicast forward the packet to all
526 * (VNETPORTs, PHYSDEV, LOCALDEV),
527 * except the caller.
528 */
529 if (IS_BROADCAST(ehp)) {
530 D2(vswp, "%s: BROADCAST pkt", __func__);
531 (void) vsw_forward_all(vswp, mp, caller, arg);
532 } else if (IS_MULTICAST(ehp)) {
533 D2(vswp, "%s: MULTICAST pkt", __func__);
534 (void) vsw_forward_grp(vswp, mp, caller, arg);
535 } else {
536 /*
537 * If the destination is unicast, and came
538 * from either a logical network device or
539 * the switch itself when it is plumbed, then
540 * send it out on the physical device and also
541 * up the stack if the logical interface is
542 * in promiscious mode.
543 *
544 * NOTE: The assumption here is that if we
545 * cannot find the destination in our fdb, its
546 * a unicast address, and came from either a
547 * vnet or down the stack (when plumbed) it
548 * must be destinded for an ethernet device
549 * outside our ldoms.
550 */
551 if (caller == VSW_VNETPORT) {
552 /* promisc check copy etc */
553 vsw_mac_rx(vswp, mrh, mp,
554 VSW_MACRX_PROMISC |
555 VSW_MACRX_COPYMSG);
556
557 if ((ret_m = vsw_tx_msg(vswp, mp,
558 caller, arg)) != NULL) {
559 DERR(vswp, "%s: drop mblks to "
560 "phys dev", __func__);
561 freemsgchain(ret_m);
562 }
563
564 } else if (caller == VSW_PHYSDEV) {
565 /*
566 * Pkt seen because card in promisc
567 * mode. Send up stack if plumbed in
568 * promisc mode, else drop it.
569 */
570 vsw_mac_rx(vswp, mrh, mp,
571 VSW_MACRX_PROMISC |
572 VSW_MACRX_FREEMSG);
573
574 } else if (caller == VSW_LOCALDEV) {
575 /*
576 * Pkt came down the stack, send out
577 * over physical device.
578 */
579 if ((ret_m = vsw_tx_msg(vswp, mp,
580 caller, NULL)) != NULL) {
581 DERR(vswp, "%s: drop mblks to "
582 "phys dev", __func__);
583 freemsgchain(ret_m);
584 }
585 }
586 }
587 }
588 }
589 D1(vswp, "%s: exit\n", __func__);
590 }
591
592 /*
593 * Switch ethernet frame when in layer 3 mode (i.e. using IP
594 * layer to do the routing).
595 *
596 * There is a large amount of overlap between this function and
597 * vsw_switch_l2_frame. At some stage we need to revisit and refactor
598 * both these functions.
599 */
600 void
vsw_switch_l3_frame(vsw_t * vswp,mblk_t * mp,int caller,vsw_port_t * arg,mac_resource_handle_t mrh)601 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
602 vsw_port_t *arg, mac_resource_handle_t mrh)
603 {
604 struct ether_header *ehp;
605 mblk_t *bp = NULL;
606 vsw_fdbe_t *fp;
607
608 D1(vswp, "%s: enter (caller %d)", __func__, caller);
609
610 /*
611 * In layer 3 mode should only ever be switching packets
612 * between IP layer and vnet devices. So make sure thats
613 * who is invoking us.
614 */
615 if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) {
616 DERR(vswp, "%s: unexpected caller (%d)", __func__, caller);
617 freemsgchain(mp);
618 return;
619 }
620
621 /* process the chain of packets */
622 bp = mp;
623 while (bp) {
624 ehp = (struct ether_header *)bp->b_rptr;
625 mp = vsw_get_same_dest_list(ehp, &bp);
626 ASSERT(mp != NULL);
627
628 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
629 __func__, MBLKSIZE(mp), MBLKL(mp));
630
631 /*
632 * Find fdb entry for the destination
633 * and hold a reference to it.
634 */
635 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost);
636 if (fp != NULL) {
637
638 D2(vswp, "%s: sending to target port", __func__);
639 (void) vsw_portsend(fp->portp, mp);
640
641 /* Release the reference on the fdb entry */
642 VSW_FDBE_REFRELE(fp);
643 } else {
644 /*
645 * Destination not in FDB
646 *
647 * If the destination is broadcast or
648 * multicast forward the packet to all
649 * (VNETPORTs, PHYSDEV, LOCALDEV),
650 * except the caller.
651 */
652 if (IS_BROADCAST(ehp)) {
653 D2(vswp, "%s: BROADCAST pkt", __func__);
654 (void) vsw_forward_all(vswp, mp, caller, arg);
655 } else if (IS_MULTICAST(ehp)) {
656 D2(vswp, "%s: MULTICAST pkt", __func__);
657 (void) vsw_forward_grp(vswp, mp, caller, arg);
658 } else {
659 /*
660 * Unicast pkt from vnet that we don't have
661 * an FDB entry for, so must be destinded for
662 * the outside world. Attempt to send up to the
663 * IP layer to allow it to deal with it.
664 */
665 if (caller == VSW_VNETPORT) {
666 vsw_mac_rx(vswp, mrh,
667 mp, VSW_MACRX_FREEMSG);
668 }
669 }
670 }
671 }
672
673 D1(vswp, "%s: exit", __func__);
674 }
675
676 /*
677 * Additional initializations that are needed for the specific switching mode.
678 */
679 void
vsw_setup_switching_post_process(vsw_t * vswp)680 vsw_setup_switching_post_process(vsw_t *vswp)
681 {
682 link_state_t link_state = LINK_STATE_UP;
683
684 if (vswp->smode & VSW_LAYER2) {
685 /*
686 * Program unicst, mcst addrs of vsw
687 * interface and ports in the physdev.
688 */
689 vsw_set_addrs(vswp);
690
691 /* Start HIO for ports that have already connected */
692 vsw_hio_start_ports(vswp);
693
694 if (vswp->pls_update == B_TRUE) {
695 link_state = vswp->phys_link_state;
696 }
697
698 /* Update physical link info to any ports already connected */
699 vsw_physlink_update_ports(vswp);
700 }
701
702 vsw_mac_link_update(vswp, link_state);
703 }
704
705 /*
706 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV),
707 * except the caller (port on which frame arrived).
708 */
709 static int
vsw_forward_all(vsw_t * vswp,mblk_t * mp,int caller,vsw_port_t * arg)710 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
711 {
712 vsw_port_list_t *plist = &vswp->plist;
713 vsw_port_t *portp;
714 mblk_t *nmp = NULL;
715 mblk_t *ret_m = NULL;
716 int skip_port = 0;
717
718 D1(vswp, "vsw_forward_all: enter\n");
719
720 /*
721 * Broadcast message from inside ldoms so send to outside
722 * world if in either of layer 2 modes.
723 */
724 if ((vswp->smode & VSW_LAYER2) &&
725 ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) {
726
727 nmp = vsw_dupmsgchain(mp);
728 if (nmp) {
729 if ((ret_m = vsw_tx_msg(vswp, nmp, caller, arg))
730 != NULL) {
731 DERR(vswp, "%s: dropping pkt(s) "
732 "consisting of %ld bytes of data for"
733 " physical device", __func__, MBLKL(ret_m));
734 freemsgchain(ret_m);
735 }
736 }
737 }
738
739 if (caller == VSW_VNETPORT)
740 skip_port = 1;
741
742 /*
743 * Broadcast message from other vnet (layer 2 or 3) or outside
744 * world (layer 2 only), send up stack if plumbed.
745 */
746 if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) {
747 vsw_mac_rx(vswp, NULL, mp, VSW_MACRX_COPYMSG);
748 }
749
750 /* send it to all VNETPORTs */
751 READ_ENTER(&plist->lockrw);
752 for (portp = plist->head; portp != NULL; portp = portp->p_next) {
753 D2(vswp, "vsw_forward_all: port %d", portp->p_instance);
754 /*
755 * Caution ! - don't reorder these two checks as arg
756 * will be NULL if the caller is PHYSDEV. skip_port is
757 * only set if caller is VNETPORT.
758 */
759 if ((skip_port) && (portp == arg)) {
760 continue;
761 } else {
762 nmp = vsw_dupmsgchain(mp);
763 if (nmp) {
764 /*
765 * The plist->lockrw is protecting the
766 * portp from getting destroyed here.
767 * So, no ref_cnt is incremented here.
768 */
769 (void) vsw_portsend(portp, nmp);
770 } else {
771 DERR(vswp, "vsw_forward_all: nmp NULL");
772 }
773 }
774 }
775 RW_EXIT(&plist->lockrw);
776
777 freemsgchain(mp);
778
779 D1(vswp, "vsw_forward_all: exit\n");
780 return (0);
781 }
782
783 /*
784 * Forward pkts to any devices or interfaces which have registered
785 * an interest in them (i.e. multicast groups).
786 */
787 static int
vsw_forward_grp(vsw_t * vswp,mblk_t * mp,int caller,vsw_port_t * arg)788 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
789 {
790 struct ether_header *ehp = (struct ether_header *)mp->b_rptr;
791 mfdb_ent_t *entp = NULL;
792 mfdb_ent_t *tpp = NULL;
793 vsw_port_t *port;
794 uint64_t key = 0;
795 mblk_t *nmp = NULL;
796 mblk_t *ret_m = NULL;
797 boolean_t check_if = B_TRUE;
798
799 /*
800 * Convert address to hash table key
801 */
802 KEY_HASH(key, &ehp->ether_dhost);
803
804 D1(vswp, "%s: key 0x%llx", __func__, key);
805
806 /*
807 * If pkt came from either a vnet or down the stack (if we are
808 * plumbed) and we are in layer 2 mode, then we send the pkt out
809 * over the physical adapter, and then check to see if any other
810 * vnets are interested in it.
811 */
812 if ((vswp->smode & VSW_LAYER2) &&
813 ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) {
814 nmp = vsw_dupmsgchain(mp);
815 if (nmp) {
816 if ((ret_m = vsw_tx_msg(vswp, nmp, caller, arg))
817 != NULL) {
818 DERR(vswp, "%s: dropping pkt(s) consisting of "
819 "%ld bytes of data for physical device",
820 __func__, MBLKL(ret_m));
821 freemsgchain(ret_m);
822 }
823 }
824 }
825
826 READ_ENTER(&vswp->mfdbrw);
827 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key,
828 (mod_hash_val_t *)&entp) != 0) {
829 D3(vswp, "%s: no table entry found for addr 0x%llx",
830 __func__, key);
831 } else {
832 /*
833 * Send to list of devices associated with this address...
834 */
835 for (tpp = entp; tpp != NULL; tpp = tpp->nextp) {
836
837 /* dont send to ourselves */
838 if ((caller == VSW_VNETPORT) &&
839 (tpp->d_addr == (void *)arg)) {
840 port = (vsw_port_t *)tpp->d_addr;
841 D3(vswp, "%s: not sending to ourselves"
842 " : port %d", __func__, port->p_instance);
843 continue;
844
845 } else if ((caller == VSW_LOCALDEV) &&
846 (tpp->d_type == VSW_LOCALDEV)) {
847 D2(vswp, "%s: not sending back up stack",
848 __func__);
849 continue;
850 }
851
852 if (tpp->d_type == VSW_VNETPORT) {
853 port = (vsw_port_t *)tpp->d_addr;
854 D3(vswp, "%s: sending to port %ld for addr "
855 "0x%llx", __func__, port->p_instance, key);
856
857 nmp = vsw_dupmsgchain(mp);
858 if (nmp) {
859 /*
860 * The vswp->mfdbrw is protecting the
861 * portp from getting destroyed here.
862 * So, no ref_cnt is incremented here.
863 */
864 (void) vsw_portsend(port, nmp);
865 }
866 } else {
867 vsw_mac_rx(vswp, NULL,
868 mp, VSW_MACRX_COPYMSG);
869 D2(vswp, "%s: sending up stack"
870 " for addr 0x%llx", __func__, key);
871 check_if = B_FALSE;
872 }
873 }
874 }
875
876 RW_EXIT(&vswp->mfdbrw);
877
878 /*
879 * If the pkt came from either a vnet or from physical device,
880 * and if we havent already sent the pkt up the stack then we
881 * check now if we can/should (i.e. the interface is plumbed
882 * and in promisc mode).
883 */
884 if ((check_if) &&
885 ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) {
886 vsw_mac_rx(vswp, NULL, mp,
887 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG);
888 }
889
890 freemsgchain(mp);
891
892 D1(vswp, "%s: exit", __func__);
893
894 return (0);
895 }
896
/*
 * Build the vlan id hash table for a vsw device or port: the table is
 * created first and then populated with every vlan id assigned to the
 * device or port.
 *
 * Arguments:
 *	arg:  vsw device or port.
 *	type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port).
 */
void
vsw_create_vlans(void *arg, int type)
{
	/* step 1: an empty hash table */
	vsw_vlan_create_hash(arg, type);

	/* step 2: fill it with the assigned vlan ids */
	vsw_vlan_add_ids(arg, type);
}
914
/*
 * Tear down the vlan id hash table of a vsw device or port: the vlan ids
 * are removed from the table first and then the table itself is destroyed.
 *
 * Arguments:
 *	arg:  vsw device or port.
 *	type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port).
 */
void
vsw_destroy_vlans(void *arg, int type)
{
	/* step 1: empty the hash table */
	vsw_vlan_remove_ids(arg, type);

	/* step 2: destroy the hash table itself */
	vsw_vlan_destroy_hash(arg, type);
}
931
932 /*
933 * Create a vlan-id hash table for the given vsw device or port.
934 */
935 static void
vsw_vlan_create_hash(void * arg,int type)936 vsw_vlan_create_hash(void *arg, int type)
937 {
938 char hashname[MAXNAMELEN];
939
940 if (type == VSW_LOCALDEV) {
941 vsw_t *vswp = (vsw_t *)arg;
942
943 (void) snprintf(hashname, MAXNAMELEN, "vsw%d-vlan-hash",
944 vswp->instance);
945
946 vswp->vlan_nchains = vsw_vlan_nchains;
947 vswp->vlan_hashp = mod_hash_create_idhash(hashname,
948 vswp->vlan_nchains, mod_hash_null_valdtor);
949
950 } else if (type == VSW_VNETPORT) {
951 vsw_port_t *portp = (vsw_port_t *)arg;
952
953 (void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
954 portp->p_instance);
955
956 portp->vlan_nchains = vsw_vlan_nchains;
957 portp->vlan_hashp = mod_hash_create_idhash(hashname,
958 portp->vlan_nchains, mod_hash_null_valdtor);
959
960 } else {
961 return;
962 }
963 }
964
965 /*
966 * Destroy the vlan-id hash table for the given vsw device or port.
967 */
968 static void
vsw_vlan_destroy_hash(void * arg,int type)969 vsw_vlan_destroy_hash(void *arg, int type)
970 {
971 if (type == VSW_LOCALDEV) {
972 vsw_t *vswp = (vsw_t *)arg;
973
974 mod_hash_destroy_hash(vswp->vlan_hashp);
975 vswp->vlan_nchains = 0;
976 } else if (type == VSW_VNETPORT) {
977 vsw_port_t *portp = (vsw_port_t *)arg;
978
979 mod_hash_destroy_hash(portp->vlan_hashp);
980 portp->vlan_nchains = 0;
981 } else {
982 return;
983 }
984 }
985
986 /*
987 * Add vlan ids of the given vsw device or port into its hash table.
988 */
989 void
vsw_vlan_add_ids(void * arg,int type)990 vsw_vlan_add_ids(void *arg, int type)
991 {
992 int rv;
993 int i;
994
995 if (type == VSW_LOCALDEV) {
996 vsw_t *vswp = (vsw_t *)arg;
997
998 rv = mod_hash_insert(vswp->vlan_hashp,
999 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid),
1000 (mod_hash_val_t)B_TRUE);
1001 if (rv != 0) {
1002 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d) for "
1003 "the interface", vswp->instance, vswp->pvid);
1004 }
1005
1006 for (i = 0; i < vswp->nvids; i++) {
1007 rv = mod_hash_insert(vswp->vlan_hashp,
1008 (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i].vl_vid),
1009 (mod_hash_val_t)B_TRUE);
1010 if (rv != 0) {
1011 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d)"
1012 " for the interface", vswp->instance,
1013 vswp->pvid);
1014 }
1015 }
1016
1017 } else if (type == VSW_VNETPORT) {
1018 vsw_port_t *portp = (vsw_port_t *)arg;
1019 vsw_t *vswp = portp->p_vswp;
1020
1021 rv = mod_hash_insert(portp->vlan_hashp,
1022 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1023 (mod_hash_val_t)B_TRUE);
1024 if (rv != 0) {
1025 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d) for "
1026 "the port(%d)", vswp->instance, vswp->pvid,
1027 portp->p_instance);
1028 }
1029
1030 for (i = 0; i < portp->nvids; i++) {
1031 rv = mod_hash_insert(portp->vlan_hashp,
1032 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i].vl_vid),
1033 (mod_hash_val_t)B_TRUE);
1034 if (rv != 0) {
1035 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d)"
1036 " for the port(%d)", vswp->instance,
1037 vswp->pvid, portp->p_instance);
1038 }
1039 }
1040
1041 }
1042 }
1043
1044 /*
1045 * Remove vlan ids of the given vsw device or port from its hash table.
1046 */
1047 void
vsw_vlan_remove_ids(void * arg,int type)1048 vsw_vlan_remove_ids(void *arg, int type)
1049 {
1050 mod_hash_val_t vp;
1051 int rv;
1052 int i;
1053
1054 if (type == VSW_LOCALDEV) {
1055 vsw_t *vswp = (vsw_t *)arg;
1056
1057 rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->pvid);
1058 if (rv == B_TRUE) {
1059 rv = mod_hash_remove(vswp->vlan_hashp,
1060 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid),
1061 (mod_hash_val_t *)&vp);
1062 ASSERT(rv == 0);
1063 }
1064
1065 for (i = 0; i < vswp->nvids; i++) {
1066 rv = vsw_vlan_lookup(vswp->vlan_hashp,
1067 vswp->vids[i].vl_vid);
1068 if (rv == B_TRUE) {
1069 rv = mod_hash_remove(vswp->vlan_hashp,
1070 (mod_hash_key_t)VLAN_ID_KEY(
1071 vswp->vids[i].vl_vid),
1072 (mod_hash_val_t *)&vp);
1073 ASSERT(rv == 0);
1074 }
1075 }
1076
1077 } else if (type == VSW_VNETPORT) {
1078 vsw_port_t *portp = (vsw_port_t *)arg;
1079
1080 portp = (vsw_port_t *)arg;
1081 rv = vsw_vlan_lookup(portp->vlan_hashp, portp->pvid);
1082 if (rv == B_TRUE) {
1083 rv = mod_hash_remove(portp->vlan_hashp,
1084 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1085 (mod_hash_val_t *)&vp);
1086 ASSERT(rv == 0);
1087 }
1088
1089 for (i = 0; i < portp->nvids; i++) {
1090 rv = vsw_vlan_lookup(portp->vlan_hashp,
1091 portp->vids[i].vl_vid);
1092 if (rv == B_TRUE) {
1093 rv = mod_hash_remove(portp->vlan_hashp,
1094 (mod_hash_key_t)VLAN_ID_KEY(
1095 portp->vids[i].vl_vid),
1096 (mod_hash_val_t *)&vp);
1097 ASSERT(rv == 0);
1098 }
1099 }
1100
1101 } else {
1102 return;
1103 }
1104 }
1105
1106 /*
1107 * Find the given vlan id in the hash table.
1108 * Return: B_TRUE if the id is found; B_FALSE if not found.
1109 */
1110 boolean_t
vsw_vlan_lookup(mod_hash_t * vlan_hashp,uint16_t vid)1111 vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1112 {
1113 int rv;
1114 mod_hash_val_t vp;
1115
1116 rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1117
1118 if (rv != 0)
1119 return (B_FALSE);
1120
1121 return (B_TRUE);
1122 }
1123
1124 /*
1125 * Add an entry into FDB for the given vsw.
1126 */
1127 void
vsw_fdbe_add(vsw_t * vswp,void * port)1128 vsw_fdbe_add(vsw_t *vswp, void *port)
1129 {
1130 uint64_t addr = 0;
1131 vsw_port_t *portp;
1132 vsw_fdbe_t *fp;
1133 int rv;
1134
1135 portp = (vsw_port_t *)port;
1136 KEY_HASH(addr, &portp->p_macaddr);
1137
1138 fp = kmem_zalloc(sizeof (vsw_fdbe_t), KM_SLEEP);
1139 fp->portp = port;
1140
1141 /*
1142 * Note: duplicate keys will be rejected by mod_hash.
1143 */
1144 rv = mod_hash_insert(vswp->fdb_hashp, (mod_hash_key_t)addr,
1145 (mod_hash_val_t)fp);
1146 if (rv != 0) {
1147 cmn_err(CE_WARN, "vsw%d: Duplicate mac-address(%s) for "
1148 "the port(%d)", vswp->instance,
1149 ether_sprintf(&portp->p_macaddr), portp->p_instance);
1150 kmem_free(fp, sizeof (*fp));
1151 }
1152 }
1153
1154 /*
1155 * Remove an entry from FDB.
1156 */
1157 void
vsw_fdbe_del(vsw_t * vswp,struct ether_addr * eaddr)1158 vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr)
1159 {
1160 uint64_t addr = 0;
1161 vsw_fdbe_t *fp;
1162 int rv;
1163
1164 KEY_HASH(addr, eaddr);
1165
1166 /*
1167 * Remove the entry from fdb hash table.
1168 * This prevents further references to this fdb entry.
1169 */
1170 rv = mod_hash_remove(vswp->fdb_hashp, (mod_hash_key_t)addr,
1171 (mod_hash_val_t *)&fp);
1172 if (rv != 0) {
1173 /* invalid key? */
1174 return;
1175 }
1176
1177 /*
1178 * If there are threads already ref holding before the entry was
1179 * removed from hash table, then wait for ref count to drop to zero.
1180 */
1181 while (fp->refcnt != 0) {
1182 delay(drv_usectohz(vsw_fdbe_refcnt_delay));
1183 }
1184
1185 kmem_free(fp, sizeof (*fp));
1186 }
1187
1188 /*
1189 * Search fdb for a given mac address. If an entry is found, hold
1190 * a reference to it and return the entry, else returns NULL.
1191 */
1192 static vsw_fdbe_t *
vsw_fdbe_find(vsw_t * vswp,struct ether_addr * addrp)1193 vsw_fdbe_find(vsw_t *vswp, struct ether_addr *addrp)
1194 {
1195 uint64_t key = 0;
1196 vsw_fdbe_t *fp;
1197 int rv;
1198
1199 KEY_HASH(key, addrp);
1200
1201 rv = mod_hash_find_cb(vswp->fdb_hashp, (mod_hash_key_t)key,
1202 (mod_hash_val_t *)&fp, vsw_fdbe_find_cb);
1203
1204 if (rv != 0)
1205 return (NULL);
1206
1207 return (fp);
1208 }
1209
1210 /*
1211 * Callback function provided to mod_hash_find_cb(). After finding the fdb
1212 * entry corresponding to the key (macaddr), this callback will be invoked by
1213 * mod_hash_find_cb() to atomically increment the reference count on the fdb
1214 * entry before returning the found entry.
1215 */
1216 static void
vsw_fdbe_find_cb(mod_hash_key_t key,mod_hash_val_t val)1217 vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
1218 {
1219 _NOTE(ARGUNUSED(key))
1220 VSW_FDBE_REFHOLD((vsw_fdbe_t *)val);
1221 }
1222
1223 /*
1224 * A given frame must be always tagged with the appropriate vlan id (unless it
1225 * is in the default-vlan) before the mac address switching function is called.
1226 * Otherwise, after switching function determines the destination, we cannot
1227 * figure out if the destination belongs to the the same vlan that the frame
1228 * originated from and if it needs tag/untag. Frames which are inbound from
1229 * the external(physical) network over a vlan trunk link are always tagged.
1230 * However frames which are received from a vnet-port over ldc or frames which
1231 * are coming down the stack on the service domain over vsw interface may be
1232 * untagged. These frames must be tagged with the appropriate pvid of the
1233 * sender (vnet-port or vsw device), before invoking the switching function.
1234 *
1235 * Arguments:
1236 * arg: caller of the function.
1237 * type: type of arg(caller): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port)
1238 * mp: frame(s) to be tagged.
1239 */
1240 mblk_t *
vsw_vlan_frame_pretag(void * arg,int type,mblk_t * mp)1241 vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp)
1242 {
1243 vsw_t *vswp;
1244 vsw_port_t *portp;
1245 struct ether_header *ehp;
1246 mblk_t *bp;
1247 mblk_t *bpt;
1248 mblk_t *bph;
1249 mblk_t *bpn;
1250 uint16_t pvid;
1251
1252 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
1253
1254 if (type == VSW_LOCALDEV) {
1255 vswp = (vsw_t *)arg;
1256 pvid = vswp->pvid;
1257 portp = NULL;
1258 } else {
1259 /* VSW_VNETPORT */
1260 portp = (vsw_port_t *)arg;
1261 pvid = portp->pvid;
1262 vswp = portp->p_vswp;
1263 }
1264
1265 bpn = bph = bpt = NULL;
1266
1267 for (bp = mp; bp != NULL; bp = bpn) {
1268
1269 bpn = bp->b_next;
1270 bp->b_next = bp->b_prev = NULL;
1271
1272 /* Determine if it is an untagged frame */
1273 ehp = (struct ether_header *)bp->b_rptr;
1274
1275 if (ehp->ether_type != ETHERTYPE_VLAN) { /* untagged */
1276
1277 /* no need to tag if the frame is in default vlan */
1278 if (pvid != vswp->default_vlan_id) {
1279 bp = vnet_vlan_insert_tag(bp, pvid);
1280 if (bp == NULL) {
1281 continue;
1282 }
1283 }
1284 }
1285
1286 /* build a chain of processed packets */
1287 if (bph == NULL) {
1288 bph = bpt = bp;
1289 } else {
1290 bpt->b_next = bp;
1291 bpt = bp;
1292 }
1293
1294 }
1295
1296 return (bph);
1297 }
1298
1299 /*
1300 * Frames destined to a vnet-port or to the local vsw interface, must be
1301 * untagged if necessary before sending. This function first checks that the
1302 * frame can be sent to the destination in the vlan identified by the frame
1303 * tag. Note that when this function is invoked the frame must have been
1304 * already tagged (unless it is in the default-vlan). Because, this function is
1305 * called when the switching function determines the destination and invokes
1306 * its send function (vnet-port or vsw interface) and all frames would have
1307 * been tagged by this time (see comments in vsw_vlan_frame_pretag()).
1308 *
1309 * Arguments:
1310 * arg: destination device.
1311 * type: type of arg(destination): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port)
1312 * np: head of pkt chain to be validated and untagged.
1313 * npt: tail of pkt chain to be validated and untagged.
1314 *
1315 * Returns:
1316 * np: head of updated chain of packets
1317 * npt: tail of updated chain of packets
1318 * rv: count of the packets in the returned list
1319 */
1320 uint32_t
vsw_vlan_frame_untag(void * arg,int type,mblk_t ** np,mblk_t ** npt)1321 vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, mblk_t **npt)
1322 {
1323 mblk_t *bp;
1324 mblk_t *bpt;
1325 mblk_t *bph;
1326 mblk_t *bpn;
1327 vsw_port_t *portp;
1328 vsw_t *vswp;
1329 uint32_t count;
1330 struct ether_header *ehp;
1331 boolean_t is_tagged;
1332 boolean_t rv;
1333 uint16_t vlan_id;
1334 uint16_t pvid;
1335 mod_hash_t *vlan_hashp;
1336
1337 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
1338
1339
1340 if (type == VSW_LOCALDEV) {
1341 vswp = (vsw_t *)arg;
1342 pvid = vswp->pvid;
1343 vlan_hashp = vswp->vlan_hashp;
1344 portp = NULL;
1345 } else {
1346 /* type == VSW_VNETPORT */
1347 portp = (vsw_port_t *)arg;
1348 vswp = portp->p_vswp;
1349 vlan_hashp = portp->vlan_hashp;
1350 pvid = portp->pvid;
1351 }
1352
1353 /*
1354 * If the MAC layer switching in place, then
1355 * untagging required only if the pvid is not
1356 * the same as default_vlan_id. This is because,
1357 * the MAC layer will send packets for the
1358 * registered vlans only.
1359 */
1360 if ((vswp->mac_cl_switching == B_TRUE) &&
1361 (pvid == vswp->default_vlan_id)) {
1362 /* simply count and set the tail */
1363 count = 1;
1364 bp = *np;
1365 ASSERT(bp != NULL);
1366 while (bp->b_next != NULL) {
1367 bp = bp->b_next;
1368 count++;
1369 }
1370 *npt = bp;
1371 return (count);
1372 }
1373
1374 bpn = bph = bpt = NULL;
1375 count = 0;
1376
1377 for (bp = *np; bp != NULL; bp = bpn) {
1378
1379 bpn = bp->b_next;
1380 bp->b_next = bp->b_prev = NULL;
1381
1382 /*
1383 * Determine the vlan id that the frame belongs to.
1384 */
1385 ehp = (struct ether_header *)bp->b_rptr;
1386 is_tagged = vsw_frame_lookup_vid(arg, type, ehp, &vlan_id);
1387
1388 /*
1389 * If MAC layer switching in place, then we
1390 * need to untag only if the tagged packet has
1391 * vlan-id same as the pvid.
1392 */
1393 if (vswp->mac_cl_switching == B_TRUE) {
1394
1395 /* only tagged packets expected here */
1396 ASSERT(is_tagged == B_TRUE);
1397 if (vlan_id == pvid) {
1398 bp = vnet_vlan_remove_tag(bp);
1399 if (bp == NULL) {
1400 /* packet dropped */
1401 continue;
1402 }
1403 }
1404 } else { /* No MAC layer switching */
1405
1406 /*
1407 * Check the frame header if tag/untag is needed.
1408 */
1409 if (is_tagged == B_FALSE) {
1410 /*
1411 * Untagged frame. We shouldn't have an
1412 * untagged packet at this point, unless
1413 * the destination's vlan id is
1414 * default-vlan-id; if it is not the
1415 * default-vlan-id, we drop the packet.
1416 */
1417 if (vlan_id != vswp->default_vlan_id) {
1418 /* drop the packet */
1419 freemsg(bp);
1420 continue;
1421 }
1422 } else { /* Tagged */
1423 /*
1424 * Tagged frame, untag if it's the
1425 * destination's pvid.
1426 */
1427 if (vlan_id == pvid) {
1428
1429 bp = vnet_vlan_remove_tag(bp);
1430 if (bp == NULL) {
1431 /* packet dropped */
1432 continue;
1433 }
1434 } else {
1435
1436 /*
1437 * Check if the destination is in the
1438 * same vlan.
1439 */
1440 rv = vsw_vlan_lookup(vlan_hashp,
1441 vlan_id);
1442 if (rv == B_FALSE) {
1443 /* drop the packet */
1444 freemsg(bp);
1445 continue;
1446 }
1447 }
1448
1449 }
1450 }
1451
1452 /* build a chain of processed packets */
1453 if (bph == NULL) {
1454 bph = bpt = bp;
1455 } else {
1456 bpt->b_next = bp;
1457 bpt = bp;
1458 }
1459 count++;
1460 }
1461
1462 *np = bph;
1463 *npt = bpt;
1464 return (count);
1465 }
1466
1467 /*
1468 * Lookup the vlan id of the given frame. If it is a vlan-tagged frame,
1469 * then the vlan-id is available in the tag; otherwise, its vlan id is
1470 * implicitly obtained based on the caller (destination of the frame:
1471 * VSW_VNETPORT or VSW_LOCALDEV).
1472 * The vlan id determined is returned in vidp.
1473 * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1474 */
1475 boolean_t
vsw_frame_lookup_vid(void * arg,int caller,struct ether_header * ehp,uint16_t * vidp)1476 vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp,
1477 uint16_t *vidp)
1478 {
1479 struct ether_vlan_header *evhp;
1480 vsw_t *vswp;
1481 vsw_port_t *portp;
1482
1483 /* If it's a tagged frame, get the vid from vlan header */
1484 if (ehp->ether_type == ETHERTYPE_VLAN) {
1485
1486 evhp = (struct ether_vlan_header *)ehp;
1487 *vidp = VLAN_ID(ntohs(evhp->ether_tci));
1488 return (B_TRUE);
1489 }
1490
1491 /* Untagged frame; determine vlan id based on caller */
1492 switch (caller) {
1493
1494 case VSW_VNETPORT:
1495 /*
1496 * packet destined to a vnet; vlan-id is pvid of vnet-port.
1497 */
1498 portp = (vsw_port_t *)arg;
1499 *vidp = portp->pvid;
1500 break;
1501
1502 case VSW_LOCALDEV:
1503
1504 /*
1505 * packet destined to vsw interface;
1506 * vlan-id is port-vlan-id of vsw device.
1507 */
1508 vswp = (vsw_t *)arg;
1509 *vidp = vswp->pvid;
1510 break;
1511 }
1512
1513 return (B_FALSE);
1514 }
1515
1516 /*
1517 * Add or remove multicast address(es).
1518 *
1519 * Returns 0 on success, 1 on failure.
1520 */
1521 int
vsw_add_rem_mcst(vnet_mcast_msg_t * mcst_pkt,vsw_port_t * port)1522 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port)
1523 {
1524 mcst_addr_t *mcst_p = NULL;
1525 vsw_t *vswp = port->p_vswp;
1526 uint64_t addr = 0x0;
1527 int i;
1528
1529 D1(vswp, "%s: enter", __func__);
1530
1531 D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count);
1532
1533 for (i = 0; i < mcst_pkt->count; i++) {
1534 /*
1535 * Convert address into form that can be used
1536 * as hash table key.
1537 */
1538 KEY_HASH(addr, &(mcst_pkt->mca[i]));
1539
1540 /*
1541 * Add or delete the specified address/port combination.
1542 */
1543 if (mcst_pkt->set == 0x1) {
1544 D3(vswp, "%s: adding multicast address 0x%llx for "
1545 "port %ld", __func__, addr, port->p_instance);
1546 if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
1547 /*
1548 * Update the list of multicast
1549 * addresses contained within the
1550 * port structure to include this new
1551 * one.
1552 */
1553 mcst_p = kmem_zalloc(sizeof (mcst_addr_t),
1554 KM_NOSLEEP);
1555 if (mcst_p == NULL) {
1556 DERR(vswp, "%s: unable to alloc mem",
1557 __func__);
1558 (void) vsw_del_mcst(vswp,
1559 VSW_VNETPORT, addr, port);
1560 return (1);
1561 }
1562
1563 mcst_p->nextp = NULL;
1564 mcst_p->addr = addr;
1565 ether_copy(&mcst_pkt->mca[i], &mcst_p->mca);
1566
1567 /*
1568 * Program the address into HW. If the addr
1569 * has already been programmed then the MAC
1570 * just increments a ref counter (which is
1571 * used when the address is being deleted)
1572 */
1573 if (vsw_mac_multicast_add(vswp, port, mcst_p,
1574 VSW_VNETPORT)) {
1575 (void) vsw_del_mcst(vswp,
1576 VSW_VNETPORT, addr, port);
1577 kmem_free(mcst_p, sizeof (*mcst_p));
1578 return (1);
1579 }
1580
1581 mutex_enter(&port->mca_lock);
1582 mcst_p->nextp = port->mcap;
1583 port->mcap = mcst_p;
1584 mutex_exit(&port->mca_lock);
1585
1586 } else {
1587 DERR(vswp, "%s: error adding multicast "
1588 "address 0x%llx for port %ld",
1589 __func__, addr, port->p_instance);
1590 return (1);
1591 }
1592 } else {
1593 /*
1594 * Delete an entry from the multicast hash
1595 * table and update the address list
1596 * appropriately.
1597 */
1598 if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
1599 D3(vswp, "%s: deleting multicast address "
1600 "0x%llx for port %ld", __func__, addr,
1601 port->p_instance);
1602
1603 mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr);
1604 ASSERT(mcst_p != NULL);
1605
1606 /*
1607 * Remove the address from HW. The address
1608 * will actually only be removed once the ref
1609 * count within the MAC layer has dropped to
1610 * zero. I.e. we can safely call this fn even
1611 * if other ports are interested in this
1612 * address.
1613 */
1614 vsw_mac_multicast_remove(vswp, port, mcst_p,
1615 VSW_VNETPORT);
1616 kmem_free(mcst_p, sizeof (*mcst_p));
1617
1618 } else {
1619 DERR(vswp, "%s: error deleting multicast "
1620 "addr 0x%llx for port %ld",
1621 __func__, addr, port->p_instance);
1622 return (1);
1623 }
1624 }
1625 }
1626 D1(vswp, "%s: exit", __func__);
1627 return (0);
1628 }
1629
1630 /*
1631 * Add a new multicast entry.
1632 *
1633 * Search hash table based on address. If match found then
1634 * update associated val (which is chain of ports), otherwise
1635 * create new key/val (addr/port) pair and insert into table.
1636 */
1637 int
vsw_add_mcst(vsw_t * vswp,uint8_t devtype,uint64_t addr,void * arg)1638 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
1639 {
1640 int dup = 0;
1641 int rv = 0;
1642 mfdb_ent_t *ment = NULL;
1643 mfdb_ent_t *tmp_ent = NULL;
1644 mfdb_ent_t *new_ent = NULL;
1645 void *tgt = NULL;
1646
1647 if (devtype == VSW_VNETPORT) {
1648 /*
1649 * Being invoked from a vnet.
1650 */
1651 ASSERT(arg != NULL);
1652 tgt = arg;
1653 D2(NULL, "%s: port %d : address 0x%llx", __func__,
1654 ((vsw_port_t *)arg)->p_instance, addr);
1655 } else {
1656 /*
1657 * We are being invoked via the m_multicst mac entry
1658 * point.
1659 */
1660 D2(NULL, "%s: address 0x%llx", __func__, addr);
1661 tgt = (void *)vswp;
1662 }
1663
1664 WRITE_ENTER(&vswp->mfdbrw);
1665 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
1666 (mod_hash_val_t *)&ment) != 0) {
1667
1668 /* address not currently in table */
1669 ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
1670 ment->d_addr = (void *)tgt;
1671 ment->d_type = devtype;
1672 ment->nextp = NULL;
1673
1674 if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr,
1675 (mod_hash_val_t)ment) != 0) {
1676 DERR(vswp, "%s: hash table insertion failed", __func__);
1677 kmem_free(ment, sizeof (mfdb_ent_t));
1678 rv = 1;
1679 } else {
1680 D2(vswp, "%s: added initial entry for 0x%llx to "
1681 "table", __func__, addr);
1682 }
1683 } else {
1684 /*
1685 * Address in table. Check to see if specified port
1686 * is already associated with the address. If not add
1687 * it now.
1688 */
1689 tmp_ent = ment;
1690 while (tmp_ent != NULL) {
1691 if (tmp_ent->d_addr == (void *)tgt) {
1692 if (devtype == VSW_VNETPORT) {
1693 DERR(vswp, "%s: duplicate port entry "
1694 "found for portid %ld and key "
1695 "0x%llx", __func__,
1696 ((vsw_port_t *)arg)->p_instance,
1697 addr);
1698 } else {
1699 DERR(vswp, "%s: duplicate entry found"
1700 "for key 0x%llx", __func__, addr);
1701 }
1702 rv = 1;
1703 dup = 1;
1704 break;
1705 }
1706 tmp_ent = tmp_ent->nextp;
1707 }
1708
1709 /*
1710 * Port not on list so add it to end now.
1711 */
1712 if (0 == dup) {
1713 D2(vswp, "%s: added entry for 0x%llx to table",
1714 __func__, addr);
1715 new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
1716 new_ent->d_addr = (void *)tgt;
1717 new_ent->d_type = devtype;
1718 new_ent->nextp = NULL;
1719
1720 tmp_ent = ment;
1721 while (tmp_ent->nextp != NULL)
1722 tmp_ent = tmp_ent->nextp;
1723
1724 tmp_ent->nextp = new_ent;
1725 }
1726 }
1727
1728 RW_EXIT(&vswp->mfdbrw);
1729 return (rv);
1730 }
1731
1732 /*
1733 * Remove a multicast entry from the hashtable.
1734 *
1735 * Search hash table based on address. If match found, scan
1736 * list of ports associated with address. If specified port
1737 * found remove it from list.
1738 */
1739 int
vsw_del_mcst(vsw_t * vswp,uint8_t devtype,uint64_t addr,void * arg)1740 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
1741 {
1742 mfdb_ent_t *ment = NULL;
1743 mfdb_ent_t *curr_p, *prev_p;
1744 void *tgt = NULL;
1745
1746 D1(vswp, "%s: enter", __func__);
1747
1748 if (devtype == VSW_VNETPORT) {
1749 tgt = (vsw_port_t *)arg;
1750 D2(vswp, "%s: removing port %d from mFDB for address"
1751 " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr);
1752 } else {
1753 D2(vswp, "%s: removing entry", __func__);
1754 tgt = (void *)vswp;
1755 }
1756
1757 WRITE_ENTER(&vswp->mfdbrw);
1758 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
1759 (mod_hash_val_t *)&ment) != 0) {
1760 D2(vswp, "%s: address 0x%llx not in table", __func__, addr);
1761 RW_EXIT(&vswp->mfdbrw);
1762 return (1);
1763 }
1764
1765 prev_p = curr_p = ment;
1766
1767 while (curr_p != NULL) {
1768 if (curr_p->d_addr == (void *)tgt) {
1769 if (devtype == VSW_VNETPORT) {
1770 D2(vswp, "%s: port %d found", __func__,
1771 ((vsw_port_t *)tgt)->p_instance);
1772 } else {
1773 D2(vswp, "%s: instance found", __func__);
1774 }
1775
1776 if (prev_p == curr_p) {
1777 /*
1778 * head of list, if no other element is in
1779 * list then destroy this entry, otherwise
1780 * just replace it with updated value.
1781 */
1782 ment = curr_p->nextp;
1783 if (ment == NULL) {
1784 (void) mod_hash_destroy(vswp->mfdb,
1785 (mod_hash_val_t)addr);
1786 } else {
1787 (void) mod_hash_replace(vswp->mfdb,
1788 (mod_hash_key_t)addr,
1789 (mod_hash_val_t)ment);
1790 }
1791 } else {
1792 /*
1793 * Not head of list, no need to do
1794 * replacement, just adjust list pointers.
1795 */
1796 prev_p->nextp = curr_p->nextp;
1797 }
1798 break;
1799 }
1800
1801 prev_p = curr_p;
1802 curr_p = curr_p->nextp;
1803 }
1804
1805 RW_EXIT(&vswp->mfdbrw);
1806
1807 D1(vswp, "%s: exit", __func__);
1808
1809 if (curr_p == NULL)
1810 return (1);
1811 kmem_free(curr_p, sizeof (mfdb_ent_t));
1812 return (0);
1813 }
1814
1815 /*
1816 * Port is being deleted, but has registered an interest in one
1817 * or more multicast groups. Using the list of addresses maintained
1818 * within the port structure find the appropriate entry in the hash
1819 * table and remove this port from the list of interested ports.
1820 */
1821 void
vsw_del_mcst_port(vsw_port_t * port)1822 vsw_del_mcst_port(vsw_port_t *port)
1823 {
1824 mcst_addr_t *mcap = NULL;
1825 vsw_t *vswp = port->p_vswp;
1826
1827 D1(vswp, "%s: enter", __func__);
1828
1829 mutex_enter(&port->mca_lock);
1830
1831 while ((mcap = port->mcap) != NULL) {
1832
1833 port->mcap = mcap->nextp;
1834
1835 mutex_exit(&port->mca_lock);
1836
1837 (void) vsw_del_mcst(vswp, VSW_VNETPORT,
1838 mcap->addr, port);
1839
1840 /*
1841 * Remove the address from HW. The address
1842 * will actually only be removed once the ref
1843 * count within the MAC layer has dropped to
1844 * zero. I.e. we can safely call this fn even
1845 * if other ports are interested in this
1846 * address.
1847 */
1848 vsw_mac_multicast_remove(vswp, port, mcap, VSW_VNETPORT);
1849 kmem_free(mcap, sizeof (*mcap));
1850
1851 mutex_enter(&port->mca_lock);
1852
1853 }
1854
1855 mutex_exit(&port->mca_lock);
1856
1857 D1(vswp, "%s: exit", __func__);
1858 }
1859
1860 /*
1861 * This vsw instance is detaching, but has registered an interest in one
1862 * or more multicast groups. Using the list of addresses maintained
1863 * within the vsw structure find the appropriate entry in the hash
1864 * table and remove this instance from the list of interested ports.
1865 */
1866 void
vsw_del_mcst_vsw(vsw_t * vswp)1867 vsw_del_mcst_vsw(vsw_t *vswp)
1868 {
1869 mcst_addr_t *next_p = NULL;
1870
1871 D1(vswp, "%s: enter", __func__);
1872
1873 mutex_enter(&vswp->mca_lock);
1874
1875 while (vswp->mcap != NULL) {
1876 DERR(vswp, "%s: deleting addr 0x%llx",
1877 __func__, vswp->mcap->addr);
1878 (void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL);
1879
1880 next_p = vswp->mcap->nextp;
1881 kmem_free(vswp->mcap, sizeof (mcst_addr_t));
1882 vswp->mcap = next_p;
1883 }
1884
1885 vswp->mcap = NULL;
1886 mutex_exit(&vswp->mca_lock);
1887
1888 D1(vswp, "%s: exit", __func__);
1889 }
1890
1891 mblk_t *
vsw_get_same_dest_list(struct ether_header * ehp,mblk_t ** mpp)1892 vsw_get_same_dest_list(struct ether_header *ehp, mblk_t **mpp)
1893 {
1894 mblk_t *bp;
1895 mblk_t *nbp;
1896 mblk_t *head = NULL;
1897 mblk_t *tail = NULL;
1898 mblk_t *prev = NULL;
1899 struct ether_header *behp;
1900
1901 /* process the chain of packets */
1902 bp = *mpp;
1903 while (bp) {
1904 nbp = bp->b_next;
1905 behp = (struct ether_header *)bp->b_rptr;
1906 bp->b_prev = NULL;
1907 if (ether_cmp(&ehp->ether_dhost, &behp->ether_dhost) == 0) {
1908 if (prev == NULL) {
1909 *mpp = nbp;
1910 } else {
1911 prev->b_next = nbp;
1912 }
1913 bp->b_next = NULL;
1914 if (head == NULL) {
1915 head = tail = bp;
1916 } else {
1917 tail->b_next = bp;
1918 tail = bp;
1919 }
1920 } else {
1921 prev = bp;
1922 }
1923 bp = nbp;
1924 }
1925 return (head);
1926 }
1927
1928 static mblk_t *
vsw_dupmsgchain(mblk_t * mp)1929 vsw_dupmsgchain(mblk_t *mp)
1930 {
1931 mblk_t *nmp = NULL;
1932 mblk_t **nmpp = &nmp;
1933
1934 for (; mp != NULL; mp = mp->b_next) {
1935 if ((*nmpp = dupmsg(mp)) == NULL) {
1936 freemsgchain(nmp);
1937 return (NULL);
1938 }
1939
1940 nmpp = &((*nmpp)->b_next);
1941 }
1942
1943 return (nmp);
1944 }
1945