/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/callb.h>
#include <sys/stream.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/debug.h>
#include <sys/ethernet.h>
#include <sys/dlpi.h>
#include <net/if.h>
#include <sys/mac_provider.h>
#include <sys/mac_client.h>
#include <sys/mac_client_priv.h>
#include <sys/mac_ether.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsun.h>
#include <sys/note.h>
#include <sys/atomic.h>
#include <sys/vnet.h>
#include <sys/vlan.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/dds.h>
#include <sys/strsubr.h>
#include <sys/taskq.h>

/*
 * Function prototypes.
 */

/* DDI entrypoints */
static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);

/* MAC entrypoints */
static int vnet_m_stat(void *, uint_t, uint64_t *);
static int vnet_m_start(void *);
static void vnet_m_stop(void *);
static int vnet_m_promisc(void *, boolean_t);
static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
static int vnet_m_unicst(void *, const uint8_t *);
mblk_t *vnet_m_tx(void *, mblk_t *);
static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
#ifdef VNET_IOC_DEBUG
static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
#endif
static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
    const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
    mac_group_info_t *infop, mac_group_handle_t handle);
static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
static int vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
    uint64_t *val);
static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
static int vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
    uint64_t *val);
static int vnet_ring_enable_intr(void *arg);
static int vnet_ring_disable_intr(void *arg);
static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
static int vnet_addmac(void *arg, const uint8_t *mac_addr);
static int vnet_remmac(void *arg, const uint8_t *mac_addr);

/* vnet internal functions */
static int vnet_unattach(vnet_t *vnetp);
static void vnet_ring_grp_init(vnet_t *vnetp);
static void vnet_ring_grp_uninit(vnet_t *vnetp);
static int vnet_mac_register(vnet_t *);
static int vnet_read_mac_address(vnet_t *vnetp);
static int vnet_bind_vgenring(vnet_res_t *vresp);
static void vnet_unbind_vgenring(vnet_res_t *vresp);
static int vnet_bind_hwrings(vnet_t *vnetp);
static void vnet_unbind_hwrings(vnet_t *vnetp);
static int vnet_bind_rings(vnet_res_t *vresp);
static void vnet_unbind_rings(vnet_res_t *vresp);
static int vnet_hio_stat(void *, uint_t, uint64_t *);
static int vnet_hio_start(void *);
static void vnet_hio_stop(void *);
mblk_t *vnet_hio_tx(void *, mblk_t *);

/* Forwarding database (FDB) routines */
static void vnet_fdb_create(vnet_t *vnetp);
static void vnet_fdb_destroy(vnet_t *vnetp);
static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);

static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
static void vnet_tx_update(vio_net_handle_t vrh);
static void vnet_res_start_task(void *arg);
static void vnet_start_resources(vnet_t *vnetp);
static void vnet_stop_resources(vnet_t *vnetp);
static void vnet_dispatch_res_task(vnet_t *vnetp);
static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
static void vnet_tx_notify_thread(void *);

/* Exported to vnet_gen */
int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
void vnet_dds_cleanup_hio(vnet_t *vnetp);

static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
    vnet_res_t *vresp);
static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
static void vnet_hio_destroy_kstats(kstat_t *ksp);

/* Exported to vnet_dds */
int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
void vnet_hio_mac_cleanup(vnet_t *vnetp);

/* Externs that are imported from vnet_gen */
extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
    const uint8_t *macaddr, void **vgenhdl);
extern int vgen_init_mdeg(void *arg);
extern void vgen_uninit(void *arg);
extern int vgen_dds_tx(void *arg, void *dmsg);
extern int vgen_enable_intr(void *arg);
extern int vgen_disable_intr(void *arg);
extern mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);

/* Externs that are imported from vnet_dds */
extern void vdds_mod_init(void);
extern void vdds_mod_fini(void);
extern int vdds_init(vnet_t *vnetp);
extern void vdds_cleanup(vnet_t *vnetp);
extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
extern void vdds_cleanup_hybrid_res(void *arg);
extern void vdds_cleanup_hio(vnet_t *vnetp);

extern pri_t minclsyspri;

#define DRV_NAME "vnet"
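
/*
 * The FDB reference count macros below keep an entry alive while it is in
 * use: a hold is taken (atomically) on an entry before transmitting through
 * it and released when the send completes. vnet_fdbe_del() waits for all
 * such references to drain before the entry can finally go away.
 */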
#define VNET_FDBE_REFHOLD(p) \
{ \
    atomic_inc_32(&(p)->refcnt); \
    ASSERT((p)->refcnt != 0); \
}

#define VNET_FDBE_REFRELE(p) \
{ \
    ASSERT((p)->refcnt != 0); \
    atomic_dec_32(&(p)->refcnt); \
}

#ifdef VNET_IOC_DEBUG
#define VNET_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB)
#else
#define VNET_M_CALLBACK_FLAGS (MC_GETCAPAB)
#endif

static mac_callbacks_t vnet_m_callbacks = {
    VNET_M_CALLBACK_FLAGS,
    vnet_m_stat,
    vnet_m_start,
    vnet_m_stop,
    vnet_m_promisc,
    vnet_m_multicst,
    NULL, /* m_unicst entry must be NULL while rx rings are exposed */
    NULL, /* m_tx entry must be NULL while tx rings are exposed */
    NULL,
    vnet_m_ioctl,
    vnet_m_capab,
    NULL
};

static mac_callbacks_t vnet_hio_res_callbacks = {
    0,
    vnet_hio_stat,
    vnet_hio_start,
    vnet_hio_stop,
    NULL,
    NULL,
    NULL,
    vnet_hio_tx,
    NULL,
    NULL,
    NULL
};

/*
 * Linked list of "vnet_t" structures - one per instance.
 */
static vnet_t *vnet_headp = NULL;
static krwlock_t vnet_rw;

/* Tunables */
uint32_t vnet_num_descriptors = VNET_NUM_DESCRIPTORS;

/*
 * Configure tx serialization in mac layer for the vnet device. This tunable
 * should be enabled to improve performance only if HybridIO is configured for
 * the vnet device.
 */
boolean_t vnet_mac_tx_serialize = B_FALSE;

/* Configure enqueuing at Rx soft rings in mac layer for the vnet device */
boolean_t vnet_mac_rx_queuing = B_TRUE;

/*
 * Set this to non-zero to enable additional internal receive buffer pools
 * based on the MTU of the device for better performance at the cost of more
 * memory consumption. This is turned off by default, to use allocb(9F) for
 * receive buffer allocations of sizes > 2K.
 */
boolean_t vnet_jumbo_rxpools = B_FALSE;

/* # of chains in fdb hash table */
uint32_t vnet_fdb_nchains = VNET_NFDB_HASH;

/* Internal tunables */
uint32_t vnet_ethermtu = 1500; /* mtu of the device */

/*
 * Default vlan id. This is only used internally when the "default-vlan-id"
 * property is not present in the MD device node. Therefore, this should not be
 * used as a tunable; if this value is changed, the corresponding variable
 * should be updated to the same value in vsw and also other vnets connected to
 * the same vsw.
 */
uint16_t vnet_default_vlan_id = 1;

/* delay in usec to wait for all references on a fdb entry to be dropped */
uint32_t vnet_fdbe_refcnt_delay = 10;

static struct ether_addr etherbroadcastaddr = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

/* mac_open() retry delay in usec */
uint32_t vnet_mac_open_delay = 100; /* 0.1 ms */

/* max # of mac_open() retries */
uint32_t vnet_mac_open_retries = 100;

/*
 * Property names
 */
static char macaddr_propname[] = "local-mac-address";

/*
 * This is the string displayed by modinfo(1m).
 */
static char vnet_ident[] = "vnet driver";
extern struct mod_ops mod_driverops;
static struct cb_ops cb_vnetops = {
    nulldev, /* cb_open */
    nulldev, /* cb_close */
    nodev, /* cb_strategy */
    nodev, /* cb_print */
    nodev, /* cb_dump */
    nodev, /* cb_read */
    nodev, /* cb_write */
    nodev, /* cb_ioctl */
    nodev, /* cb_devmap */
    nodev, /* cb_mmap */
    nodev, /* cb_segmap */
    nochpoll, /* cb_chpoll */
    ddi_prop_op, /* cb_prop_op */
    NULL, /* cb_stream */
    (int)(D_MP) /* cb_flag */
};

static struct dev_ops vnetops = {
    DEVO_REV, /* devo_rev */
    0, /* devo_refcnt */
    NULL, /* devo_getinfo */
    nulldev, /* devo_identify */
    nulldev, /* devo_probe */
    vnetattach, /* devo_attach */
    vnetdetach, /* devo_detach */
    nodev, /* devo_reset */
    &cb_vnetops, /* devo_cb_ops */
    (struct bus_ops *)NULL, /* devo_bus_ops */
    NULL, /* devo_power */
    ddi_quiesce_not_supported, /* devo_quiesce */
};

static struct modldrv modldrv = {
    &mod_driverops, /* Type of module. This one is a driver */
    vnet_ident, /* ID string */
    &vnetops /* driver specific ops */
};

static struct modlinkage modlinkage = {
    MODREV_1, (void *)&modldrv, NULL
};

#ifdef DEBUG

#define DEBUG_PRINTF debug_printf

/*
 * Print debug messages - set to 0xf to enable all msgs
 */
int vnet_dbglevel = 0x8;

static void
debug_printf(const char *fname, void *arg, const char *fmt, ...)
{
    char buf[512];
    va_list ap;
    vnet_t *vnetp = (vnet_t *)arg;
    char *bufp = buf;

    if (vnetp == NULL) {
        (void) sprintf(bufp, "%s: ", fname);
        bufp += strlen(bufp);
    } else {
        (void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
        bufp += strlen(bufp);
    }
    va_start(ap, fmt);
    (void) vsprintf(bufp, fmt, ap);
    va_end(ap);
    cmn_err(CE_CONT, "%s\n", buf);
}

#endif

/* _init(9E): initialize the loadable module */
int
_init(void)
{
    int status;

    DBG1(NULL, "enter\n");

    mac_init_ops(&vnetops, "vnet");
    status = mod_install(&modlinkage);
    if (status != 0) {
        mac_fini_ops(&vnetops);
    }
    vdds_mod_init();
    DBG1(NULL, "exit(%d)\n", status);
    return (status);
}

/* _fini(9E): prepare the module for unloading. */
int
_fini(void)
{
    int status;

    DBG1(NULL, "enter\n");

    status = mod_remove(&modlinkage);
    if (status != 0)
        return (status);
    mac_fini_ops(&vnetops);
    vdds_mod_fini();

    DBG1(NULL, "exit(%d)\n", status);
    return (status);
}

/* _info(9E): return information about the loadable module */
int
_info(struct modinfo *modinfop)
{
    return (mod_info(&modlinkage, modinfop));
}

/*
 * attach(9E): attach a device to the system.
 * called once for each instance of the device on the system.
 */
static int
vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
    vnet_t *vnetp;
    int status;
    int instance;
    uint64_t reg;
    char qname[TASKQ_NAMELEN];
    vnet_attach_progress_t attach_progress;

    attach_progress = AST_init;

    switch (cmd) {
    case DDI_ATTACH:
        break;
    case DDI_RESUME:
    case DDI_PM_RESUME:
    default:
        goto vnet_attach_fail;
    }

    instance = ddi_get_instance(dip);
    DBG1(NULL, "instance(%d) enter\n", instance);

    /* allocate vnet_t and mac_t structures */
    vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
    vnetp->dip = dip;
    vnetp->instance = instance;
    rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
    rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
    attach_progress |= AST_vnet_alloc;

    vnet_ring_grp_init(vnetp);
    attach_progress |= AST_ring_init;

    status = vdds_init(vnetp);
    if (status != 0) {
        goto vnet_attach_fail;
    }
    attach_progress |= AST_vdds_init;

    /* setup links to vnet_t from both devinfo and mac_t */
    ddi_set_driver_private(dip, (caddr_t)vnetp);

    /* read the mac address */
    status = vnet_read_mac_address(vnetp);
    if (status != DDI_SUCCESS) {
        goto vnet_attach_fail;
    }
    attach_progress |= AST_read_macaddr;

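    /*
     * Read the "reg" property of this device node; it is saved in
     * vnetp->reg and handed to vgen_init() below as its regprop
     * argument, identifying this vnet instance to the vgen layer.
     */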
    reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
        DDI_PROP_DONTPASS, "reg", -1);
    if (reg == -1) {
        goto vnet_attach_fail;
    }
    vnetp->reg = reg;

    vnet_fdb_create(vnetp);
    attach_progress |= AST_fdbh_alloc;

    (void) snprintf(qname, TASKQ_NAMELEN, "vres_taskq%d", instance);
    if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
        TASKQ_DEFAULTPRI, 0)) == NULL) {
        cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
            instance);
        goto vnet_attach_fail;
    }
    attach_progress |= AST_taskq_create;

    /* add to the list of vnet devices */
    WRITE_ENTER(&vnet_rw);
    vnetp->nextp = vnet_headp;
    vnet_headp = vnetp;
    RW_EXIT(&vnet_rw);

    attach_progress |= AST_vnet_list;

    /*
     * Initialize the generic vnet plugin which provides communication via
     * sun4v LDC (logical domain channel) based resources. This involves 2
     * steps; first, vgen_init() is invoked to read the various properties
     * of the vnet device from its MD node (including its mtu which is
     * needed to mac_register()) and obtain a handle to the vgen layer.
     * After mac_register() is done and we have a mac handle, we then
     * invoke vgen_init_mdeg() which registers with the MD event
     * generator (mdeg) framework to allow LDC resource notifications.
     * Note: this sequence also allows us to report the correct default #
     * of pseudo rings (2 TX and 3 RX) in vnet_m_capab() which gets invoked
     * in the context of mac_register(); and avoids conflicting with
     * dynamic pseudo rx rings which get added/removed as a result of mdeg
     * events in vgen.
     */
    status = vgen_init(vnetp, reg, vnetp->dip,
        (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
    if (status != DDI_SUCCESS) {
        DERR(vnetp, "vgen_init() failed\n");
        goto vnet_attach_fail;
    }
    attach_progress |= AST_vgen_init;

    status = vnet_mac_register(vnetp);
    if (status != DDI_SUCCESS) {
        goto vnet_attach_fail;
    }
    vnetp->link_state = LINK_STATE_UNKNOWN;
    attach_progress |= AST_macreg;

    status = vgen_init_mdeg(vnetp->vgenhdl);
    if (status != DDI_SUCCESS) {
        goto vnet_attach_fail;
    }
    attach_progress |= AST_init_mdeg;

    vnetp->attach_progress = attach_progress;

    DBG1(NULL, "instance(%d) exit\n", instance);
    return (DDI_SUCCESS);

vnet_attach_fail:
    vnetp->attach_progress = attach_progress;
    status = vnet_unattach(vnetp);
    ASSERT(status == 0);
    return (DDI_FAILURE);
}

/*
 * detach(9E): detach a device from the system.
 */
static int
vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
    vnet_t *vnetp;
    int instance;

    instance = ddi_get_instance(dip);
    DBG1(NULL, "instance(%d) enter\n", instance);

    vnetp = ddi_get_driver_private(dip);
    if (vnetp == NULL) {
        goto vnet_detach_fail;
    }

    switch (cmd) {
    case DDI_DETACH:
        break;
    case DDI_SUSPEND:
    case DDI_PM_SUSPEND:
    default:
        goto vnet_detach_fail;
    }

    if (vnet_unattach(vnetp) != 0) {
        goto vnet_detach_fail;
    }

    return (DDI_SUCCESS);

vnet_detach_fail:
    return (DDI_FAILURE);
}

/*
 * Common routine to handle vnetattach() failure and vnetdetach(). Note that
 * the only reason this function could fail is if mac_disable() fails.
 * Otherwise, this function must ensure that all resources are freed and return
 * success.
 */
static int
vnet_unattach(vnet_t *vnetp)
{
    vnet_attach_progress_t attach_progress;

    attach_progress = vnetp->attach_progress;

    /*
     * Disable the mac device in the gldv3 subsystem. This can fail, in
     * particular if there are still any open references to this mac
     * device; in which case we just return failure without continuing to
     * detach further.
     * If it succeeds, we then invoke vgen_uninit() which should unregister
     * any pseudo rings registered with the mac layer. Note we keep the
     * AST_macreg flag on, so we can unregister with the mac layer at
     * the end of this routine.
     */
    if (attach_progress & AST_macreg) {
        if (mac_disable(vnetp->mh) != 0) {
            return (1);
        }
    }

    /*
     * Now that we have disabled the device, we must finish all other steps
     * and successfully return from this function; otherwise we will end up
     * leaving the device in a broken/unusable state.
     *
     * First, release any hybrid resources assigned to this vnet device.
     */
    if (attach_progress & AST_vdds_init) {
        vdds_cleanup(vnetp);
        attach_progress &= ~AST_vdds_init;
    }

    /*
     * Uninit vgen. This stops further mdeg callbacks to this vnet
     * device and/or its ports; and detaches any existing ports.
     */
    if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
        vgen_uninit(vnetp->vgenhdl);
        attach_progress &= ~AST_vgen_init;
        attach_progress &= ~AST_init_mdeg;
    }

    /* Destroy the taskq. */
    if (attach_progress & AST_taskq_create) {
        ddi_taskq_destroy(vnetp->taskqp);
        attach_progress &= ~AST_taskq_create;
    }

    /* Destroy fdb. */
    if (attach_progress & AST_fdbh_alloc) {
        vnet_fdb_destroy(vnetp);
        attach_progress &= ~AST_fdbh_alloc;
    }

    /* Remove from the device list */
    if (attach_progress & AST_vnet_list) {
        vnet_t **vnetpp;
        /* unlink from instance(vnet_t) list */
        WRITE_ENTER(&vnet_rw);
        for (vnetpp = &vnet_headp; *vnetpp;
            vnetpp = &(*vnetpp)->nextp) {
            if (*vnetpp == vnetp) {
                *vnetpp = vnetp->nextp;
                break;
            }
        }
        RW_EXIT(&vnet_rw);
        attach_progress &= ~AST_vnet_list;
    }

    if (attach_progress & AST_ring_init) {
        vnet_ring_grp_uninit(vnetp);
        attach_progress &= ~AST_ring_init;
    }

    if (attach_progress & AST_macreg) {
        VERIFY(mac_unregister(vnetp->mh) == 0);
        vnetp->mh = NULL;
        attach_progress &= ~AST_macreg;
    }

    if (attach_progress & AST_vnet_alloc) {
        rw_destroy(&vnetp->vrwlock);
        rw_destroy(&vnetp->vsw_fp_rw);
        attach_progress &= ~AST_vnet_alloc;
        KMEM_FREE(vnetp);
    }

    return (0);
}

/* enable the device for transmit/receive */
static int
vnet_m_start(void *arg)
{
    vnet_t *vnetp = arg;

    DBG1(vnetp, "enter\n");

    WRITE_ENTER(&vnetp->vrwlock);
    vnetp->flags |= VNET_STARTED;
    vnet_start_resources(vnetp);
    RW_EXIT(&vnetp->vrwlock);

    DBG1(vnetp, "exit\n");
    return (VNET_SUCCESS);

}

/* stop transmit/receive for the device */
static void
vnet_m_stop(void *arg)
{
    vnet_t *vnetp = arg;

    DBG1(vnetp, "enter\n");

    WRITE_ENTER(&vnetp->vrwlock);
    if (vnetp->flags & VNET_STARTED) {
        /*
         * Set the flags appropriately; this should prevent starting of
         * any new resources that are added (see vnet_res_start_task()),
         * while we release the vrwlock in vnet_stop_resources() before
         * stopping each resource.
         */
        vnetp->flags &= ~VNET_STARTED;
        vnetp->flags |= VNET_STOPPING;
        vnet_stop_resources(vnetp);
        vnetp->flags &= ~VNET_STOPPING;
    }
    RW_EXIT(&vnetp->vrwlock);

    DBG1(vnetp, "exit\n");
}

/* set the unicast mac address of the device */
static int
vnet_m_unicst(void *arg, const uint8_t *macaddr)
{
    _NOTE(ARGUNUSED(macaddr))

    vnet_t *vnetp = arg;

    DBG1(vnetp, "enter\n");
    /*
     * NOTE: setting mac address dynamically is not supported.
     */
    DBG1(vnetp, "exit\n");

    return (VNET_FAILURE);
}

/* enable/disable a multicast address */
static int
vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
    _NOTE(ARGUNUSED(add, mca))

    vnet_t *vnetp = arg;
    vnet_res_t *vresp;
    mac_register_t *macp;
    mac_callbacks_t *cbp;
    int rv = VNET_SUCCESS;

    DBG1(vnetp, "enter\n");

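    /*
     * Multicast filtering is performed by the vswitch; take a hold on the
     * cached vsw resource and forward the request through its mc_multicst
     * entry point.
     */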
    READ_ENTER(&vnetp->vsw_fp_rw);
    if (vnetp->vsw_fp == NULL) {
        RW_EXIT(&vnetp->vsw_fp_rw);
        return (EAGAIN);
    }
    VNET_FDBE_REFHOLD(vnetp->vsw_fp);
    RW_EXIT(&vnetp->vsw_fp_rw);

    vresp = vnetp->vsw_fp;
    macp = &vresp->macreg;
    cbp = macp->m_callbacks;
    rv = cbp->mc_multicst(macp->m_driver, add, mca);

    VNET_FDBE_REFRELE(vnetp->vsw_fp);

    DBG1(vnetp, "exit(%d)\n", rv);
    return (rv);
}

/* set or clear promiscuous mode on the device */
static int
vnet_m_promisc(void *arg, boolean_t on)
{
    _NOTE(ARGUNUSED(on))

    vnet_t *vnetp = arg;
    DBG1(vnetp, "enter\n");
    /*
     * NOTE: setting promiscuous mode is not supported, just return success.
     */
    DBG1(vnetp, "exit\n");
    return (VNET_SUCCESS);
}

/*
 * Transmit a chain of packets. This function provides switching functionality
 * based on the destination mac address to reach other guests (within ldoms) or
 * external hosts.
 */
mblk_t *
vnet_tx_ring_send(void *arg, mblk_t *mp)
{
    vnet_pseudo_tx_ring_t *tx_ringp;
    vnet_tx_ring_stats_t *statsp;
    vnet_t *vnetp;
    vnet_res_t *vresp;
    mblk_t *next;
    mblk_t *resid_mp;
    mac_register_t *macp;
    struct ether_header *ehp;
    boolean_t is_unicast;
    boolean_t is_pvid; /* non-default pvid ? */
    boolean_t hres; /* Hybrid resource ? */
    void *tx_arg;
    size_t size;

    tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
    statsp = &tx_ringp->tx_ring_stats;
    vnetp = (vnet_t *)tx_ringp->vnetp;
    DBG1(vnetp, "enter\n");
    ASSERT(mp != NULL);

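    /*
     * Cache whether this vnet is in a non-default VLAN. Frames sent over
     * the Hybrid resource are not tagged by the vgen layer, so in the pvid
     * case the tag must be inserted here before transmitting (see below).
     */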
    is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;

    while (mp != NULL) {

        next = mp->b_next;
        mp->b_next = NULL;

        /* update stats */
        size = msgsize(mp);

        /*
         * Find fdb entry for the destination
         * and hold a reference to it.
         */
        ehp = (struct ether_header *)mp->b_rptr;
        vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
        if (vresp != NULL) {

            /*
             * Destination found in FDB.
             * The destination is a vnet device within ldoms
             * and directly reachable, invoke the tx function
             * in the fdb entry.
             */
            macp = &vresp->macreg;
            resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);

            /* tx done; now release ref on fdb entry */
            VNET_FDBE_REFRELE(vresp);

            if (resid_mp != NULL) {
                /* m_tx failed */
                mp->b_next = next;
                break;
            }
        } else {
            is_unicast = !(IS_BROADCAST(ehp) ||
                (IS_MULTICAST(ehp)));
            /*
             * Destination is not in FDB.
             * If the destination is broadcast or multicast,
             * then forward the packet to vswitch.
             * If a Hybrid resource is available, then send the
             * unicast packet via the hybrid resource, otherwise
             * forward it to vswitch.
             */
            READ_ENTER(&vnetp->vsw_fp_rw);

            if ((is_unicast) && (vnetp->hio_fp != NULL)) {
                vresp = vnetp->hio_fp;
                hres = B_TRUE;
            } else {
                vresp = vnetp->vsw_fp;
                hres = B_FALSE;
            }
            if (vresp == NULL) {
                /*
                 * no fdb entry to vsw? drop the packet.
                 */
                RW_EXIT(&vnetp->vsw_fp_rw);
                freemsg(mp);
                mp = next;
                continue;
            }

            /* ref hold the fdb entry to vsw */
            VNET_FDBE_REFHOLD(vresp);

            RW_EXIT(&vnetp->vsw_fp_rw);

            /*
             * In the case of a hybrid resource we need to insert
             * the tag for the pvid case here; unlike packets that
             * are destined to a vnet/vsw in which case the vgen
             * layer does the tagging before sending it over ldc.
             */
            if (hres == B_TRUE) {
                /*
                 * Determine if the frame being transmitted
                 * over the hybrid resource is untagged. If so,
                 * insert the tag before transmitting.
                 */
                if (is_pvid == B_TRUE &&
                    ehp->ether_type != htons(ETHERTYPE_VLAN)) {

                    mp = vnet_vlan_insert_tag(mp,
                        vnetp->pvid);
                    if (mp == NULL) {
                        VNET_FDBE_REFRELE(vresp);
                        mp = next;
                        continue;
                    }

                }

                macp = &vresp->macreg;
                tx_arg = tx_ringp;
            } else {
                macp = &vresp->macreg;
                tx_arg = macp->m_driver;
            }
            resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);

            /* tx done; now release ref on fdb entry */
            VNET_FDBE_REFRELE(vresp);

            if (resid_mp != NULL) {
                /* m_tx failed */
                mp->b_next = next;
                break;
            }
        }

        statsp->obytes += size;
        statsp->opackets++;
        mp = next;
    }

    DBG1(vnetp, "exit\n");
    return (mp);
}

/* get statistics from the device */
int
vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
{
    vnet_t *vnetp = arg;
    vnet_res_t *vresp;
    mac_register_t *macp;
    mac_callbacks_t *cbp;
    uint64_t val_total = 0;

    DBG1(vnetp, "enter\n");

    /*
     * get the specified statistic from each transport and return the
     * aggregate val. This obviously only works for counters.
     */
    if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
        (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
        return (ENOTSUP);
    }

    READ_ENTER(&vnetp->vrwlock);
    for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
        macp = &vresp->macreg;
        cbp = macp->m_callbacks;
        if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
            val_total += *val;
    }
    RW_EXIT(&vnetp->vrwlock);

    *val = val_total;

    DBG1(vnetp, "exit\n");
    return (0);
}

static void
vnet_ring_grp_init(vnet_t *vnetp)
{
    vnet_pseudo_rx_group_t *rx_grp;
    vnet_pseudo_rx_ring_t *rx_ringp;
    vnet_pseudo_tx_group_t *tx_grp;
    vnet_pseudo_tx_ring_t *tx_ringp;
    int i;

    tx_grp = &vnetp->tx_grp[0];
    tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
        VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
    for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
        tx_ringp[i].state |= VNET_TXRING_SHARED;
    }
    tx_grp->rings = tx_ringp;
    tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;
    mutex_init(&tx_grp->flowctl_lock, NULL, MUTEX_DRIVER, NULL);
    cv_init(&tx_grp->flowctl_cv, NULL, CV_DRIVER, NULL);
    tx_grp->flowctl_thread = thread_create(NULL, 0,
        vnet_tx_notify_thread, tx_grp, 0, &p0, TS_RUN, minclsyspri);

    rx_grp = &vnetp->rx_grp[0];
    rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
    rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
    rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
        rx_grp->max_ring_cnt, KM_SLEEP);

    /*
     * Setup the first 3 Pseudo RX Rings that are reserved;
     * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
     */
    rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
    rx_ringp[0].index = 0;
    rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
    rx_ringp[1].index = 1;
    rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
    rx_ringp[2].index = 2;

    rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
    rx_grp->rings = rx_ringp;

    for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
        i < rx_grp->max_ring_cnt; i++) {
        rx_ringp = &rx_grp->rings[i];
        rx_ringp->state = VNET_RXRING_FREE;
        rx_ringp->index = i;
    }
}

static void
vnet_ring_grp_uninit(vnet_t *vnetp)
{
    vnet_pseudo_rx_group_t *rx_grp;
    vnet_pseudo_tx_group_t *tx_grp;
    kt_did_t tid = 0;

    tx_grp = &vnetp->tx_grp[0];

    /* Inform tx_notify_thread to exit */
    mutex_enter(&tx_grp->flowctl_lock);
    if (tx_grp->flowctl_thread != NULL) {
        tid = tx_grp->flowctl_thread->t_did;
        tx_grp->flowctl_done = B_TRUE;
        cv_signal(&tx_grp->flowctl_cv);
    }
    mutex_exit(&tx_grp->flowctl_lock);
    if (tid != 0)
        thread_join(tid);

    if (tx_grp->rings != NULL) {
        ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
        kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
            tx_grp->ring_cnt);
        tx_grp->rings = NULL;
    }

    rx_grp = &vnetp->rx_grp[0];
    if (rx_grp->rings != NULL) {
        ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
        ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
        kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
            rx_grp->max_ring_cnt);
        rx_grp->rings = NULL;
    }
}

static vnet_pseudo_rx_ring_t *
vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
{
    vnet_pseudo_rx_group_t *rx_grp;
    vnet_pseudo_rx_ring_t *rx_ringp;
    int index;

    rx_grp = &vnetp->rx_grp[0];
    WRITE_ENTER(&rx_grp->lock);

    if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
        /* no rings available */
        RW_EXIT(&rx_grp->lock);
        return (NULL);
    }

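    /* Skip the reserved rings (LDC service + Hybrid) and find a free slot. */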
    for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
        index < rx_grp->max_ring_cnt; index++) {
        rx_ringp = &rx_grp->rings[index];
        if (rx_ringp->state == VNET_RXRING_FREE) {
            rx_ringp->state |= VNET_RXRING_INUSE;
            rx_grp->ring_cnt++;
            break;
        }
    }

    RW_EXIT(&rx_grp->lock);
    return (rx_ringp);
}

static void
vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
{
    vnet_pseudo_rx_group_t *rx_grp;

    ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
    rx_grp = &vnetp->rx_grp[0];
    WRITE_ENTER(&rx_grp->lock);

    if (ringp->state != VNET_RXRING_FREE) {
        ringp->state = VNET_RXRING_FREE;
        ringp->handle = NULL;
        rx_grp->ring_cnt--;
    }

    RW_EXIT(&rx_grp->lock);
}

/* wrapper function for mac_register() */
static int
vnet_mac_register(vnet_t *vnetp)
{
    mac_register_t *macp;
    int err;

    if ((macp = mac_alloc(MAC_VERSION)) == NULL)
        return (DDI_FAILURE);
    macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
    macp->m_driver = vnetp;
    macp->m_dip = vnetp->dip;
    macp->m_src_addr = vnetp->curr_macaddr;
    macp->m_callbacks = &vnet_m_callbacks;
    macp->m_min_sdu = 0;
    macp->m_max_sdu = vnetp->mtu;
    macp->m_margin = VLAN_TAGSZ;

    macp->m_v12n = MAC_VIRT_LEVEL1;

    /*
     * Finally, we're ready to register ourselves with the MAC layer
     * interface; if this succeeds, we're all ready to start()
     */
    err = mac_register(macp, &vnetp->mh);
    mac_free(macp);
    return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
}

/* read the mac address of the device */
static int
vnet_read_mac_address(vnet_t *vnetp)
{
    uchar_t *macaddr;
    uint32_t size;
    int rv;

    rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
        DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
    if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
        DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
            macaddr_propname, rv);
        return (DDI_FAILURE);
    }
    bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
    bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
    ddi_prop_free(macaddr);

    return (DDI_SUCCESS);
}

static void
vnet_fdb_create(vnet_t *vnetp)
{
    char hashname[MAXNAMELEN];

    (void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
        vnetp->instance);
    vnetp->fdb_nchains = vnet_fdb_nchains;
    vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
        mod_hash_null_valdtor, sizeof (void *));
}

static void
vnet_fdb_destroy(vnet_t *vnetp)
{
    /* destroy fdb-hash-table */
    if (vnetp->fdb_hashp != NULL) {
        mod_hash_destroy_hash(vnetp->fdb_hashp);
        vnetp->fdb_hashp = NULL;
        vnetp->fdb_nchains = 0;
    }
}

/*
 * Add an entry into the fdb.
 */
void
vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
{
    uint64_t addr = 0;
    int rv;

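    /*
     * KEY_HASH packs the 6-byte remote MAC address into a 64-bit key
     * suitable for the pointer-hash based fdb table.
     */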
    KEY_HASH(addr, vresp->rem_macaddr);

    /*
     * If the entry being added corresponds to an LDC_SERVICE resource,
     * that is, the vswitch connection, it is added to the hash and the
     * entry is also cached; an initial reference count on the entry
     * reflects this. The HYBRID resource is not added to the hash, but
     * only cached, as it is only used for sending out packets for
     * unknown unicast destinations.
     */
    (vresp->type == VIO_NET_RES_LDC_SERVICE) ?
        (vresp->refcnt = 1) : (vresp->refcnt = 0);

    /*
     * Note: duplicate keys will be rejected by mod_hash.
     */
    if (vresp->type != VIO_NET_RES_HYBRID) {
        rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
            (mod_hash_val_t)vresp);
        if (rv != 0) {
            DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
            return;
        }
    }

    if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
        /* Cache the fdb entry to vsw-port */
        WRITE_ENTER(&vnetp->vsw_fp_rw);
        if (vnetp->vsw_fp == NULL)
            vnetp->vsw_fp = vresp;
        RW_EXIT(&vnetp->vsw_fp_rw);
    } else if (vresp->type == VIO_NET_RES_HYBRID) {
        /* Cache the fdb entry to hybrid resource */
        WRITE_ENTER(&vnetp->vsw_fp_rw);
        if (vnetp->hio_fp == NULL)
            vnetp->hio_fp = vresp;
        RW_EXIT(&vnetp->vsw_fp_rw);
    }
}

/*
 * Remove an entry from fdb.
 */
static void
vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
{
    uint64_t addr = 0;
    int rv;
    uint32_t refcnt;
    vnet_res_t *tmp;

    KEY_HASH(addr, vresp->rem_macaddr);

    /*
     * Remove the entry from fdb hash table.
     * This prevents further references to this fdb entry.
     */
    if (vresp->type != VIO_NET_RES_HYBRID) {
        rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
            (mod_hash_val_t *)&tmp);
        if (rv != 0) {
            /*
             * As the resources are added to the hash only
             * after they are started, this can occur if
             * a resource unregisters before it is ever started.
             */
            return;
        }
    }

    if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
        WRITE_ENTER(&vnetp->vsw_fp_rw);

        ASSERT(tmp == vnetp->vsw_fp);
        vnetp->vsw_fp = NULL;

        RW_EXIT(&vnetp->vsw_fp_rw);
    } else if (vresp->type == VIO_NET_RES_HYBRID) {
        WRITE_ENTER(&vnetp->vsw_fp_rw);

        vnetp->hio_fp = NULL;

        RW_EXIT(&vnetp->vsw_fp_rw);
    }

    /*
     * If there are threads already ref holding before the entry was
     * removed from hash table, then wait for ref count to drop to zero.
     */
    (vresp->type == VIO_NET_RES_LDC_SERVICE) ?
        (refcnt = 1) : (refcnt = 0);
    while (vresp->refcnt > refcnt) {
        delay(drv_usectohz(vnet_fdbe_refcnt_delay));
    }
}

/*
 * Search fdb for a given mac address. If an entry is found, hold
 * a reference to it and return the entry; else returns NULL.
 */
static vnet_res_t *
vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
{
    uint64_t key = 0;
    vnet_res_t *vresp;
    int rv;

    KEY_HASH(key, addrp->ether_addr_octet);

    rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
        (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);

    if (rv != 0)
        return (NULL);

    return (vresp);
}

/*
 * Callback function provided to mod_hash_find_cb(). After finding the fdb
 * entry corresponding to the key (macaddr), this callback will be invoked by
 * mod_hash_find_cb() to atomically increment the reference count on the fdb
 * entry before returning the found entry.
 */
static void
vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
{
    _NOTE(ARGUNUSED(key))
    VNET_FDBE_REFHOLD((vnet_res_t *)val);
}

/*
 * Frames received that are tagged with the pvid of the vnet device must be
 * untagged before sending up the stack. This function walks the chain of rx
 * frames, untags any such frames and returns the updated chain through the
 * mp argument.
 *
 * Arguments:
 * pvid: pvid of the vnet device for which packets are being received
 * mp: head of pkt chain to be validated and untagged
 *
 * Returns:
 * mp: head of updated chain of packets
 */
static void
vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
{
    struct ether_vlan_header *evhp;
    mblk_t *bp;
    mblk_t *bpt;
    mblk_t *bph;
    mblk_t *bpn;

    bpn = bph = bpt = NULL;

    for (bp = *mp; bp != NULL; bp = bpn) {

        bpn = bp->b_next;
        bp->b_next = bp->b_prev = NULL;

        evhp = (struct ether_vlan_header *)bp->b_rptr;

        if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
            VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {

            bp = vnet_vlan_remove_tag(bp);
            if (bp == NULL) {
                continue;
            }

        }

        /* build a chain of processed packets */
        if (bph == NULL) {
            bph = bpt = bp;
        } else {
            bpt->b_next = bp;
            bpt = bp;
        }

    }

    *mp = bph;
}

static void
vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
{
    vnet_res_t *vresp = (vnet_res_t *)vrh;
    vnet_t *vnetp = vresp->vnetp;
    vnet_pseudo_rx_ring_t *ringp;

    if ((vnetp == NULL) || (vnetp->mh == NULL)) {
        freemsgchain(mp);
        return;
    }

    ringp = vresp->rx_ringp;
    mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
}

void
vnet_tx_update(vio_net_handle_t vrh)
{
    vnet_res_t *vresp = (vnet_res_t *)vrh;
    vnet_t *vnetp = vresp->vnetp;
    vnet_pseudo_tx_ring_t *tx_ringp;
    vnet_pseudo_tx_group_t *tx_grp;
    int i;

    if (vnetp == NULL || vnetp->mh == NULL) {
        return;
    }

    /*
     * Currently, the tx hwring API (used to access rings that belong to
     * a Hybrid IO resource) does not provide us a per ring flow ctrl
     * update; also the pseudo rings are shared by the ports/ldcs in the
     * vgen layer. Thus we can't figure out which pseudo ring is being
     * re-enabled for transmits. To work around this, when we get a tx
     * restart notification from below, we simply propagate that to all
     * the tx pseudo rings registered with the mac layer above.
     *
     * There are a couple of side effects with this approach, but they are
     * not harmful, as outlined below:
     *
     * A) We might send an invalid ring_update() for a ring that is not
     * really flow controlled. This will not have any effect in the mac
     * layer and packets will continue to be transmitted on that ring.
     *
     * B) We might end up clearing the flow control in the mac layer for
     * a ring that is still flow controlled in the underlying resource.
     * This will result in the mac layer restarting transmit, only to be
     * flow controlled again on that ring.
     */
    tx_grp = &vnetp->tx_grp[0];
    for (i = 0; i < tx_grp->ring_cnt; i++) {
        tx_ringp = &tx_grp->rings[i];
        mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
    }
}

/*
 * vnet_tx_notify_thread:
 *
 * vnet_tx_ring_update() callback function wakes up this thread when
 * it gets called. This thread will call mac_tx_ring_update() to
 * notify the upper mac of flow control getting relieved. Note that
 * vnet_tx_ring_update() cannot call mac_tx_ring_update() directly
 * because vnet_tx_ring_update() is called from the lower mac with
 * mi_rw_lock held and mac_tx_ring_update() would also try to grab
 * the same lock.
 */
static void
vnet_tx_notify_thread(void *arg)
{
    callb_cpr_t cprinfo;
    vnet_pseudo_tx_group_t *tx_grp = (vnet_pseudo_tx_group_t *)arg;
    vnet_pseudo_tx_ring_t *tx_ringp;
    vnet_t *vnetp;
    int i;

    CALLB_CPR_INIT(&cprinfo, &tx_grp->flowctl_lock, callb_generic_cpr,
        "vnet_tx_notify_thread");

    mutex_enter(&tx_grp->flowctl_lock);
    while (!tx_grp->flowctl_done) {
        CALLB_CPR_SAFE_BEGIN(&cprinfo);
        cv_wait(&tx_grp->flowctl_cv, &tx_grp->flowctl_lock);
        CALLB_CPR_SAFE_END(&cprinfo, &tx_grp->flowctl_lock);

        for (i = 0; i < tx_grp->ring_cnt; i++) {
            tx_ringp = &tx_grp->rings[i];
            if (tx_ringp->woken_up) {
                tx_ringp->woken_up = B_FALSE;
                vnetp = tx_ringp->vnetp;
                mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
            }
        }
    }
    /*
     * The tx_grp is being destroyed, exit the thread.
     */
    tx_grp->flowctl_thread = NULL;
    CALLB_CPR_EXIT(&cprinfo);
    thread_exit();
}

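/*
 * vnet_tx_ring_update -- called when an underlying (e.g. Hybrid) resource
 * resumes transmits; arg2 identifies the lower mac ring handle. Mark the
 * matching pseudo tx ring and wake up vnet_tx_notify_thread() to propagate
 * the update to the mac layer above (see the thread comment above for why
 * this cannot be done directly from this context).
 */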
void
vnet_tx_ring_update(void *arg1, uintptr_t arg2)
{
    vnet_t *vnetp = (vnet_t *)arg1;
    vnet_pseudo_tx_group_t *tx_grp;
    vnet_pseudo_tx_ring_t *tx_ringp;
    int i;

    tx_grp = &vnetp->tx_grp[0];
    for (i = 0; i < tx_grp->ring_cnt; i++) {
        tx_ringp = &tx_grp->rings[i];
        if (tx_ringp->hw_rh == (mac_ring_handle_t)arg2) {
            mutex_enter(&tx_grp->flowctl_lock);
            tx_ringp->woken_up = B_TRUE;
            cv_signal(&tx_grp->flowctl_cv);
            mutex_exit(&tx_grp->flowctl_lock);
            break;
        }
    }
}

/*
 * Update the new mtu of the vnet device in the mac layer. First check if the
 * device has been plumbed and if so fail the mtu update. Returns 0 on success.
 */
int
vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
{
    int rv;

    if (vnetp == NULL || vnetp->mh == NULL) {
        return (EINVAL);
    }

    WRITE_ENTER(&vnetp->vrwlock);

    if (vnetp->flags & VNET_STARTED) {
        RW_EXIT(&vnetp->vrwlock);
        cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
            "update as the device is plumbed\n",
            vnetp->instance);
        return (EBUSY);
    }

    /* update mtu in the mac layer */
    rv = mac_maxsdu_update(vnetp->mh, mtu);
    if (rv != 0) {
        RW_EXIT(&vnetp->vrwlock);
        cmn_err(CE_NOTE,
            "!vnet%d: Unable to update mtu with mac layer\n",
            vnetp->instance);
        return (EIO);
    }

    vnetp->mtu = mtu;

    RW_EXIT(&vnetp->vrwlock);

    return (0);
}

/*
 * Update the link state of vnet to the mac layer.
 */
void
vnet_link_update(vnet_t *vnetp, link_state_t link_state)
{
    if (vnetp == NULL || vnetp->mh == NULL) {
        return;
    }

    WRITE_ENTER(&vnetp->vrwlock);
    if (vnetp->link_state == link_state) {
        RW_EXIT(&vnetp->vrwlock);
        return;
    }
    vnetp->link_state = link_state;
    RW_EXIT(&vnetp->vrwlock);

    mac_link_update(vnetp->mh, link_state);
}

/*
 * vio_net_resource_reg -- An interface called to register a resource
 * with vnet.
 * macp -- a GLDv3 mac_register that has all the details of
 * a resource and its callbacks etc.
 * type -- resource type.
 * local_macaddr -- resource's MAC address. This is used to
 * associate a resource with a corresponding vnet.
 * remote_macaddr -- remote side MAC address. This is ignored for
 * the Hybrid resources.
 * vhp -- A handle returned to the caller.
 * vcb -- A set of callbacks provided to the callers.
 */
int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
    ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
    vio_net_callbacks_t *vcb)
{
    vnet_t *vnetp;
    vnet_res_t *vresp;

    vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
    ether_copy(local_macaddr, vresp->local_macaddr);
    ether_copy(rem_macaddr, vresp->rem_macaddr);
    vresp->type = type;
    bcopy(macp, &vresp->macreg, sizeof (mac_register_t));

    DBG1(NULL, "Resource register type=0x%X\n", type);

    READ_ENTER(&vnet_rw);
    vnetp = vnet_headp;
    while (vnetp != NULL) {
        if (VNET_MATCH_RES(vresp, vnetp)) {
            vresp->vnetp = vnetp;

            /* Setup kstats for hio resource */
            if (vresp->type == VIO_NET_RES_HYBRID) {
                vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
                    "hio", vresp);
                if (vresp->ksp == NULL) {
                    cmn_err(CE_NOTE, "!vnet%d: Cannot "
                        "create kstats for hio resource",
                        vnetp->instance);
                }
            }
            vnet_add_resource(vnetp, vresp);
            break;
        }
        vnetp = vnetp->nextp;
    }
    RW_EXIT(&vnet_rw);
    if (vresp->vnetp == NULL) {
        DWARN(NULL, "No vnet instance");
        kmem_free(vresp, sizeof (vnet_res_t));
        return (ENXIO);
    }

    *vhp = vresp;
    vcb->vio_net_rx_cb = vnet_rx;
    vcb->vio_net_tx_update = vnet_tx_update;
    vcb->vio_net_report_err = vnet_handle_res_err;

    /* Bind the resource to pseudo ring(s) */
    if (vnet_bind_rings(vresp) != 0) {
        (void) vnet_rem_resource(vnetp, vresp);
        vnet_hio_destroy_kstats(vresp->ksp);
        KMEM_FREE(vresp);
        return (1);
    }

    /* Dispatch a task to start resources */
    vnet_dispatch_res_task(vnetp);
    return (0);
}

/*
 * vio_net_resource_unreg -- An interface to unregister a resource.
 */
void
vio_net_resource_unreg(vio_net_handle_t vhp)
{
    vnet_res_t *vresp = (vnet_res_t *)vhp;
    vnet_t *vnetp = vresp->vnetp;

    DBG1(NULL, "Resource unregister hdl=0x%p", vhp);

    ASSERT(vnetp != NULL);
    /*
     * Remove the resource from fdb; this ensures
     * there are no references to the resource.
     */
    vnet_fdbe_del(vnetp, vresp);

    vnet_unbind_rings(vresp);

    /* Now remove the resource from the list */
    (void) vnet_rem_resource(vnetp, vresp);

    vnet_hio_destroy_kstats(vresp->ksp);
    KMEM_FREE(vresp);
}

static void
vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
{
    WRITE_ENTER(&vnetp->vrwlock);
    vresp->nextp = vnetp->vres_list;
    vnetp->vres_list = vresp;
    RW_EXIT(&vnetp->vrwlock);
}

static vnet_res_t *
vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
{
    vnet_res_t *vrp;

    WRITE_ENTER(&vnetp->vrwlock);
    if (vresp == vnetp->vres_list) {
        vnetp->vres_list = vresp->nextp;
    } else {
        vrp = vnetp->vres_list;
        while (vrp->nextp != NULL) {
            if (vrp->nextp == vresp) {
                vrp->nextp = vresp->nextp;
                break;
            }
            vrp = vrp->nextp;
        }
    }
    vresp->vnetp = NULL;
    vresp->nextp = NULL;

    RW_EXIT(&vnetp->vrwlock);

    return (vresp);
}

/*
 * vnet_dds_rx -- an interface called by vgen to deliver DDS messages.
 */
void
vnet_dds_rx(void *arg, void *dmsg)
{
    vnet_t *vnetp = arg;
    vdds_process_dds_msg(vnetp, dmsg);
}

/*
 * vnet_send_dds_msg -- An interface provided to DDS to send
 * DDS messages. This simply sends messages via vgen.
 */
int
vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
{
    int rv = EINVAL;	/* fail if vgen is not yet initialized */

    if (vnetp->vgenhdl != NULL) {
        rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
    }
    return (rv);
}

/*
 * vnet_dds_cleanup_hio -- an interface called by vgen to cleanup hio
 * resources.
 */
void
vnet_dds_cleanup_hio(vnet_t *vnetp)
{
    vdds_cleanup_hio(vnetp);
}

/*
 * vnet_handle_res_err -- A callback function called by a resource
 * to report an error. For example, vgen can call to report
 * an LDC down/reset event. This will trigger cleanup of associated
 * Hybrid resource.
 */
/* ARGSUSED */
static void
vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
{
    vnet_res_t *vresp = (vnet_res_t *)vrh;
    vnet_t *vnetp = vresp->vnetp;

    if (vnetp == NULL) {
        return;
    }
    if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
        (vresp->type != VIO_NET_RES_HYBRID)) {
        return;
    }

    vdds_cleanup_hio(vnetp);
}

/*
 * vnet_dispatch_res_task -- A function to dispatch a task to start resources.
 */
static void
vnet_dispatch_res_task(vnet_t *vnetp)
{
    int rv;

    /*
     * Dispatch the task. It could be the case that vnetp->flags does
     * not have VNET_STARTED set. This is ok as vnet_res_start_task()
     * can abort the task when the task is started. See related comments
     * in vnet_m_stop() and vnet_stop_resources().
     */
    rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
        vnetp, DDI_NOSLEEP);
    if (rv != DDI_SUCCESS) {
        cmn_err(CE_WARN,
            "vnet%d:Can't dispatch start resource task",
            vnetp->instance);
    }
}

/*
 * vnet_res_start_task -- A taskq callback function that starts a resource.
 */
static void
vnet_res_start_task(void *arg)
{
    vnet_t *vnetp = arg;

    WRITE_ENTER(&vnetp->vrwlock);
    if (vnetp->flags & VNET_STARTED) {
        vnet_start_resources(vnetp);
    }
    RW_EXIT(&vnetp->vrwlock);
}

/*
 * vnet_start_resources -- starts all resources associated with
 * a vnet.
 */
static void
vnet_start_resources(vnet_t *vnetp)
{
    mac_register_t *macp;
    mac_callbacks_t *cbp;
    vnet_res_t *vresp;
    int rv;

    DBG1(vnetp, "enter\n");

    ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));

    for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
        /* skip if it is already started */
        if (vresp->flags & VNET_STARTED) {
            continue;
        }
        macp = &vresp->macreg;
        cbp = macp->m_callbacks;
        rv = cbp->mc_start(macp->m_driver);
        if (rv == 0) {
            /*
             * Successfully started the resource, so now
             * add it to the fdb.
             */
            vresp->flags |= VNET_STARTED;
            vnet_fdbe_add(vnetp, vresp);
        }
    }

    DBG1(vnetp, "exit\n");

}

/*
 * vnet_stop_resources -- stop all resources associated with a vnet.
 */
static void
vnet_stop_resources(vnet_t *vnetp)
{
    vnet_res_t *vresp;
    mac_register_t *macp;
    mac_callbacks_t *cbp;

    DBG1(vnetp, "enter\n");

    ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));

    for (vresp = vnetp->vres_list; vresp != NULL; ) {
        if (vresp->flags & VNET_STARTED) {
            /*
             * Release the lock while invoking mc_stop() of the
             * underlying resource. We hold a reference to this
             * resource to prevent being removed from the list in
             * vio_net_resource_unreg(). Note that new resources
             * can be added to the head of the list while the lock
             * is released, but they won't be started, as
             * VNET_STARTED flag has been cleared for the vnet
             * device in vnet_m_stop(). Also, while the lock is
             * released a resource could be removed from the list
             * in vio_net_resource_unreg(); but that is ok, as we
             * re-acquire the lock and only then access the forward
             * link (vresp->nextp) to continue with the next
             * resource.
             */
            vresp->flags &= ~VNET_STARTED;
            vresp->flags |= VNET_STOPPING;
            macp = &vresp->macreg;
            cbp = macp->m_callbacks;
            VNET_FDBE_REFHOLD(vresp);
            RW_EXIT(&vnetp->vrwlock);

            cbp->mc_stop(macp->m_driver);

            WRITE_ENTER(&vnetp->vrwlock);
            vresp->flags &= ~VNET_STOPPING;
            VNET_FDBE_REFRELE(vresp);
        }
        vresp = vresp->nextp;
    }
    DBG1(vnetp, "exit\n");
}

/*
 * Setup kstats for the HIO statistics.
 * NOTE: the synchronization for the statistics is the
 * responsibility of the caller.
 */
kstat_t *
vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
{
    kstat_t *ksp;
    vnet_t *vnetp = vresp->vnetp;
    vnet_hio_kstats_t *hiokp;
    size_t size;

    ASSERT(vnetp != NULL);
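    /* number of kstat_named_t entries contained in vnet_hio_kstats_t */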
1887 size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
1888 ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
1889 KSTAT_TYPE_NAMED, size, 0);
1890 if (ksp == NULL) {
1891 return (NULL);
1892 }
1893
1894 hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1895 kstat_named_init(&hiokp->ipackets, "ipackets",
1896 KSTAT_DATA_ULONG);
1897 kstat_named_init(&hiokp->ierrors, "ierrors",
1898 KSTAT_DATA_ULONG);
1899 kstat_named_init(&hiokp->opackets, "opackets",
1900 KSTAT_DATA_ULONG);
1901 kstat_named_init(&hiokp->oerrors, "oerrors",
1902 KSTAT_DATA_ULONG);
1903
1904
1905 /* MIB II kstat variables */
1906 kstat_named_init(&hiokp->rbytes, "rbytes",
1907 KSTAT_DATA_ULONG);
1908 kstat_named_init(&hiokp->obytes, "obytes",
1909 KSTAT_DATA_ULONG);
1910 kstat_named_init(&hiokp->multircv, "multircv",
1911 KSTAT_DATA_ULONG);
1912 kstat_named_init(&hiokp->multixmt, "multixmt",
1913 KSTAT_DATA_ULONG);
1914 kstat_named_init(&hiokp->brdcstrcv, "brdcstrcv",
1915 KSTAT_DATA_ULONG);
1916 kstat_named_init(&hiokp->brdcstxmt, "brdcstxmt",
1917 KSTAT_DATA_ULONG);
1918 kstat_named_init(&hiokp->norcvbuf, "norcvbuf",
1919 KSTAT_DATA_ULONG);
1920 kstat_named_init(&hiokp->noxmtbuf, "noxmtbuf",
1921 KSTAT_DATA_ULONG);
1922
1923 ksp->ks_update = vnet_hio_update_kstats;
1924 ksp->ks_private = (void *)vresp;
1925 kstat_install(ksp);
1926 return (ksp);
1927 }
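
/*
 * Example (illustrative only; not part of the driver and not compiled):
 * the named kstats installed above can be read from userland via
 * libkstat(3LIB). The module/name strings below are hypothetical; the
 * real values are whatever ks_mod/ks_name the caller passes to
 * vnet_hio_setup_kstats().
 *
 *	#include <kstat.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		kstat_ctl_t *kc;
 *		kstat_t *ksp;
 *		kstat_named_t *kn;
 *
 *		if ((kc = kstat_open()) == NULL)
 *			return (1);
 *		ksp = kstat_lookup(kc, "vnet", 0, "hio_stats");
 *		if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1) {
 *			kn = kstat_data_lookup(ksp, "ipackets");
 *			if (kn != NULL)
 *				(void) printf("ipackets: %lu\n",
 *				    kn->value.ul);
 *		}
 *		(void) kstat_close(kc);
 *		return (0);
 *	}
 */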

/*
 * Destroy kstats.
 */
static void
vnet_hio_destroy_kstats(kstat_t *ksp)
{
	if (ksp != NULL)
		kstat_delete(ksp);
}

/*
 * Update the kstats.
 */
static int
vnet_hio_update_kstats(kstat_t *ksp, int rw)
{
	vnet_t *vnetp;
	vnet_res_t *vresp;
	vnet_hio_stats_t statsp;
	vnet_hio_kstats_t *hiokp;

	vresp = (vnet_res_t *)ksp->ks_private;
	vnetp = vresp->vnetp;

	bzero(&statsp, sizeof (vnet_hio_stats_t));

	READ_ENTER(&vnetp->vsw_fp_rw);
	if (vnetp->hio_fp == NULL) {
		/* not using hio resources, just return */
		RW_EXIT(&vnetp->vsw_fp_rw);
		return (0);
	}
	VNET_FDBE_REFHOLD(vnetp->hio_fp);
	RW_EXIT(&vnetp->vsw_fp_rw);
	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
	VNET_FDBE_REFRELE(vnetp->hio_fp);

	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;

	if (rw == KSTAT_READ) {
		/* Link Input/Output stats */
		hiokp->ipackets.value.ul = (uint32_t)statsp.ipackets;
		hiokp->ipackets64.value.ull = statsp.ipackets;
		hiokp->ierrors.value.ul = statsp.ierrors;
		hiokp->opackets.value.ul = (uint32_t)statsp.opackets;
		hiokp->opackets64.value.ull = statsp.opackets;
		hiokp->oerrors.value.ul = statsp.oerrors;

		/* MIB II kstat variables */
		hiokp->rbytes.value.ul = (uint32_t)statsp.rbytes;
		hiokp->rbytes64.value.ull = statsp.rbytes;
		hiokp->obytes.value.ul = (uint32_t)statsp.obytes;
		hiokp->obytes64.value.ull = statsp.obytes;
		hiokp->multircv.value.ul = statsp.multircv;
		hiokp->multixmt.value.ul = statsp.multixmt;
		hiokp->brdcstrcv.value.ul = statsp.brdcstrcv;
		hiokp->brdcstxmt.value.ul = statsp.brdcstxmt;
		hiokp->norcvbuf.value.ul = statsp.norcvbuf;
		hiokp->noxmtbuf.value.ul = statsp.noxmtbuf;
	} else {
		return (EACCES);
	}

	return (0);
}

static void
vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
{
	mac_register_t *macp;
	mac_callbacks_t *cbp;
	uint64_t val;
	int stat;

	/*
	 * Get the specified statistics from the underlying nxge.
	 */
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
			switch (stat) {
			case MAC_STAT_IPACKETS:
				statsp->ipackets = val;
				break;

			case MAC_STAT_IERRORS:
				statsp->ierrors = val;
				break;

			case MAC_STAT_OPACKETS:
				statsp->opackets = val;
				break;

			case MAC_STAT_OERRORS:
				statsp->oerrors = val;
				break;

			case MAC_STAT_RBYTES:
				statsp->rbytes = val;
				break;

			case MAC_STAT_OBYTES:
				statsp->obytes = val;
				break;

			case MAC_STAT_MULTIRCV:
				statsp->multircv = val;
				break;

			case MAC_STAT_MULTIXMT:
				statsp->multixmt = val;
				break;

			case MAC_STAT_BRDCSTRCV:
				statsp->brdcstrcv = val;
				break;

			case MAC_STAT_BRDCSTXMT:
				statsp->brdcstxmt = val;
				break;

			case MAC_STAT_NOXMTBUF:
				statsp->noxmtbuf = val;
				break;

			case MAC_STAT_NORCVBUF:
				statsp->norcvbuf = val;
				break;

			default:
				/*
				 * Stats we are not interested in.
				 */
				break;
			}
		}
	}
}

static boolean_t
vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
{
	vnet_t *vnetp = (vnet_t *)arg;

	if (vnetp == NULL) {
		return (B_FALSE);
	}

	switch (cap) {

	case MAC_CAPAB_RINGS: {

		mac_capab_rings_t *cap_rings = cap_data;
		/*
		 * Rings Capability Notes:
		 * We advertise rings to make use of the rings framework in
		 * the gldv3 mac layer, to improve performance. This is
		 * specifically needed when a Hybrid resource (with multiple
		 * tx/rx hardware rings) is assigned to a vnet device. We also
		 * leverage this for the normal case when no Hybrid resource
		 * is assigned.
		 *
		 * Ring Allocation:
		 * - TX path:
		 * We expose a pseudo ring group with 2 pseudo tx rings (as
		 * currently HybridIO exports only 2 rings). In the normal
		 * case, transmit traffic that comes down to the driver
		 * through the mri_tx (vnet_tx_ring_send()) entry point goes
		 * through the distributed switching algorithm in vnet and
		 * gets transmitted over a port/LDC in the vgen layer to
		 * either the vswitch or a peer vnet. If and when a Hybrid
		 * resource is assigned to the vnet, we obtain the tx ring
		 * information of the Hybrid device (nxge) and map the pseudo
		 * rings 1:1 to the 2 hw tx rings. Traffic being sent over
		 * the Hybrid resource by the mac layer gets spread across
		 * both hw rings, as they are mapped to the 2 pseudo tx rings
		 * in vnet.
		 *
		 * - RX path:
		 * We expose a pseudo ring group with 3 pseudo rx rings
		 * (static rings) initially. The first (default) pseudo rx
		 * ring is reserved for the resource that connects to the
		 * vswitch service. The next 2 rings are reserved for a
		 * Hybrid resource that may be assigned to the vnet device.
		 * If and when a Hybrid resource is assigned to the vnet, we
		 * obtain the rx ring information of the Hybrid device (nxge)
		 * and map these pseudo rings 1:1 to the 2 hw rx rings. For
		 * each additional resource that connects to a peer vnet, we
		 * dynamically allocate a pseudo rx ring and map it to that
		 * resource, when the resource gets added; and the pseudo rx
		 * ring is dynamically registered with the upper mac layer.
		 * We do the reverse and unregister the ring with the mac
		 * layer when the resource gets removed.
		 *
		 * Synchronization notes:
		 * We don't need any lock to protect members of the ring
		 * structure, specifically ringp->hw_rh, in either the TX or
		 * the RX ring, as explained below.
		 * - TX ring:
		 * ring->hw_rh is initialized only when a Hybrid resource is
		 * associated; and gets referenced only in vnet_hio_tx(). The
		 * Hybrid resource itself is available in fdb only after tx
		 * hwrings are found and mapped; i.e, in vio_net_resource_reg()
		 * we call vnet_bind_rings() first and then call
		 * vnet_start_resources() which adds an entry to fdb. For
		 * traffic going over LDC resources, we don't reference
		 * ring->hw_rh at all.
		 * - RX ring:
		 * For rings mapped to a Hybrid resource, ring->hw_rh is
		 * initialized and only then do we add the rx callback for
		 * the underlying Hybrid resource; we disable callbacks before
		 * we unmap ring->hw_rh. For rings mapped to LDC resources, we
		 * stop the rx callbacks (in vgen) before we remove ring->hw_rh
		 * (vio_net_resource_unreg()).
		 * Also, we access ring->hw_rh in vnet_rx_ring_stat().
		 * Note that for rings mapped to a Hybrid resource, though the
		 * rings are statically registered with the mac layer, their
		 * hardware ring mapping (ringp->hw_rh) can be torn down in
		 * vnet_unbind_hwrings() while the kstat operation is in
		 * progress. To protect against this, we hold a reference to
		 * the resource in FDB; this ensures that the thread in
		 * vio_net_resource_unreg() waits for the reference to be
		 * dropped before unbinding the ring.
		 *
		 * We don't need to do this for rings mapped to LDC resources.
		 * These rings are registered/unregistered dynamically with
		 * the mac layer and so any attempt to unregister the ring
		 * while a kstat operation is in progress will block in
		 * mac_group_rem_ring(), which implicitly protects the
		 * resource (ringp->hw_rh) from disappearing.
		 */
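
		/*
		 * To make the above concrete, a sketch of the resulting
		 * pseudo ring layout (illustrative only; the exact counts
		 * come from vnet_ring_grp_init() and the VNET_NUM_*
		 * constants):
		 *
		 *	RX group 0:
		 *	  ring 0	default; reserved for the LDC
		 *			resource to the vswitch service
		 *	  rings 1-2	reserved for the Hybrid (nxge)
		 *			hw rx rings, bound 1:1 in
		 *			vnet_bind_hwrings()
		 *	  rings 3..n	added/removed dynamically, one per
		 *			LDC resource to a peer vnet
		 *
		 *	TX group 0:
		 *	  rings 0-1	static; also mapped 1:1 to the
		 *			Hybrid hw tx rings when a Hybrid
		 *			resource is assigned
		 */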

		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

			/*
			 * The ring_cnt for the rx grp is initialized in
			 * vnet_ring_grp_init(). Later, the ring_cnt gets
			 * updated dynamically whenever LDC resources are
			 * added or removed.
			 */
			cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
			cap_rings->mr_rget = vnet_get_ring;

			cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
			cap_rings->mr_gget = vnet_get_group;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
		} else {
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

			/*
			 * The ring_cnt for the tx grp is initialized in
			 * vnet_ring_grp_init() and remains constant, as we
			 * do not support dynamic tx rings for now.
			 */
			cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
			cap_rings->mr_rget = vnet_get_ring;

			/*
			 * Transmit rings are not grouped; i.e, the number of
			 * transmit ring groups advertised should be set to 0.
			 */
			cap_rings->mr_gnum = 0;

			cap_rings->mr_gget = vnet_get_group;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
		}
		return (B_TRUE);

	}

	default:
		break;

	}

	return (B_FALSE);
}

/*
 * Callback function for the MAC layer to get ring information.
 */
static void
vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
    const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
{
	vnet_t *vnetp = arg;

	switch (rtype) {

	case MAC_RING_TYPE_RX: {

		vnet_pseudo_rx_group_t *rx_grp;
		vnet_pseudo_rx_ring_t *rx_ringp;
		mac_intr_t *mintr;

		/* We advertised only one RX group */
		ASSERT(g_index == 0);
		rx_grp = &vnetp->rx_grp[g_index];

		/* Check the current # of rings in the rx group */
		ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));

		/* Get the ring based on the index */
		rx_ringp = &rx_grp->rings[r_index];

		rx_ringp->handle = r_handle;
		/*
		 * Note: we don't need to save the incoming r_index in
		 * rx_ring, as vnet_ring_grp_init() would have initialized
		 * the index for each ring in the array.
		 */
		rx_ringp->grp = rx_grp;
		rx_ringp->vnetp = vnetp;

		mintr = &infop->mri_intr;
		mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
		mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
		mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;

		infop->mri_driver = (mac_ring_driver_t)rx_ringp;
		infop->mri_start = vnet_rx_ring_start;
		infop->mri_stop = vnet_rx_ring_stop;
		infop->mri_stat = vnet_rx_ring_stat;

		/* Set the poll function, as this is an rx ring */
		infop->mri_poll = vnet_rx_poll;
		/*
		 * The MAC_RING_RX_ENQUEUE bit needed to be set for nxge,
		 * which was not sending packet chains in interrupt
		 * context. For such drivers, packets are queued in
		 * Rx soft rings so that we get a chance to switch
		 * into a polling mode under backlog. This bug (not
		 * sending packet chains) has now been fixed. Once
		 * the performance impact is measured, this change
		 * will be removed.
		 */
		infop->mri_flags = (vnet_mac_rx_queuing ?
		    MAC_RING_RX_ENQUEUE : 0);
		break;
	}

	case MAC_RING_TYPE_TX: {
		vnet_pseudo_tx_group_t *tx_grp;
		vnet_pseudo_tx_ring_t *tx_ringp;

		/*
		 * No need to check the grp index; the mac layer passes -1
		 * for it.
		 */
		tx_grp = &vnetp->tx_grp[0];

		/* Check the # of rings in the tx group */
		ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));

		/* Get the ring based on the index */
		tx_ringp = &tx_grp->rings[r_index];

		tx_ringp->handle = r_handle;
		tx_ringp->index = r_index;
		tx_ringp->grp = tx_grp;
		tx_ringp->vnetp = vnetp;

		infop->mri_driver = (mac_ring_driver_t)tx_ringp;
		infop->mri_start = vnet_tx_ring_start;
		infop->mri_stop = vnet_tx_ring_stop;
		infop->mri_stat = vnet_tx_ring_stat;

		/* Set the transmit function, as this is a tx ring */
		infop->mri_tx = vnet_tx_ring_send;
		/*
		 * The MAC_RING_TX_SERIALIZE bit needs to be set while
		 * hybridIO is enabled to work around tx lock
		 * contention issues in nxge.
		 */
		infop->mri_flags = (vnet_mac_tx_serialize ?
		    MAC_RING_TX_SERIALIZE : 0);
		break;
	}

	default:
		break;
	}
}

/*
 * Callback function for the MAC layer to get group information.
 */
static void
vnet_get_group(void *arg, mac_ring_type_t type, const int index,
    mac_group_info_t *infop, mac_group_handle_t handle)
{
	vnet_t *vnetp = (vnet_t *)arg;

	switch (type) {

	case MAC_RING_TYPE_RX:
	{
		vnet_pseudo_rx_group_t *rx_grp;

		/* We advertised only one RX group */
		ASSERT(index == 0);

		rx_grp = &vnetp->rx_grp[index];
		rx_grp->handle = handle;
		rx_grp->index = index;
		rx_grp->vnetp = vnetp;

		infop->mgi_driver = (mac_group_driver_t)rx_grp;
		infop->mgi_start = NULL;
		infop->mgi_stop = NULL;
		infop->mgi_addmac = vnet_addmac;
		infop->mgi_remmac = vnet_remmac;
		infop->mgi_count = rx_grp->ring_cnt;

		break;
	}

	case MAC_RING_TYPE_TX:
	{
		vnet_pseudo_tx_group_t *tx_grp;

		/* We advertised only one TX group */
		ASSERT(index == 0);

		tx_grp = &vnetp->tx_grp[index];
		tx_grp->handle = handle;
		tx_grp->index = index;
		tx_grp->vnetp = vnetp;

		infop->mgi_driver = (mac_group_driver_t)tx_grp;
		infop->mgi_start = NULL;
		infop->mgi_stop = NULL;
		infop->mgi_addmac = NULL;
		infop->mgi_remmac = NULL;
		infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;

		break;
	}

	default:
		break;

	}
}

static int
vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
{
	vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	int err;

	/*
	 * If this ring is mapped to an LDC resource, simply mark the state
	 * to indicate the ring is started and return.
	 */
	if ((rx_ringp->state &
	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
		rx_ringp->gen_num = mr_gen_num;
		rx_ringp->state |= VNET_RXRING_STARTED;
		return (0);
	}

	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);

	/*
	 * This must be a ring reserved for a hwring. If the hwring is not
	 * bound yet, simply mark the state to indicate the ring is started
	 * and return. If and when a hybrid resource is activated for this
	 * vnet device, we will bind the hwring and start it then. If a
	 * hwring is already bound, start it now.
	 */
	if (rx_ringp->hw_rh == NULL) {
		rx_ringp->gen_num = mr_gen_num;
		rx_ringp->state |= VNET_RXRING_STARTED;
		return (0);
	}

	err = mac_hwring_start(rx_ringp->hw_rh);
	if (err == 0) {
		rx_ringp->gen_num = mr_gen_num;
		rx_ringp->state |= VNET_RXRING_STARTED;
	} else {
		err = ENXIO;
	}

	return (err);
}

static void
vnet_rx_ring_stop(mac_ring_driver_t arg)
{
	vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;

	/*
	 * If this ring is mapped to an LDC resource, simply mark the state
	 * to indicate the ring is now stopped and return.
	 */
	if ((rx_ringp->state &
	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
		rx_ringp->state &= ~VNET_RXRING_STARTED;
		return;
	}

	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);

	/*
	 * This must be a ring reserved for a hwring. If the hwring is not
	 * bound yet, simply mark the state to indicate the ring is stopped
	 * and return. If a hwring is already bound, stop it now.
	 */
	if (rx_ringp->hw_rh == NULL) {
		rx_ringp->state &= ~VNET_RXRING_STARTED;
		return;
	}

	mac_hwring_stop(rx_ringp->hw_rh);
	rx_ringp->state &= ~VNET_RXRING_STARTED;
}

static int
vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
{
	vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)rdriver;
	vnet_t *vnetp = (vnet_t *)rx_ringp->vnetp;
	vnet_res_t *vresp;
	mac_register_t *macp;
	mac_callbacks_t *cbp;

	/*
	 * Refer to the vnet_m_capab() function for detailed comments on ring
	 * synchronization.
	 */
	if ((rx_ringp->state & VNET_RXRING_HYBRID) != 0) {
		READ_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->hio_fp == NULL) {
			RW_EXIT(&vnetp->vsw_fp_rw);
			return (0);
		}

		VNET_FDBE_REFHOLD(vnetp->hio_fp);
		RW_EXIT(&vnetp->vsw_fp_rw);
		(void) mac_hwring_getstat(rx_ringp->hw_rh, stat, val);
		VNET_FDBE_REFRELE(vnetp->hio_fp);
		return (0);
	}

	ASSERT((rx_ringp->state &
	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0);
	vresp = (vnet_res_t *)rx_ringp->hw_rh;
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;

	cbp->mc_getstat(macp->m_driver, stat, val);

	return (0);
}

/* ARGSUSED */
static int
vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
{
	vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;

	tx_ringp->state |= VNET_TXRING_STARTED;
	return (0);
}

static void
vnet_tx_ring_stop(mac_ring_driver_t arg)
{
	vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;

	tx_ringp->state &= ~VNET_TXRING_STARTED;
}

static int
vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
{
	vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)rdriver;
	vnet_tx_ring_stats_t *statsp;

	statsp = &tx_ringp->tx_ring_stats;

	switch (stat) {
	case MAC_STAT_OPACKETS:
		*val = statsp->opackets;
		break;

	case MAC_STAT_OBYTES:
		*val = statsp->obytes;
		break;

	default:
		*val = 0;
		return (ENOTSUP);
	}

	return (0);
}

/*
 * Disable polling for a ring and enable its interrupt.
 */
static int
vnet_ring_enable_intr(void *arg)
{
	vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	vnet_res_t *vresp;

	if (rx_ringp->hw_rh == NULL) {
		/*
		 * The ring enable intr func is being invoked, but the ring
		 * is not bound to any underlying resource? This must be a
		 * ring reserved for a Hybrid resource and no such resource
		 * has been assigned to this vnet device yet. We simply
		 * return success.
		 */
		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
		return (0);
	}

	/*
	 * The rx ring has been bound to either an LDC or a Hybrid resource.
	 * Call the appropriate function to enable interrupts for the ring.
	 */
	if (rx_ringp->state & VNET_RXRING_HYBRID) {
		return (mac_hwring_enable_intr(rx_ringp->hw_rh));
	} else {
		vresp = (vnet_res_t *)rx_ringp->hw_rh;
		return (vgen_enable_intr(vresp->macreg.m_driver));
	}
}

/*
 * Enable polling for a ring and disable its interrupt.
 */
static int
vnet_ring_disable_intr(void *arg)
{
	vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	vnet_res_t *vresp;

	if (rx_ringp->hw_rh == NULL) {
		/*
		 * The ring disable intr func is being invoked, but the ring
		 * is not bound to any underlying resource? This must be a
		 * ring reserved for a Hybrid resource and no such resource
		 * has been assigned to this vnet device yet. We simply
		 * return success.
		 */
		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
		return (0);
	}

	/*
	 * The rx ring has been bound to either an LDC or a Hybrid resource.
	 * Call the appropriate function to disable interrupts for the ring.
	 */
	if (rx_ringp->state & VNET_RXRING_HYBRID) {
		return (mac_hwring_disable_intr(rx_ringp->hw_rh));
	} else {
		vresp = (vnet_res_t *)rx_ringp->hw_rh;
		return (vgen_disable_intr(vresp->macreg.m_driver));
	}
}

/*
 * Poll 'bytes_to_pickup' bytes of message from the rx ring.
 */
static mblk_t *
vnet_rx_poll(void *arg, int bytes_to_pickup)
{
	vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	mblk_t *mp = NULL;
	vnet_res_t *vresp;
	vnet_t *vnetp = rx_ringp->vnetp;

	if (rx_ringp->hw_rh == NULL) {
		return (NULL);
	}

	if (rx_ringp->state & VNET_RXRING_HYBRID) {
		mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
		/*
		 * Packets received over a hybrid resource need additional
		 * processing to remove the tag, for the pvid case. The
		 * underlying resource is not aware of the vnet's pvid and
		 * thus packets are received with the vlan tag in the header;
		 * unlike packets that are received over an LDC channel, in
		 * which case the peer vnet/vsw would have already removed
		 * the tag.
		 */
		if (vnetp->pvid != vnetp->default_vlan_id) {
			vnet_rx_frames_untag(vnetp->pvid, &mp);
		}
	} else {
		vresp = (vnet_res_t *)rx_ringp->hw_rh;
		mp = vgen_rx_poll(vresp->macreg.m_driver, bytes_to_pickup);
	}
	return (mp);
}
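
/*
 * Illustrative only: the expected interaction between the mac layer and
 * the callbacks above, when the mac layer switches a ring between
 * interrupt and polling mode (a sketch of the sequence, not the mac
 * layer's actual code):
 *
 *	(void) vnet_ring_disable_intr(rx_ringp);	enter poll mode
 *	while (<backlog to drain>) {
 *		mp = vnet_rx_poll(rx_ringp, bytes_to_pickup);
 *		<pass mp up the stack>
 *	}
 *	(void) vnet_ring_enable_intr(rx_ringp);		back to intr mode
 */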

/* ARGSUSED */
void
vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    boolean_t loopback)
{
	vnet_t *vnetp = (vnet_t *)arg;
	vnet_pseudo_rx_ring_t *ringp = (vnet_pseudo_rx_ring_t *)mrh;

	/*
	 * Packets received over a hybrid resource need additional processing
	 * to remove the tag, for the pvid case. The underlying resource is
	 * not aware of the vnet's pvid and thus packets are received with
	 * the vlan tag in the header; unlike packets that are received over
	 * an LDC channel, in which case the peer vnet/vsw would have already
	 * removed the tag.
	 */
	if (vnetp->pvid != vnetp->default_vlan_id) {
		vnet_rx_frames_untag(vnetp->pvid, &mp);
		if (mp == NULL) {
			return;
		}
	}
	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
}

static int
vnet_addmac(void *arg, const uint8_t *mac_addr)
{
	vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg;
	vnet_t *vnetp;

	vnetp = rx_grp->vnetp;

	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
		return (0);
	}

	cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
	    vnetp->instance, __func__);
	return (EINVAL);
}

static int
vnet_remmac(void *arg, const uint8_t *mac_addr)
{
	vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg;
	vnet_t *vnetp;

	vnetp = rx_grp->vnetp;

	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
		return (0);
	}

	cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
	    vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
	return (EINVAL);
}

int
vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
{
	mac_handle_t mh;
	mac_client_handle_t mch = NULL;
	mac_unicast_handle_t muh = NULL;
	mac_diag_t diag;
	mac_register_t *macp;
	char client_name[MAXNAMELEN];
	int rv;
	uint16_t mac_flags = MAC_UNICAST_TAG_DISABLE |
	    MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
	vio_net_callbacks_t vcb;
	ether_addr_t rem_addr =
		{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
	uint32_t retries = 0;

	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
		return (EAGAIN);
	}

	do {
		rv = mac_open_by_linkname(ifname, &mh);
		if (rv == 0) {
			break;
		}
		if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
			mac_free(macp);
			return (rv);
		}
		drv_usecwait(vnet_mac_open_delay);
	} while (rv == ENOENT);

	vnetp->hio_mh = mh;

	(void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
	    ifname);
	rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
	if (rv != 0) {
		goto fail;
	}
	vnetp->hio_mch = mch;

	rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
	    &diag);
	if (rv != 0) {
		goto fail;
	}
	vnetp->hio_muh = muh;

	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vnetp;
	macp->m_dip = NULL;
	macp->m_src_addr = NULL;
	macp->m_callbacks = &vnet_hio_res_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;

	rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
	    vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
	if (rv != 0) {
		goto fail;
	}
	mac_free(macp);

	/* add the recv callback */
	mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);

	return (0);

fail:
	mac_free(macp);
	vnet_hio_mac_cleanup(vnetp);
	return (1);
}

void
vnet_hio_mac_cleanup(vnet_t *vnetp)
{
	if (vnetp->hio_vhp != NULL) {
		vio_net_resource_unreg(vnetp->hio_vhp);
		vnetp->hio_vhp = NULL;
	}

	if (vnetp->hio_muh != NULL) {
		(void) mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
		vnetp->hio_muh = NULL;
	}

	if (vnetp->hio_mch != NULL) {
		mac_client_close(vnetp->hio_mch, 0);
		vnetp->hio_mch = NULL;
	}

	if (vnetp->hio_mh != NULL) {
		mac_close(vnetp->hio_mh);
		vnetp->hio_mh = NULL;
	}
}

/* Bind pseudo rings to hwrings */
static int
vnet_bind_hwrings(vnet_t *vnetp)
{
	mac_ring_handle_t hw_rh[VNET_NUM_HYBRID_RINGS];
	mac_perim_handle_t mph1;
	vnet_pseudo_rx_group_t *rx_grp;
	vnet_pseudo_rx_ring_t *rx_ringp;
	vnet_pseudo_tx_group_t *tx_grp;
	vnet_pseudo_tx_ring_t *tx_ringp;
	int hw_ring_cnt;
	int i;
	int rv;

	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

	/* Get the list of the underlying RX rings. */
	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
	    MAC_RING_TYPE_RX);

	/* We expect the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
		cmn_err(CE_WARN,
		    "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
		    vnetp->instance, hw_ring_cnt);
		goto fail;
	}

	if (vnetp->rx_hwgh != NULL) {
		/*
		 * Quiesce the HW ring and the mac srs on the ring. Note
		 * that the HW ring will be restarted when the pseudo ring
		 * is started. At that time all the packets will be
		 * directly passed up to the pseudo RX ring and handled
		 * by mac srs created over the pseudo RX ring.
		 */
		mac_rx_client_quiesce(vnetp->hio_mch);
		mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
	}

	/*
	 * Bind the pseudo rings to the hwrings and start the hwrings.
	 * Note we don't need to register these with the upper mac, as we have
	 * statically exported these pseudo rxrings which are reserved for
	 * rxrings of the Hybrid resource.
	 */
	rx_grp = &vnetp->rx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		/* Pick the rxrings reserved for the Hybrid resource */
		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];

		/* Store the hw ring handle */
		rx_ringp->hw_rh = hw_rh[i];

		/* Bind the pseudo ring to the underlying hwring */
		mac_hwring_setup(rx_ringp->hw_rh,
		    (mac_resource_handle_t)rx_ringp, NULL);

		/* Start the hwring if needed */
		if (rx_ringp->state & VNET_RXRING_STARTED) {
			rv = mac_hwring_start(rx_ringp->hw_rh);
			if (rv != 0) {
				mac_hwring_teardown(rx_ringp->hw_rh);
				rx_ringp->hw_rh = NULL;
				goto fail;
			}
		}
	}

	/* Get the list of the underlying TX rings. */
	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
	    MAC_RING_TYPE_TX);

	/* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
		cmn_err(CE_WARN,
		    "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
		    vnetp->instance, hw_ring_cnt);
		goto fail;
	}

	/*
	 * Now map the pseudo txrings to the hw txrings. Note we don't need
	 * to register these with the upper mac, as we have statically exported
	 * these rings. Note that these rings will continue to be used for LDC
	 * resources to peer vnets and vswitch (shared ring).
	 */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < tx_grp->ring_cnt; i++) {
		tx_ringp = &tx_grp->rings[i];
		tx_ringp->hw_rh = hw_rh[i];
		tx_ringp->state |= VNET_TXRING_HYBRID;
	}
	tx_grp->tx_notify_handle =
	    mac_client_tx_notify(vnetp->hio_mch, vnet_tx_ring_update, vnetp);

	mac_perim_exit(mph1);
	return (0);

fail:
	mac_perim_exit(mph1);
	vnet_unbind_hwrings(vnetp);
	return (1);
}

/* Unbind pseudo rings from hwrings */
static void
vnet_unbind_hwrings(vnet_t *vnetp)
{
	mac_perim_handle_t mph1;
	vnet_pseudo_rx_ring_t *rx_ringp;
	vnet_pseudo_rx_group_t *rx_grp;
	vnet_pseudo_tx_group_t *tx_grp;
	vnet_pseudo_tx_ring_t *tx_ringp;
	int i;

	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		tx_ringp = &tx_grp->rings[i];
		if (tx_ringp->state & VNET_TXRING_HYBRID) {
			tx_ringp->state &= ~VNET_TXRING_HYBRID;
			tx_ringp->hw_rh = NULL;
		}
	}
	(void) mac_client_tx_notify(vnetp->hio_mch, NULL,
	    tx_grp->tx_notify_handle);

	rx_grp = &vnetp->rx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
		if (rx_ringp->hw_rh != NULL) {
			/* Stop the hwring */
			mac_hwring_stop(rx_ringp->hw_rh);

			/* Teardown the hwring */
			mac_hwring_teardown(rx_ringp->hw_rh);
			rx_ringp->hw_rh = NULL;
		}
	}

	if (vnetp->rx_hwgh != NULL) {
		vnetp->rx_hwgh = NULL;
		/*
		 * First clear the permanent-quiesced flag of the RX srs then
		 * restart the HW ring and the mac srs on the ring.
		 */
		mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
		mac_rx_client_restart(vnetp->hio_mch);
	}

	mac_perim_exit(mph1);
}

/* Bind a pseudo ring to an LDC resource */
static int
vnet_bind_vgenring(vnet_res_t *vresp)
{
	vnet_t *vnetp;
	vnet_pseudo_rx_group_t *rx_grp;
	vnet_pseudo_rx_ring_t *rx_ringp;
	mac_perim_handle_t mph1;
	int rv;
	int type;

	vnetp = vresp->vnetp;
	type = vresp->type;
	rx_grp = &vnetp->rx_grp[0];

	if (type == VIO_NET_RES_LDC_SERVICE) {
		/*
		 * Ring Index 0 is the default ring in the group and is
		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
		 * is allocated statically and is reported to the mac layer
		 * in vnet_m_capab(). So all we need to do here is save a
		 * reference to the associated vresp.
		 */
		rx_ringp = &rx_grp->rings[0];
		rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
		vresp->rx_ringp = (void *)rx_ringp;
		return (0);
	}
	ASSERT(type == VIO_NET_RES_LDC_GUEST);

	mac_perim_enter_by_mh(vnetp->mh, &mph1);

	rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
	if (rx_ringp == NULL) {
		cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
		    vnetp->instance);
		goto fail;
	}

	/* Store the LDC resource itself as the ring handle */
	rx_ringp->hw_rh = (mac_ring_handle_t)vresp;

	/*
	 * Save a reference to the ring in the resource for lookup during
	 * unbind. Note this is only done for LDC resources. We don't need
	 * this in the case of a Hybrid resource (see vnet_bind_hwrings()),
	 * as its rx rings are mapped to reserved pseudo rx rings
	 * (index 1 and 2).
	 */
	vresp->rx_ringp = (void *)rx_ringp;
	rx_ringp->state |= VNET_RXRING_LDC_GUEST;

	/* Register the pseudo ring with the upper-mac */
	rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
	if (rv != 0) {
		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
		rx_ringp->hw_rh = NULL;
		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
		goto fail;
	}

	mac_perim_exit(mph1);
	return (0);
fail:
	mac_perim_exit(mph1);
	return (1);
}

/* Unbind a pseudo ring from an LDC resource */
static void
vnet_unbind_vgenring(vnet_res_t *vresp)
{
	vnet_t *vnetp;
	vnet_pseudo_rx_group_t *rx_grp;
	vnet_pseudo_rx_ring_t *rx_ringp;
	mac_perim_handle_t mph1;
	int type;

	vnetp = vresp->vnetp;
	type = vresp->type;
	rx_grp = &vnetp->rx_grp[0];

	if (vresp->rx_ringp == NULL) {
		return;
	}

	if (type == VIO_NET_RES_LDC_SERVICE) {
		/*
		 * Ring Index 0 is the default ring in the group and is
		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
		 * is allocated statically and is reported to the mac layer
		 * in vnet_m_capab(). So all we need to do here is remove its
		 * reference to the associated vresp.
		 */
		rx_ringp = &rx_grp->rings[0];
		rx_ringp->hw_rh = NULL;
		vresp->rx_ringp = NULL;
		return;
	}
	ASSERT(type == VIO_NET_RES_LDC_GUEST);

	mac_perim_enter_by_mh(vnetp->mh, &mph1);

	rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
	vresp->rx_ringp = NULL;

	if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
		/* Unregister the pseudo ring with the upper-mac */
		mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);

		rx_ringp->hw_rh = NULL;
		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;

		/* Free the pseudo rx ring */
		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
	}

	mac_perim_exit(mph1);
}

static void
vnet_unbind_rings(vnet_res_t *vresp)
{
	switch (vresp->type) {

	case VIO_NET_RES_LDC_SERVICE:
	case VIO_NET_RES_LDC_GUEST:
		vnet_unbind_vgenring(vresp);
		break;

	case VIO_NET_RES_HYBRID:
		vnet_unbind_hwrings(vresp->vnetp);
		break;

	default:
		break;

	}
}

static int
vnet_bind_rings(vnet_res_t *vresp)
{
	int rv;

	switch (vresp->type) {

	case VIO_NET_RES_LDC_SERVICE:
	case VIO_NET_RES_LDC_GUEST:
		rv = vnet_bind_vgenring(vresp);
		break;

	case VIO_NET_RES_HYBRID:
		rv = vnet_bind_hwrings(vresp->vnetp);
		break;

	default:
		rv = 1;
		break;

	}

	return (rv);
}

/* ARGSUSED */
int
vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
{
	vnet_t *vnetp = (vnet_t *)arg;

	*val = mac_stat_get(vnetp->hio_mh, stat);
	return (0);
}

/*
 * The start() and stop() routines for the Hybrid resource below are just
 * dummy functions. This is provided to avoid resource type specific code in
 * vnet_start_resources() and vnet_stop_resources(). The starting and stopping
 * of the Hybrid resource happens in the context of the mac_client interfaces
 * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup().
 */
/* ARGSUSED */
static int
vnet_hio_start(void *arg)
{
	return (0);
}

/* ARGSUSED */
static void
vnet_hio_stop(void *arg)
{
}

mblk_t *
vnet_hio_tx(void *arg, mblk_t *mp)
{
	vnet_pseudo_tx_ring_t *tx_ringp;
	mblk_t *nextp;
	mblk_t *ret_mp;

	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
	for (;;) {
		nextp = mp->b_next;
		mp->b_next = NULL;

		ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
		if (ret_mp != NULL) {
			ret_mp->b_next = nextp;
			mp = ret_mp;
			break;
		}

		if ((mp = nextp) == NULL)
			break;
	}
	return (mp);
}
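
/*
 * Illustrative only: under the GLDv3 tx contract assumed by vnet_hio_tx()
 * above, a non-NULL return value is the untransmitted remainder of the
 * chain, to be retried later. A sketch of a caller's retry pattern (not
 * the mac layer's actual code):
 *
 *	mblk_t *rest = vnet_hio_tx(tx_ringp, mp);
 *	if (rest != NULL) {
 *		<the hw ring is out of tx descriptors; keep `rest' and
 *		resend it when the tx update notification registered
 *		via mac_client_tx_notify() fires>
 *	}
 */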

#ifdef VNET_IOC_DEBUG

/*
 * The ioctl entry point is used only for debugging for now. The ioctl
 * commands can be used to force the link state of the channel connected
 * to vsw.
 */
static void
vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	struct iocblk *iocp;
	vnet_t *vnetp;

	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
	iocp->ioc_error = 0;
	vnetp = (vnet_t *)arg;

	if (vnetp == NULL) {
		miocnak(q, mp, 0, EINVAL);
		return;
	}

	switch (iocp->ioc_cmd) {

	case VNET_FORCE_LINK_DOWN:
	case VNET_FORCE_LINK_UP:
		vnet_force_link_state(vnetp, q, mp);
		break;

	default:
		iocp->ioc_error = EINVAL;
		miocnak(q, mp, 0, iocp->ioc_error);
		break;

	}
}

static void
vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
{
	mac_register_t *macp;
	mac_callbacks_t *cbp;
	vnet_res_t *vresp;

	READ_ENTER(&vnetp->vsw_fp_rw);

	vresp = vnetp->vsw_fp;
	if (vresp == NULL) {
		RW_EXIT(&vnetp->vsw_fp_rw);
		return;
	}

	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	cbp->mc_ioctl(macp->m_driver, q, mp);

	RW_EXIT(&vnetp->vsw_fp_rw);
}

#else

static void
vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	vnet_t *vnetp;

	vnetp = (vnet_t *)arg;

	if (vnetp == NULL) {
		miocnak(q, mp, 0, EINVAL);
		return;
	}

	/* ioctl support only for debugging */
	miocnak(q, mp, 0, ENOTSUP);
}

#endif