1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2018 Joyent, Inc.
26 */
27
28 #include <sys/types.h>
29 #include <sys/errno.h>
30 #include <sys/param.h>
31 #include <sys/callb.h>
32 #include <sys/stream.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/modhash.h>
40 #include <sys/debug.h>
41 #include <sys/ethernet.h>
42 #include <sys/dlpi.h>
43 #include <net/if.h>
44 #include <sys/mac_provider.h>
45 #include <sys/mac_client.h>
46 #include <sys/mac_client_priv.h>
47 #include <sys/mac_ether.h>
48 #include <sys/ddi.h>
49 #include <sys/sunddi.h>
50 #include <sys/strsun.h>
51 #include <sys/note.h>
52 #include <sys/atomic.h>
53 #include <sys/vnet.h>
54 #include <sys/vlan.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vnet_common.h>
57 #include <sys/dds.h>
58 #include <sys/strsubr.h>
59 #include <sys/taskq.h>
60
61 /*
62 * Function prototypes.
63 */
64
65 /* DDI entrypoints */
66 static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
67 static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
68 static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);
69
70 /* MAC entrypoints */
71 static int vnet_m_stat(void *, uint_t, uint64_t *);
72 static int vnet_m_start(void *);
73 static void vnet_m_stop(void *);
74 static int vnet_m_promisc(void *, boolean_t);
75 static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
76 static int vnet_m_unicst(void *, const uint8_t *);
77 mblk_t *vnet_m_tx(void *, mblk_t *);
78 static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
79 #ifdef VNET_IOC_DEBUG
80 static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
81 #endif
82 static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
83 static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
84 const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
85 static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
86 mac_group_info_t *infop, mac_group_handle_t handle);
87 static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
88 static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
89 static int vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
90 uint64_t *val);
91 static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
92 static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
93 static int vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
94 uint64_t *val);
95 static int vnet_ring_enable_intr(void *arg);
96 static int vnet_ring_disable_intr(void *arg);
97 static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
98 static int vnet_addmac(void *arg, const uint8_t *mac_addr);
99 static int vnet_remmac(void *arg, const uint8_t *mac_addr);
100
101 /* vnet internal functions */
102 static int vnet_unattach(vnet_t *vnetp);
103 static void vnet_ring_grp_init(vnet_t *vnetp);
104 static void vnet_ring_grp_uninit(vnet_t *vnetp);
105 static int vnet_mac_register(vnet_t *);
106 static int vnet_read_mac_address(vnet_t *vnetp);
107 static int vnet_bind_vgenring(vnet_res_t *vresp);
108 static void vnet_unbind_vgenring(vnet_res_t *vresp);
109 static int vnet_bind_hwrings(vnet_t *vnetp);
110 static void vnet_unbind_hwrings(vnet_t *vnetp);
111 static int vnet_bind_rings(vnet_res_t *vresp);
112 static void vnet_unbind_rings(vnet_res_t *vresp);
113 static int vnet_hio_stat(void *, uint_t, uint64_t *);
114 static int vnet_hio_start(void *);
115 static void vnet_hio_stop(void *);
116 mblk_t *vnet_hio_tx(void *, mblk_t *);
117
118 /* Forwarding database (FDB) routines */
119 static void vnet_fdb_create(vnet_t *vnetp);
120 static void vnet_fdb_destroy(vnet_t *vnetp);
121 static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
122 static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
123 void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
124 static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);
125
126 static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
127 static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
128 static void vnet_tx_update(vio_net_handle_t vrh);
129 static void vnet_res_start_task(void *arg);
130 static void vnet_start_resources(vnet_t *vnetp);
131 static void vnet_stop_resources(vnet_t *vnetp);
132 static void vnet_dispatch_res_task(vnet_t *vnetp);
133 static void vnet_res_start_task(void *arg);
134 static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
135 static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
136 static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
137 static void vnet_tx_notify_thread(void *);
138
139 /* Exported to vnet_gen */
140 int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
141 void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
142 void vnet_dds_cleanup_hio(vnet_t *vnetp);
143
144 static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
145 vnet_res_t *vresp);
146 static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
147 static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
148 static void vnet_hio_destroy_kstats(kstat_t *ksp);
149
/* Exported to vnet_dds */
151 int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
152 int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
153 void vnet_hio_mac_cleanup(vnet_t *vnetp);
154
155 /* Externs that are imported from vnet_gen */
156 extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
157 const uint8_t *macaddr, void **vgenhdl);
158 extern int vgen_init_mdeg(void *arg);
159 extern void vgen_uninit(void *arg);
160 extern int vgen_dds_tx(void *arg, void *dmsg);
161 extern int vgen_enable_intr(void *arg);
162 extern int vgen_disable_intr(void *arg);
163 extern mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);
164
165 /* Externs that are imported from vnet_dds */
166 extern void vdds_mod_init(void);
167 extern void vdds_mod_fini(void);
168 extern int vdds_init(vnet_t *vnetp);
169 extern void vdds_cleanup(vnet_t *vnetp);
170 extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
171 extern void vdds_cleanup_hybrid_res(void *arg);
172 extern void vdds_cleanup_hio(vnet_t *vnetp);
173
174 extern pri_t minclsyspri;
175
176 #define DRV_NAME "vnet"
/*
 * Hold/release a reference on a forwarding database (FDB) entry. The
 * refcnt is manipulated atomically; the ASSERTs catch wrap-around in
 * DEBUG kernels. Wrapped in do { } while (0) so each macro expands to a
 * single statement and is safe in unbraced if/else bodies (the original
 * bare { } blocks would break such call sites).
 */
#define	VNET_FDBE_REFHOLD(p)						\
	do {								\
		atomic_inc_32(&(p)->refcnt);				\
		ASSERT((p)->refcnt != 0);				\
	} while (0)

#define	VNET_FDBE_REFRELE(p)						\
	do {								\
		ASSERT((p)->refcnt != 0);				\
		atomic_dec_32(&(p)->refcnt);				\
	} while (0)
188
189 #ifdef VNET_IOC_DEBUG
190 #define VNET_M_CALLBACK_FLAGS (MC_IOCTL | MC_GETCAPAB)
191 #else
192 #define VNET_M_CALLBACK_FLAGS (MC_GETCAPAB)
193 #endif
194
/*
 * MAC layer callbacks for the vnet device. Rx/tx datapaths are exposed
 * to the MAC layer through pseudo rings/groups (see vnet_m_capab()), so
 * the m_unicst and m_tx slots must remain NULL.
 */
static mac_callbacks_t vnet_m_callbacks = {
	VNET_M_CALLBACK_FLAGS,
	vnet_m_stat,
	vnet_m_start,
	vnet_m_stop,
	vnet_m_promisc,
	vnet_m_multicst,
	NULL,	/* m_unicst entry must be NULL while rx rings are exposed */
	NULL,	/* m_tx entry must be NULL while tx rings are exposed */
	NULL,
	vnet_m_ioctl,
	vnet_m_capab,
	NULL
};
209
/*
 * Callbacks used when a Hybrid I/O resource is treated as a vnet
 * resource; only the stat, start, stop and tx entry points are needed.
 */
static mac_callbacks_t vnet_hio_res_callbacks = {
	0,
	vnet_hio_stat,
	vnet_hio_start,
	vnet_hio_stop,
	NULL,
	NULL,
	NULL,
	vnet_hio_tx,
	NULL,
	NULL,
	NULL
};
223
224 /*
225 * Linked list of "vnet_t" structures - one per instance.
226 */
227 static vnet_t *vnet_headp = NULL;
228 static krwlock_t vnet_rw;
229
230 /* Tunables */
231 uint32_t vnet_num_descriptors = VNET_NUM_DESCRIPTORS;
232
233 /*
234 * Configure tx serialization in mac layer for the vnet device. This tunable
235 * should be enabled to improve performance only if HybridIO is configured for
236 * the vnet device.
237 */
238 boolean_t vnet_mac_tx_serialize = B_FALSE;
239
240 /* Configure enqueing at Rx soft rings in mac layer for the vnet device */
241 boolean_t vnet_mac_rx_queuing = B_TRUE;
242
243 /*
244 * Set this to non-zero to enable additional internal receive buffer pools
245 * based on the MTU of the device for better performance at the cost of more
246 * memory consumption. This is turned off by default, to use allocb(9F) for
247 * receive buffer allocations of sizes > 2K.
248 */
249 boolean_t vnet_jumbo_rxpools = B_FALSE;
250
251 /* # of chains in fdb hash table */
252 uint32_t vnet_fdb_nchains = VNET_NFDB_HASH;
253
254 /* Internal tunables */
255 uint32_t vnet_ethermtu = 1500; /* mtu of the device */
256
257 /*
258 * Default vlan id. This is only used internally when the "default-vlan-id"
259 * property is not present in the MD device node. Therefore, this should not be
260 * used as a tunable; if this value is changed, the corresponding variable
261 * should be updated to the same value in vsw and also other vnets connected to
262 * the same vsw.
263 */
264 uint16_t vnet_default_vlan_id = 1;
265
266 /* delay in usec to wait for all references on a fdb entry to be dropped */
267 uint32_t vnet_fdbe_refcnt_delay = 10;
268
269 static struct ether_addr etherbroadcastaddr = {
270 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
271 };
272
273 /* mac_open() retry delay in usec */
274 uint32_t vnet_mac_open_delay = 100; /* 0.1 ms */
275
276 /* max # of mac_open() retries */
277 uint32_t vnet_mac_open_retries = 100;
278
279 /*
280 * Property names
281 */
282 static char macaddr_propname[] = "local-mac-address";
283
284 /*
285 * This is the string displayed by modinfo(8).
286 */
287 static char vnet_ident[] = "vnet driver";
288 extern struct mod_ops mod_driverops;
289 static struct cb_ops cb_vnetops = {
290 nulldev, /* cb_open */
291 nulldev, /* cb_close */
292 nodev, /* cb_strategy */
293 nodev, /* cb_print */
294 nodev, /* cb_dump */
295 nodev, /* cb_read */
296 nodev, /* cb_write */
297 nodev, /* cb_ioctl */
298 nodev, /* cb_devmap */
299 nodev, /* cb_mmap */
300 nodev, /* cb_segmap */
301 nochpoll, /* cb_chpoll */
302 ddi_prop_op, /* cb_prop_op */
303 NULL, /* cb_stream */
304 (int)(D_MP) /* cb_flag */
305 };
306
307 static struct dev_ops vnetops = {
308 DEVO_REV, /* devo_rev */
309 0, /* devo_refcnt */
310 NULL, /* devo_getinfo */
311 nulldev, /* devo_identify */
312 nulldev, /* devo_probe */
313 vnetattach, /* devo_attach */
314 vnetdetach, /* devo_detach */
315 nodev, /* devo_reset */
316 &cb_vnetops, /* devo_cb_ops */
317 (struct bus_ops *)NULL, /* devo_bus_ops */
318 NULL, /* devo_power */
319 ddi_quiesce_not_supported, /* devo_quiesce */
320 };
321
322 static struct modldrv modldrv = {
323 &mod_driverops, /* Type of module. This one is a driver */
324 vnet_ident, /* ID string */
325 &vnetops /* driver specific ops */
326 };
327
328 static struct modlinkage modlinkage = {
329 MODREV_1, (void *)&modldrv, NULL
330 };
331
332 #ifdef DEBUG
333
334 #define DEBUG_PRINTF debug_printf
335
336 /*
337 * Print debug messages - set to 0xf to enable all msgs
338 */
339 int vnet_dbglevel = 0x8;
340
341 static void
debug_printf(const char * fname,void * arg,const char * fmt,...)342 debug_printf(const char *fname, void *arg, const char *fmt, ...)
343 {
344 char buf[512];
345 va_list ap;
346 vnet_t *vnetp = (vnet_t *)arg;
347 char *bufp = buf;
348
349 if (vnetp == NULL) {
350 (void) sprintf(bufp, "%s: ", fname);
351 bufp += strlen(bufp);
352 } else {
353 (void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
354 bufp += strlen(bufp);
355 }
356 va_start(ap, fmt);
357 (void) vsprintf(bufp, fmt, ap);
358 va_end(ap);
359 cmn_err(CE_CONT, "%s\n", buf);
360 }
361
362 #endif
363
364 /* _init(9E): initialize the loadable module */
365 int
_init(void)366 _init(void)
367 {
368 int status;
369
370 DBG1(NULL, "enter\n");
371
372 mac_init_ops(&vnetops, "vnet");
373 status = mod_install(&modlinkage);
374 if (status != 0) {
375 mac_fini_ops(&vnetops);
376 }
377 vdds_mod_init();
378 DBG1(NULL, "exit(%d)\n", status);
379 return (status);
380 }
381
/*
 * _fini(9E): prepare the module for unloading.
 * mod_remove() fails while any vnet instance is still attached; in that
 * case we return its error and leave the mac ops and vdds state intact.
 */
int
_fini(void)
{
	int status;

	DBG1(NULL, "enter\n");

	status = mod_remove(&modlinkage);
	if (status != 0)
		return (status);
	/* Unloading is committed; tear down module-global state. */
	mac_fini_ops(&vnetops);
	vdds_mod_fini();

	DBG1(NULL, "exit(%d)\n", status);
	return (status);
}
399
/* _info(9E): return information about the loadable module */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
406
407 /*
408 * attach(9E): attach a device to the system.
409 * called once for each instance of the device on the system.
410 */
411 static int
vnetattach(dev_info_t * dip,ddi_attach_cmd_t cmd)412 vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
413 {
414 vnet_t *vnetp;
415 int status;
416 int instance;
417 uint64_t reg;
418 char qname[TASKQ_NAMELEN];
419 vnet_attach_progress_t attach_progress;
420
421 attach_progress = AST_init;
422
423 switch (cmd) {
424 case DDI_ATTACH:
425 break;
426 case DDI_RESUME:
427 case DDI_PM_RESUME:
428 default:
429 goto vnet_attach_fail;
430 }
431
432 instance = ddi_get_instance(dip);
433 DBG1(NULL, "instance(%d) enter\n", instance);
434
435 /* allocate vnet_t and mac_t structures */
436 vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
437 vnetp->dip = dip;
438 vnetp->instance = instance;
439 rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
440 rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
441 attach_progress |= AST_vnet_alloc;
442
443 vnet_ring_grp_init(vnetp);
444 attach_progress |= AST_ring_init;
445
446 status = vdds_init(vnetp);
447 if (status != 0) {
448 goto vnet_attach_fail;
449 }
450 attach_progress |= AST_vdds_init;
451
452 /* setup links to vnet_t from both devinfo and mac_t */
453 ddi_set_driver_private(dip, (caddr_t)vnetp);
454
455 /* read the mac address */
456 status = vnet_read_mac_address(vnetp);
457 if (status != DDI_SUCCESS) {
458 goto vnet_attach_fail;
459 }
460 attach_progress |= AST_read_macaddr;
461
462 reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
463 DDI_PROP_DONTPASS, "reg", -1);
464 if (reg == -1) {
465 goto vnet_attach_fail;
466 }
467 vnetp->reg = reg;
468
469 vnet_fdb_create(vnetp);
470 attach_progress |= AST_fdbh_alloc;
471
472 (void) snprintf(qname, TASKQ_NAMELEN, "vres_taskq%d", instance);
473 if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
474 TASKQ_DEFAULTPRI, 0)) == NULL) {
475 cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
476 instance);
477 goto vnet_attach_fail;
478 }
479 attach_progress |= AST_taskq_create;
480
481 /* add to the list of vnet devices */
482 WRITE_ENTER(&vnet_rw);
483 vnetp->nextp = vnet_headp;
484 vnet_headp = vnetp;
485 RW_EXIT(&vnet_rw);
486
487 attach_progress |= AST_vnet_list;
488
489 /*
490 * Initialize the generic vnet plugin which provides communication via
491 * sun4v LDC (logical domain channel) based resources. This involves 2
492 * steps; first, vgen_init() is invoked to read the various properties
493 * of the vnet device from its MD node (including its mtu which is
494 * needed to mac_register()) and obtain a handle to the vgen layer.
495 * After mac_register() is done and we have a mac handle, we then
496 * invoke vgen_init_mdeg() which registers with the the MD event
497 * generator (mdeg) framework to allow LDC resource notifications.
498 * Note: this sequence also allows us to report the correct default #
499 * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked
500 * in the context of mac_register(); and avoids conflicting with
501 * dynamic pseudo rx rings which get added/removed as a result of mdeg
502 * events in vgen.
503 */
504 status = vgen_init(vnetp, reg, vnetp->dip,
505 (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
506 if (status != DDI_SUCCESS) {
507 DERR(vnetp, "vgen_init() failed\n");
508 goto vnet_attach_fail;
509 }
510 attach_progress |= AST_vgen_init;
511
512 status = vnet_mac_register(vnetp);
513 if (status != DDI_SUCCESS) {
514 goto vnet_attach_fail;
515 }
516 vnetp->link_state = LINK_STATE_UNKNOWN;
517 attach_progress |= AST_macreg;
518
519 status = vgen_init_mdeg(vnetp->vgenhdl);
520 if (status != DDI_SUCCESS) {
521 goto vnet_attach_fail;
522 }
523 attach_progress |= AST_init_mdeg;
524
525 vnetp->attach_progress = attach_progress;
526
527 DBG1(NULL, "instance(%d) exit\n", instance);
528 return (DDI_SUCCESS);
529
530 vnet_attach_fail:
531 vnetp->attach_progress = attach_progress;
532 status = vnet_unattach(vnetp);
533 ASSERT(status == 0);
534 return (DDI_FAILURE);
535 }
536
537 /*
538 * detach(9E): detach a device from the system.
539 */
540 static int
vnetdetach(dev_info_t * dip,ddi_detach_cmd_t cmd)541 vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
542 {
543 vnet_t *vnetp;
544 int instance;
545
546 instance = ddi_get_instance(dip);
547 DBG1(NULL, "instance(%d) enter\n", instance);
548
549 vnetp = ddi_get_driver_private(dip);
550 if (vnetp == NULL) {
551 goto vnet_detach_fail;
552 }
553
554 switch (cmd) {
555 case DDI_DETACH:
556 break;
557 case DDI_SUSPEND:
558 case DDI_PM_SUSPEND:
559 default:
560 goto vnet_detach_fail;
561 }
562
563 if (vnet_unattach(vnetp) != 0) {
564 goto vnet_detach_fail;
565 }
566
567 return (DDI_SUCCESS);
568
569 vnet_detach_fail:
570 return (DDI_FAILURE);
571 }
572
573 /*
574 * Common routine to handle vnetattach() failure and vnetdetach(). Note that
575 * the only reason this function could fail is if mac_unregister() fails.
576 * Otherwise, this function must ensure that all resources are freed and return
577 * success.
578 */
579 static int
vnet_unattach(vnet_t * vnetp)580 vnet_unattach(vnet_t *vnetp)
581 {
582 vnet_attach_progress_t attach_progress;
583
584 attach_progress = vnetp->attach_progress;
585
586 /*
587 * Disable the mac device in the gldv3 subsystem. This can fail, in
588 * particular if there are still any open references to this mac
589 * device; in which case we just return failure without continuing to
590 * detach further.
591 * If it succeeds, we then invoke vgen_uninit() which should unregister
592 * any pseudo rings registered with the mac layer. Note we keep the
593 * AST_macreg flag on, so we can unregister with the mac layer at
594 * the end of this routine.
595 */
596 if (attach_progress & AST_macreg) {
597 if (mac_disable(vnetp->mh) != 0) {
598 return (1);
599 }
600 }
601
602 /*
603 * Now that we have disabled the device, we must finish all other steps
604 * and successfully return from this function; otherwise we will end up
605 * leaving the device in a broken/unusable state.
606 *
607 * First, release any hybrid resources assigned to this vnet device.
608 */
609 if (attach_progress & AST_vdds_init) {
610 vdds_cleanup(vnetp);
611 attach_progress &= ~AST_vdds_init;
612 }
613
614 /*
615 * Uninit vgen. This stops further mdeg callbacks to this vnet
616 * device and/or its ports; and detaches any existing ports.
617 */
618 if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
619 vgen_uninit(vnetp->vgenhdl);
620 attach_progress &= ~AST_vgen_init;
621 attach_progress &= ~AST_init_mdeg;
622 }
623
624 /* Destroy the taskq. */
625 if (attach_progress & AST_taskq_create) {
626 ddi_taskq_destroy(vnetp->taskqp);
627 attach_progress &= ~AST_taskq_create;
628 }
629
630 /* Destroy fdb. */
631 if (attach_progress & AST_fdbh_alloc) {
632 vnet_fdb_destroy(vnetp);
633 attach_progress &= ~AST_fdbh_alloc;
634 }
635
636 /* Remove from the device list */
637 if (attach_progress & AST_vnet_list) {
638 vnet_t **vnetpp;
639 /* unlink from instance(vnet_t) list */
640 WRITE_ENTER(&vnet_rw);
641 for (vnetpp = &vnet_headp; *vnetpp;
642 vnetpp = &(*vnetpp)->nextp) {
643 if (*vnetpp == vnetp) {
644 *vnetpp = vnetp->nextp;
645 break;
646 }
647 }
648 RW_EXIT(&vnet_rw);
649 attach_progress &= ~AST_vnet_list;
650 }
651
652 if (attach_progress & AST_ring_init) {
653 vnet_ring_grp_uninit(vnetp);
654 attach_progress &= ~AST_ring_init;
655 }
656
657 if (attach_progress & AST_macreg) {
658 VERIFY(mac_unregister(vnetp->mh) == 0);
659 vnetp->mh = NULL;
660 attach_progress &= ~AST_macreg;
661 }
662
663 if (attach_progress & AST_vnet_alloc) {
664 rw_destroy(&vnetp->vrwlock);
665 rw_destroy(&vnetp->vsw_fp_rw);
666 attach_progress &= ~AST_vnet_list;
667 KMEM_FREE(vnetp);
668 }
669
670 return (0);
671 }
672
/*
 * enable the device for transmit/receive (mc_start(9E) entry point).
 * The VNET_STARTED flag is set and the attached resources are started
 * under the writer lock, so resources added concurrently observe a
 * consistent started state (see vnet_res_start_task()).
 */
static int
vnet_m_start(void *arg)
{
	vnet_t *vnetp = arg;

	DBG1(vnetp, "enter\n");

	WRITE_ENTER(&vnetp->vrwlock);
	vnetp->flags |= VNET_STARTED;
	vnet_start_resources(vnetp);
	RW_EXIT(&vnetp->vrwlock);

	DBG1(vnetp, "exit\n");
	return (VNET_SUCCESS);

}
690
/*
 * stop transmit/receive for the device (mc_stop(9E) entry point).
 * No-op if the device was never started.
 */
static void
vnet_m_stop(void *arg)
{
	vnet_t *vnetp = arg;

	DBG1(vnetp, "enter\n");

	WRITE_ENTER(&vnetp->vrwlock);
	if (vnetp->flags & VNET_STARTED) {
		/*
		 * Set the flags appropriately; this should prevent starting of
		 * any new resources that are added(see vnet_res_start_task()),
		 * while we release the vrwlock in vnet_stop_resources() before
		 * stopping each resource.
		 */
		vnetp->flags &= ~VNET_STARTED;
		vnetp->flags |= VNET_STOPPING;
		vnet_stop_resources(vnetp);
		vnetp->flags &= ~VNET_STOPPING;
	}
	RW_EXIT(&vnetp->vrwlock);

	DBG1(vnetp, "exit\n");
}
716
/*
 * set the unicast mac address of the device (mc_unicst(9E) entry point).
 * Always fails: the vnet mac address comes from the MD node and cannot
 * be changed dynamically.
 */
static int
vnet_m_unicst(void *arg, const uint8_t *macaddr)
{
	_NOTE(ARGUNUSED(macaddr))

	vnet_t *vnetp = arg;

	DBG1(vnetp, "enter\n");
	/*
	 * NOTE: setting mac address dynamically is not supported.
	 */
	DBG1(vnetp, "exit\n");

	return (VNET_FAILURE);
}
733
/*
 * enable/disable a multicast address (mc_multicst(9E) entry point).
 * The request is delegated to the vswitch resource's mc_multicst
 * callback; returns EAGAIN if no vswitch FDB entry is available yet.
 * The FDB entry is reference-held across the call so it cannot be torn
 * down underneath us once vsw_fp_rw is dropped.
 */
static int
vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	_NOTE(ARGUNUSED(add, mca))

	vnet_t *vnetp = arg;
	vnet_res_t *vresp;
	mac_register_t *macp;
	mac_callbacks_t *cbp;
	int rv = VNET_SUCCESS;

	DBG1(vnetp, "enter\n");

	READ_ENTER(&vnetp->vsw_fp_rw);
	if (vnetp->vsw_fp == NULL) {
		/* vswitch resource not (yet) available; caller may retry */
		RW_EXIT(&vnetp->vsw_fp_rw);
		return (EAGAIN);
	}
	VNET_FDBE_REFHOLD(vnetp->vsw_fp);
	RW_EXIT(&vnetp->vsw_fp_rw);

	vresp = vnetp->vsw_fp;
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	rv = cbp->mc_multicst(macp->m_driver, add, mca);

	VNET_FDBE_REFRELE(vnetp->vsw_fp);

	DBG1(vnetp, "exit(%d)\n", rv);
	return (rv);
}
766
/*
 * set or clear promiscuous mode on the device (mc_setpromisc(9E)).
 * Not supported; success is reported so the MAC layer proceeds.
 */
static int
vnet_m_promisc(void *arg, boolean_t on)
{
	_NOTE(ARGUNUSED(on))

	vnet_t *vnetp = arg;
	DBG1(vnetp, "enter\n");
	/*
	 * NOTE: setting promiscuous mode is not supported, just return success.
	 */
	DBG1(vnetp, "exit\n");
	return (VNET_SUCCESS);
}
781
/*
 * Transmit a chain of packets. This function provides switching functionality
 * based on the destination mac address to reach other guests (within ldoms) or
 * external hosts.
 *
 * For each packet: the destination is looked up in the FDB; a hit sends
 * the packet directly to the peer vnet/vsw resource. On a miss, unicast
 * packets prefer the Hybrid I/O resource (if present), everything else
 * goes to the vswitch. Returns the unsent remainder of the chain when a
 * lower-level tx fails (MAC flow-control convention), or NULL when the
 * whole chain was consumed.
 */
mblk_t *
vnet_tx_ring_send(void *arg, mblk_t *mp)
{
	vnet_pseudo_tx_ring_t *tx_ringp;
	vnet_tx_ring_stats_t *statsp;
	vnet_t *vnetp;
	vnet_res_t *vresp;
	mblk_t *next;
	mblk_t *resid_mp;
	mac_register_t *macp;
	struct ether_header *ehp;
	boolean_t is_unicast;
	boolean_t is_pvid;	/* non-default pvid ? */
	boolean_t hres;		/* Hybrid resource ? */
	void *tx_arg;
	size_t size;

	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
	statsp = &tx_ringp->tx_ring_stats;
	vnetp = (vnet_t *)tx_ringp->vnetp;
	DBG1(vnetp, "enter\n");
	ASSERT(mp != NULL);

	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;

	while (mp != NULL) {

		next = mp->b_next;
		mp->b_next = NULL;

		/* update stats */
		size = msgsize(mp);

		/*
		 * Find fdb entry for the destination
		 * and hold a reference to it.
		 */
		ehp = (struct ether_header *)mp->b_rptr;
		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
		if (vresp != NULL) {

			/*
			 * Destination found in FDB.
			 * The destination is a vnet device within ldoms
			 * and directly reachable, invoke the tx function
			 * in the fdb entry.
			 */
			macp = &vresp->macreg;
			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed; return unsent chain */
				mp->b_next = next;
				break;
			}
		} else {
			is_unicast = !(IS_BROADCAST(ehp) ||
			    (IS_MULTICAST(ehp)));
			/*
			 * Destination is not in FDB.
			 * If the destination is broadcast or multicast,
			 * then forward the packet to vswitch.
			 * If a Hybrid resource available, then send the
			 * unicast packet via hybrid resource, otherwise
			 * forward it to vswitch.
			 */
			READ_ENTER(&vnetp->vsw_fp_rw);

			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
				vresp = vnetp->hio_fp;
				hres = B_TRUE;
			} else {
				vresp = vnetp->vsw_fp;
				hres = B_FALSE;
			}
			if (vresp == NULL) {
				/*
				 * no fdb entry to vsw? drop the packet.
				 */
				RW_EXIT(&vnetp->vsw_fp_rw);
				freemsg(mp);
				mp = next;
				continue;
			}

			/* ref hold the fdb entry to vsw */
			VNET_FDBE_REFHOLD(vresp);

			RW_EXIT(&vnetp->vsw_fp_rw);

			/*
			 * In the case of a hybrid resource we need to insert
			 * the tag for the pvid case here; unlike packets that
			 * are destined to a vnet/vsw in which case the vgen
			 * layer does the tagging before sending it over ldc.
			 */
			if (hres == B_TRUE) {
				/*
				 * Determine if the frame being transmitted
				 * over the hybrid resource is untagged. If so,
				 * insert the tag before transmitting.
				 */
				if (is_pvid == B_TRUE &&
				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {

					mp = vnet_vlan_insert_tag(mp,
					    vnetp->pvid);
					if (mp == NULL) {
						/* tag insertion failed; skip */
						VNET_FDBE_REFRELE(vresp);
						mp = next;
						continue;
					}

				}

				macp = &vresp->macreg;
				tx_arg = tx_ringp;
			} else {
				macp = &vresp->macreg;
				tx_arg = macp->m_driver;
			}
			resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed; return unsent chain */
				mp->b_next = next;
				break;
			}
		}

		statsp->obytes += size;
		statsp->opackets++;
		mp = next;
	}

	DBG1(vnetp, "exit\n");
	return (mp);
}
931
/*
 * get statistics from the device (mc_getstat(9E) entry point).
 * Counter statistics are summed across all attached resources; non-counter
 * statistics are rejected since aggregation is meaningless for them.
 */
int
vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vnet_t *vnetp = arg;
	vnet_res_t *vresp;
	mac_register_t *macp;
	mac_callbacks_t *cbp;
	uint64_t val_total = 0;

	DBG1(vnetp, "enter\n");

	/*
	 * get the specified statistic from each transport and return the
	 * aggregate val. This obviously only works for counters.
	 */
	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
		return (ENOTSUP);
	}

	READ_ENTER(&vnetp->vrwlock);
	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
		macp = &vresp->macreg;
		cbp = macp->m_callbacks;
		/* resources whose mc_getstat fails are simply skipped */
		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
			val_total += *val;
	}
	RW_EXIT(&vnetp->vrwlock);

	*val = val_total;

	DBG1(vnetp, "exit\n");
	return (0);
}
967
/*
 * Initialize the pseudo tx and rx ring groups for a vnet instance:
 * a fixed set of shared tx rings (with a flow-control notify thread),
 * and an rx group whose first 3 rings are reserved while the rest start
 * out free and are allocated dynamically as LDC resources come and go.
 */
static void
vnet_ring_grp_init(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t *rx_grp;
	vnet_pseudo_rx_ring_t *rx_ringp;
	vnet_pseudo_tx_group_t *tx_grp;
	vnet_pseudo_tx_ring_t *tx_ringp;
	int i;

	tx_grp = &vnetp->tx_grp[0];
	tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
	    VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
	for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
		tx_ringp[i].state |= VNET_TXRING_SHARED;
	}
	tx_grp->rings = tx_ringp;
	tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;
	/* thread that propagates tx flow-control updates to the mac layer */
	mutex_init(&tx_grp->flowctl_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&tx_grp->flowctl_cv, NULL, CV_DRIVER, NULL);
	tx_grp->flowctl_thread = thread_create(NULL, 0,
	    vnet_tx_notify_thread, tx_grp, 0, &p0, TS_RUN, minclsyspri);

	rx_grp = &vnetp->rx_grp[0];
	rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
	rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
	rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
	    rx_grp->max_ring_cnt, KM_SLEEP);

	/*
	 * Setup the first 3 Pseudo RX Rings that are reserved;
	 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
	 */
	rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
	rx_ringp[0].index = 0;
	rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
	rx_ringp[1].index = 1;
	rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
	rx_ringp[2].index = 2;

	rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	rx_grp->rings = rx_ringp;

	/* remaining rx rings start out free */
	for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	    i < rx_grp->max_ring_cnt; i++) {
		rx_ringp = &rx_grp->rings[i];
		rx_ringp->state = VNET_RXRING_FREE;
		rx_ringp->index = i;
	}
}
1017
/*
 * Tear down the pseudo ring groups created by vnet_ring_grp_init():
 * signal the tx flow-control thread to exit (joining on its t_did), then
 * free the tx and rx ring arrays. The ASSERTs verify that all
 * dynamically allocated rx rings have already been released.
 */
static void
vnet_ring_grp_uninit(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t *rx_grp;
	vnet_pseudo_tx_group_t *tx_grp;
	kt_did_t tid = 0;

	tx_grp = &vnetp->tx_grp[0];

	/* Inform tx_notify_thread to exit */
	mutex_enter(&tx_grp->flowctl_lock);
	if (tx_grp->flowctl_thread != NULL) {
		/* capture t_did under the lock; thread_join() after release */
		tid = tx_grp->flowctl_thread->t_did;
		tx_grp->flowctl_done = B_TRUE;
		cv_signal(&tx_grp->flowctl_cv);
	}
	mutex_exit(&tx_grp->flowctl_lock);
	if (tid != 0)
		thread_join(tid);

	if (tx_grp->rings != NULL) {
		ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
		kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
		    tx_grp->ring_cnt);
		tx_grp->rings = NULL;
	}

	rx_grp = &vnetp->rx_grp[0];
	if (rx_grp->rings != NULL) {
		ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
		ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
		kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
		    rx_grp->max_ring_cnt);
		rx_grp->rings = NULL;
	}
}
1054
1055 static vnet_pseudo_rx_ring_t *
vnet_alloc_pseudo_rx_ring(vnet_t * vnetp)1056 vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
1057 {
1058 vnet_pseudo_rx_group_t *rx_grp;
1059 vnet_pseudo_rx_ring_t *rx_ringp;
1060 int index;
1061
1062 rx_grp = &vnetp->rx_grp[0];
1063 WRITE_ENTER(&rx_grp->lock);
1064
1065 if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
1066 /* no rings available */
1067 RW_EXIT(&rx_grp->lock);
1068 return (NULL);
1069 }
1070
1071 for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
1072 index < rx_grp->max_ring_cnt; index++) {
1073 rx_ringp = &rx_grp->rings[index];
1074 if (rx_ringp->state == VNET_RXRING_FREE) {
1075 rx_ringp->state |= VNET_RXRING_INUSE;
1076 rx_grp->ring_cnt++;
1077 break;
1078 }
1079 }
1080
1081 RW_EXIT(&rx_grp->lock);
1082 return (rx_ringp);
1083 }
1084
1085 static void
vnet_free_pseudo_rx_ring(vnet_t * vnetp,vnet_pseudo_rx_ring_t * ringp)1086 vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
1087 {
1088 vnet_pseudo_rx_group_t *rx_grp;
1089
1090 ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
1091 rx_grp = &vnetp->rx_grp[0];
1092 WRITE_ENTER(&rx_grp->lock);
1093
1094 if (ringp->state != VNET_RXRING_FREE) {
1095 ringp->state = VNET_RXRING_FREE;
1096 ringp->handle = NULL;
1097 rx_grp->ring_cnt--;
1098 }
1099
1100 RW_EXIT(&rx_grp->lock);
1101 }
1102
1103 /* wrapper function for mac_register() */
1104 static int
vnet_mac_register(vnet_t * vnetp)1105 vnet_mac_register(vnet_t *vnetp)
1106 {
1107 mac_register_t *macp;
1108 int err;
1109
1110 if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1111 return (DDI_FAILURE);
1112 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1113 macp->m_driver = vnetp;
1114 macp->m_dip = vnetp->dip;
1115 macp->m_src_addr = vnetp->curr_macaddr;
1116 macp->m_callbacks = &vnet_m_callbacks;
1117 macp->m_min_sdu = 0;
1118 macp->m_max_sdu = vnetp->mtu;
1119 macp->m_margin = VLAN_TAGSZ;
1120
1121 macp->m_v12n = MAC_VIRT_LEVEL1;
1122
1123 /*
1124 * Finally, we're ready to register ourselves with the MAC layer
1125 * interface; if this succeeds, we're all ready to start()
1126 */
1127 err = mac_register(macp, &vnetp->mh);
1128 mac_free(macp);
1129 return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
1130 }
1131
1132 /* read the mac address of the device */
1133 static int
vnet_read_mac_address(vnet_t * vnetp)1134 vnet_read_mac_address(vnet_t *vnetp)
1135 {
1136 uchar_t *macaddr;
1137 uint32_t size;
1138 int rv;
1139
1140 rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
1141 DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
1142 if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
1143 DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
1144 macaddr_propname, rv);
1145 return (DDI_FAILURE);
1146 }
1147 bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
1148 bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
1149 ddi_prop_free(macaddr);
1150
1151 return (DDI_SUCCESS);
1152 }
1153
1154 static void
vnet_fdb_create(vnet_t * vnetp)1155 vnet_fdb_create(vnet_t *vnetp)
1156 {
1157 char hashname[MAXNAMELEN];
1158
1159 (void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
1160 vnetp->instance);
1161 vnetp->fdb_nchains = vnet_fdb_nchains;
1162 vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
1163 mod_hash_null_valdtor, sizeof (void *));
1164 }
1165
1166 static void
vnet_fdb_destroy(vnet_t * vnetp)1167 vnet_fdb_destroy(vnet_t *vnetp)
1168 {
1169 /* destroy fdb-hash-table */
1170 if (vnetp->fdb_hashp != NULL) {
1171 mod_hash_destroy_hash(vnetp->fdb_hashp);
1172 vnetp->fdb_hashp = NULL;
1173 vnetp->fdb_nchains = 0;
1174 }
1175 }
1176
/*
 * Add an entry into the fdb.
 *
 * The initial refcnt encodes whether the entry is also cached outside the
 * hash: LDC_SERVICE entries are cached in vnetp->vsw_fp (refcnt starts at
 * 1 for that cached reference), HYBRID entries are cached only in
 * vnetp->hio_fp and never inserted into the hash.
 */
void
vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
{
	uint64_t	addr = 0;
	int		rv;

	/* hash key is derived from the resource's remote MAC address */
	KEY_HASH(addr, vresp->rem_macaddr);

	/*
	 * If the entry being added corresponds to LDC_SERVICE resource,
	 * that is, vswitch connection, it is added to the hash and also
	 * the entry is cached, an additional reference count reflects
	 * this. The HYBRID resource is not added to the hash, but only
	 * cached, as it is only used for sending out packets for unknown
	 * unicast destinations.
	 */
	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
	    (vresp->refcnt = 1) : (vresp->refcnt = 0);

	/*
	 * Note: duplicate keys will be rejected by mod_hash.
	 */
	if (vresp->type != VIO_NET_RES_HYBRID) {
		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
		    (mod_hash_val_t)vresp);
		if (rv != 0) {
			/* entry is neither hashed nor cached in this case */
			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
			return;
		}
	}

	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
		/* Cache the fdb entry to vsw-port; first registrant wins */
		WRITE_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->vsw_fp == NULL)
			vnetp->vsw_fp = vresp;
		RW_EXIT(&vnetp->vsw_fp_rw);
	} else if (vresp->type == VIO_NET_RES_HYBRID) {
		/* Cache the fdb entry to hybrid resource; first wins */
		WRITE_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->hio_fp == NULL)
			vnetp->hio_fp = vresp;
		RW_EXIT(&vnetp->vsw_fp_rw);
	}
}
1225
/*
 * Remove an entry from fdb.
 *
 * After unhashing (which stops new lookups from taking references), this
 * waits for any in-flight reference holders to drain before returning, so
 * the caller may safely free the resource afterwards.
 */
static void
vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp)
{
	uint64_t	addr = 0;
	int		rv;
	uint32_t	refcnt;
	vnet_res_t	*tmp;

	KEY_HASH(addr, vresp->rem_macaddr);

	/*
	 * Remove the entry from fdb hash table.
	 * This prevents further references to this fdb entry.
	 * Note: HYBRID resources are never hashed (see vnet_fdbe_add()),
	 * so tmp is only valid when this removal actually ran.
	 */
	if (vresp->type != VIO_NET_RES_HYBRID) {
		rv = mod_hash_remove(vnetp->fdb_hashp, (mod_hash_key_t)addr,
		    (mod_hash_val_t *)&tmp);
		if (rv != 0) {
			/*
			 * As the resources are added to the hash only
			 * after they are started, this can occur if
			 * a resource unregisters before it is ever started.
			 */
			return;
		}
	}

	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
		/* drop the cached vsw-port entry */
		WRITE_ENTER(&vnetp->vsw_fp_rw);

		ASSERT(tmp == vnetp->vsw_fp);
		vnetp->vsw_fp = NULL;

		RW_EXIT(&vnetp->vsw_fp_rw);
	} else if (vresp->type == VIO_NET_RES_HYBRID) {
		/* drop the cached hybrid entry */
		WRITE_ENTER(&vnetp->vsw_fp_rw);

		vnetp->hio_fp = NULL;

		RW_EXIT(&vnetp->vsw_fp_rw);
	}

	/*
	 * If there are threads already ref holding before the entry was
	 * removed from hash table, then wait for ref count to drop to zero.
	 * The floor is 1 for LDC_SERVICE (its initial cached reference,
	 * see vnet_fdbe_add()), 0 otherwise.
	 */
	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
	    (refcnt = 1) : (refcnt = 0);
	while (vresp->refcnt > refcnt) {
		delay(drv_usectohz(vnet_fdbe_refcnt_delay));
	}
}
1281
1282 /*
1283 * Search fdb for a given mac address. If an entry is found, hold
1284 * a reference to it and return the entry; else returns NULL.
1285 */
1286 static vnet_res_t *
vnet_fdbe_find(vnet_t * vnetp,struct ether_addr * addrp)1287 vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
1288 {
1289 uint64_t key = 0;
1290 vnet_res_t *vresp;
1291 int rv;
1292
1293 KEY_HASH(key, addrp->ether_addr_octet);
1294
1295 rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
1296 (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);
1297
1298 if (rv != 0)
1299 return (NULL);
1300
1301 return (vresp);
1302 }
1303
/*
 * Callback function provided to mod_hash_find_cb(). After finding the fdb
 * entry corresponding to the key (macaddr), this callback will be invoked by
 * mod_hash_find_cb() to atomically increment the reference count on the fdb
 * entry before returning the found entry.  Running inside the hash lookup
 * guarantees the entry cannot be removed between find and refhold.
 */
static void
vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
{
	_NOTE(ARGUNUSED(key))
	VNET_FDBE_REFHOLD((vnet_res_t *)val);
}
1316
/*
 * Frames received that are tagged with the pvid of the vnet device must be
 * untagged before sending up the stack. This function walks the chain of rx
 * frames, untags any such frames and returns the updated chain.
 *
 * Arguments:
 * pvid: pvid of the vnet device for which packets are being received
 * mp: head of pkt chain to be validated and untagged
 *
 * Returns:
 * mp: head of updated chain of packets, returned through the in/out
 * argument. Packets whose tag removal fails are dropped from the
 * chain (vnet_vlan_remove_tag() returned NULL).
 */
static void
vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
{
	struct ether_vlan_header	*evhp;
	mblk_t				*bp;
	mblk_t				*bpt;	/* tail of rebuilt chain */
	mblk_t				*bph;	/* head of rebuilt chain */
	mblk_t				*bpn;	/* next pkt, saved before unlink */

	bpn = bph = bpt = NULL;

	for (bp = *mp; bp != NULL; bp = bpn) {

		/* save the forward link; bp is unlinked below */
		bpn = bp->b_next;
		bp->b_next = bp->b_prev = NULL;

		evhp = (struct ether_vlan_header *)bp->b_rptr;

		/* untag only frames tagged with the device's pvid */
		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {

			bp = vnet_vlan_remove_tag(bp);
			if (bp == NULL) {
				/* untagging failed; drop this packet */
				continue;
			}

		}

		/* build a chain of processed packets */
		if (bph == NULL) {
			bph = bpt = bp;
		} else {
			bpt->b_next = bp;
			bpt = bp;
		}

	}

	*mp = bph;
}
1369
1370 static void
vnet_rx(vio_net_handle_t vrh,mblk_t * mp)1371 vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
1372 {
1373 vnet_res_t *vresp = (vnet_res_t *)vrh;
1374 vnet_t *vnetp = vresp->vnetp;
1375 vnet_pseudo_rx_ring_t *ringp;
1376
1377 if ((vnetp == NULL) || (vnetp->mh == 0)) {
1378 freemsgchain(mp);
1379 return;
1380 }
1381
1382 ringp = vresp->rx_ringp;
1383 mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
1384 }
1385
/*
 * vnet_tx_update -- tx restart notification from an underlying resource;
 * propagates the restart to every tx pseudo ring registered with the MAC
 * layer (see the rationale in the comment below).
 */
void
vnet_tx_update(vio_net_handle_t vrh)
{
	vnet_res_t		*vresp = (vnet_res_t *)vrh;
	vnet_t			*vnetp = vresp->vnetp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	vnet_pseudo_tx_group_t	*tx_grp;
	int			i;

	/* nothing to update if not registered with the MAC layer */
	if (vnetp == NULL || vnetp->mh == NULL) {
		return;
	}

	/*
	 * Currently, the tx hwring API (used to access rings that belong to
	 * a Hybrid IO resource) does not provide us a per ring flow ctrl
	 * update; also the pseudo rings are shared by the ports/ldcs in the
	 * vgen layer. Thus we can't figure out which pseudo ring is being
	 * re-enabled for transmits. To work around this, when we get a tx
	 * restart notification from below, we simply propagate that to all
	 * the tx pseudo rings registered with the mac layer above.
	 *
	 * There are a couple of side effects with this approach, but they are
	 * not harmful, as outlined below:
	 *
	 * A) We might send an invalid ring_update() for a ring that is not
	 * really flow controlled. This will not have any effect in the mac
	 * layer and packets will continue to be transmitted on that ring.
	 *
	 * B) We might end up clearing the flow control in the mac layer for
	 * a ring that is still flow controlled in the underlying resource.
	 * This will result in the mac layer restarting transmit, only to be
	 * flow controlled again on that ring.
	 */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < tx_grp->ring_cnt; i++) {
		tx_ringp = &tx_grp->rings[i];
		mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
	}
}
1426
/*
 * vnet_tx_notify_thread:
 *
 * vnet_tx_ring_update() callback function wakes up this thread when
 * it gets called. This thread will call mac_tx_ring_update() to
 * notify upper mac of flow control getting relieved. Note that
 * vnet_tx_ring_update() cannot call mac_tx_ring_update() directly
 * because vnet_tx_ring_update() is called from lower mac with
 * mi_rw_lock held and mac_tx_ring_update() would also try to grab
 * the same lock.
 *
 * Lifecycle: the thread runs until flowctl_done is set (by
 * vnet_ring_grp_uninit()), then clears flowctl_thread and exits; the
 * uninit path joins it via the t_did captured under flowctl_lock.
 */
static void
vnet_tx_notify_thread(void *arg)
{
	callb_cpr_t		cprinfo;
	vnet_pseudo_tx_group_t	*tx_grp = (vnet_pseudo_tx_group_t *)arg;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	vnet_t			*vnetp;
	int			i;

	/* register with the CPR (suspend/resume) framework */
	CALLB_CPR_INIT(&cprinfo, &tx_grp->flowctl_lock, callb_generic_cpr,
	    "vnet_tx_notify_thread");

	mutex_enter(&tx_grp->flowctl_lock);
	while (!tx_grp->flowctl_done) {
		/* mark safe for CPR while blocked; re-mark busy on wakeup */
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		cv_wait(&tx_grp->flowctl_cv, &tx_grp->flowctl_lock);
		CALLB_CPR_SAFE_END(&cprinfo, &tx_grp->flowctl_lock);

		/*
		 * Notify the upper mac for every ring flagged by
		 * vnet_tx_ring_update() since the last wakeup.
		 */
		for (i = 0; i < tx_grp->ring_cnt; i++) {
			tx_ringp = &tx_grp->rings[i];
			if (tx_ringp->woken_up) {
				tx_ringp->woken_up = B_FALSE;
				vnetp = tx_ringp->vnetp;
				mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
			}
		}
	}
	/*
	 * The tx_grp is being destroyed, exit the thread.
	 * CALLB_CPR_EXIT also drops flowctl_lock.
	 */
	tx_grp->flowctl_thread = NULL;
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}
1472
1473 void
vnet_tx_ring_update(void * arg1,uintptr_t arg2)1474 vnet_tx_ring_update(void *arg1, uintptr_t arg2)
1475 {
1476 vnet_t *vnetp = (vnet_t *)arg1;
1477 vnet_pseudo_tx_group_t *tx_grp;
1478 vnet_pseudo_tx_ring_t *tx_ringp;
1479 int i;
1480
1481 tx_grp = &vnetp->tx_grp[0];
1482 for (i = 0; i < tx_grp->ring_cnt; i++) {
1483 tx_ringp = &tx_grp->rings[i];
1484 if (tx_ringp->hw_rh == (mac_ring_handle_t)arg2) {
1485 mutex_enter(&tx_grp->flowctl_lock);
1486 tx_ringp->woken_up = B_TRUE;
1487 cv_signal(&tx_grp->flowctl_cv);
1488 mutex_exit(&tx_grp->flowctl_lock);
1489 break;
1490 }
1491 }
1492 }
1493
1494 /*
1495 * Update the new mtu of vnet into the mac layer. First check if the device has
1496 * been plumbed and if so fail the mtu update. Returns 0 on success.
1497 */
1498 int
vnet_mtu_update(vnet_t * vnetp,uint32_t mtu)1499 vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
1500 {
1501 int rv;
1502
1503 if (vnetp == NULL || vnetp->mh == NULL) {
1504 return (EINVAL);
1505 }
1506
1507 WRITE_ENTER(&vnetp->vrwlock);
1508
1509 if (vnetp->flags & VNET_STARTED) {
1510 RW_EXIT(&vnetp->vrwlock);
1511 cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
1512 "update as the device is plumbed\n",
1513 vnetp->instance);
1514 return (EBUSY);
1515 }
1516
1517 /* update mtu in the mac layer */
1518 rv = mac_maxsdu_update(vnetp->mh, mtu);
1519 if (rv != 0) {
1520 RW_EXIT(&vnetp->vrwlock);
1521 cmn_err(CE_NOTE,
1522 "!vnet%d: Unable to update mtu with mac layer\n",
1523 vnetp->instance);
1524 return (EIO);
1525 }
1526
1527 vnetp->mtu = mtu;
1528
1529 RW_EXIT(&vnetp->vrwlock);
1530
1531 return (0);
1532 }
1533
1534 /*
1535 * Update the link state of vnet to the mac layer.
1536 */
1537 void
vnet_link_update(vnet_t * vnetp,link_state_t link_state)1538 vnet_link_update(vnet_t *vnetp, link_state_t link_state)
1539 {
1540 if (vnetp == NULL || vnetp->mh == NULL) {
1541 return;
1542 }
1543
1544 WRITE_ENTER(&vnetp->vrwlock);
1545 if (vnetp->link_state == link_state) {
1546 RW_EXIT(&vnetp->vrwlock);
1547 return;
1548 }
1549 vnetp->link_state = link_state;
1550 RW_EXIT(&vnetp->vrwlock);
1551
1552 mac_link_update(vnetp->mh, link_state);
1553 }
1554
1555 /*
1556 * vio_net_resource_reg -- An interface called to register a resource
1557 * with vnet.
1558 * macp -- a GLDv3 mac_register that has all the details of
1559 * a resource and its callbacks etc.
1560 * type -- resource type.
1561 * local_macaddr -- resource's MAC address. This is used to
1562 * associate a resource with a corresponding vnet.
1563 * remote_macaddr -- remote side MAC address. This is ignored for
1564 * the Hybrid resources.
1565 * vhp -- A handle returned to the caller.
1566 * vcb -- A set of callbacks provided to the callers.
1567 */
vio_net_resource_reg(mac_register_t * macp,vio_net_res_type_t type,ether_addr_t local_macaddr,ether_addr_t rem_macaddr,vio_net_handle_t * vhp,vio_net_callbacks_t * vcb)1568 int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
1569 ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
1570 vio_net_callbacks_t *vcb)
1571 {
1572 vnet_t *vnetp;
1573 vnet_res_t *vresp;
1574
1575 vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
1576 ether_copy(local_macaddr, vresp->local_macaddr);
1577 ether_copy(rem_macaddr, vresp->rem_macaddr);
1578 vresp->type = type;
1579 bcopy(macp, &vresp->macreg, sizeof (mac_register_t));
1580
1581 DBG1(NULL, "Resource Registerig type=0%X\n", type);
1582
1583 READ_ENTER(&vnet_rw);
1584 vnetp = vnet_headp;
1585 while (vnetp != NULL) {
1586 if (VNET_MATCH_RES(vresp, vnetp)) {
1587 vresp->vnetp = vnetp;
1588
1589 /* Setup kstats for hio resource */
1590 if (vresp->type == VIO_NET_RES_HYBRID) {
1591 vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
1592 "hio", vresp);
1593 if (vresp->ksp == NULL) {
1594 cmn_err(CE_NOTE, "!vnet%d: Cannot "
1595 "create kstats for hio resource",
1596 vnetp->instance);
1597 }
1598 }
1599 vnet_add_resource(vnetp, vresp);
1600 break;
1601 }
1602 vnetp = vnetp->nextp;
1603 }
1604 RW_EXIT(&vnet_rw);
1605 if (vresp->vnetp == NULL) {
1606 DWARN(NULL, "No vnet instance");
1607 kmem_free(vresp, sizeof (vnet_res_t));
1608 return (ENXIO);
1609 }
1610
1611 *vhp = vresp;
1612 vcb->vio_net_rx_cb = vnet_rx;
1613 vcb->vio_net_tx_update = vnet_tx_update;
1614 vcb->vio_net_report_err = vnet_handle_res_err;
1615
1616 /* Bind the resource to pseudo ring(s) */
1617 if (vnet_bind_rings(vresp) != 0) {
1618 (void) vnet_rem_resource(vnetp, vresp);
1619 vnet_hio_destroy_kstats(vresp->ksp);
1620 KMEM_FREE(vresp);
1621 return (1);
1622 }
1623
1624 /* Dispatch a task to start resources */
1625 vnet_dispatch_res_task(vnetp);
1626 return (0);
1627 }
1628
1629 /*
1630 * vio_net_resource_unreg -- An interface to unregister a resource.
1631 */
1632 void
vio_net_resource_unreg(vio_net_handle_t vhp)1633 vio_net_resource_unreg(vio_net_handle_t vhp)
1634 {
1635 vnet_res_t *vresp = (vnet_res_t *)vhp;
1636 vnet_t *vnetp = vresp->vnetp;
1637
1638 DBG1(NULL, "Resource Registerig hdl=0x%p", vhp);
1639
1640 ASSERT(vnetp != NULL);
1641 /*
1642 * Remove the resource from fdb; this ensures
1643 * there are no references to the resource.
1644 */
1645 vnet_fdbe_del(vnetp, vresp);
1646
1647 vnet_unbind_rings(vresp);
1648
1649 /* Now remove the resource from the list */
1650 (void) vnet_rem_resource(vnetp, vresp);
1651
1652 vnet_hio_destroy_kstats(vresp->ksp);
1653 KMEM_FREE(vresp);
1654 }
1655
/*
 * vnet_add_resource -- insert the resource at the head of the vnet
 * device's resource list, under the writer lock.
 */
static void
vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
{
	WRITE_ENTER(&vnetp->vrwlock);
	vresp->nextp = vnetp->vres_list;
	vnetp->vres_list = vresp;
	RW_EXIT(&vnetp->vrwlock);
}
1664
1665 static vnet_res_t *
vnet_rem_resource(vnet_t * vnetp,vnet_res_t * vresp)1666 vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
1667 {
1668 vnet_res_t *vrp;
1669
1670 WRITE_ENTER(&vnetp->vrwlock);
1671 if (vresp == vnetp->vres_list) {
1672 vnetp->vres_list = vresp->nextp;
1673 } else {
1674 vrp = vnetp->vres_list;
1675 while (vrp->nextp != NULL) {
1676 if (vrp->nextp == vresp) {
1677 vrp->nextp = vresp->nextp;
1678 break;
1679 }
1680 vrp = vrp->nextp;
1681 }
1682 }
1683 vresp->vnetp = NULL;
1684 vresp->nextp = NULL;
1685
1686 RW_EXIT(&vnetp->vrwlock);
1687
1688 return (vresp);
1689 }
1690
/*
 * vnet_dds_rx -- an interface called by vgen to deliver received DDS
 * messages to the DDS module for processing.
 */
void
vnet_dds_rx(void *arg, void *dmsg)
{
	vnet_t *vnetp = arg;
	vdds_process_dds_msg(vnetp, dmsg);
}
1700
1701 /*
1702 * vnet_send_dds_msg -- An interface provided to DDS to send
1703 * DDS messages. This simply sends meessages via vgen.
1704 */
1705 int
vnet_send_dds_msg(vnet_t * vnetp,void * dmsg)1706 vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
1707 {
1708 int rv;
1709
1710 if (vnetp->vgenhdl != NULL) {
1711 rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
1712 }
1713 return (rv);
1714 }
1715
/*
 * vnet_dds_cleanup_hio -- an interface called by vgen to cleanup hio
 * resources; thin wrapper around vdds_cleanup_hio().
 */
void
vnet_dds_cleanup_hio(vnet_t *vnetp)
{
	vdds_cleanup_hio(vnetp);
}
1724
1725 /*
1726 * vnet_handle_res_err -- A callback function called by a resource
1727 * to report an error. For example, vgen can call to report
1728 * an LDC down/reset event. This will trigger cleanup of associated
1729 * Hybrid resource.
1730 */
1731 /* ARGSUSED */
1732 static void
vnet_handle_res_err(vio_net_handle_t vrh,vio_net_err_val_t err)1733 vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
1734 {
1735 vnet_res_t *vresp = (vnet_res_t *)vrh;
1736 vnet_t *vnetp = vresp->vnetp;
1737
1738 if (vnetp == NULL) {
1739 return;
1740 }
1741 if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
1742 (vresp->type != VIO_NET_RES_HYBRID)) {
1743 return;
1744 }
1745
1746 vdds_cleanup_hio(vnetp);
1747 }
1748
1749 /*
1750 * vnet_dispatch_res_task -- A function to dispatch tasks start resources.
1751 */
1752 static void
vnet_dispatch_res_task(vnet_t * vnetp)1753 vnet_dispatch_res_task(vnet_t *vnetp)
1754 {
1755 int rv;
1756
1757 /*
1758 * Dispatch the task. It could be the case that vnetp->flags does
1759 * not have VNET_STARTED set. This is ok as vnet_rest_start_task()
1760 * can abort the task when the task is started. See related comments
1761 * in vnet_m_stop() and vnet_stop_resources().
1762 */
1763 rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
1764 vnetp, DDI_NOSLEEP);
1765 if (rv != DDI_SUCCESS) {
1766 cmn_err(CE_WARN,
1767 "vnet%d:Can't dispatch start resource task",
1768 vnetp->instance);
1769 }
1770 }
1771
1772 /*
1773 * vnet_res_start_task -- A taskq callback function that starts a resource.
1774 */
1775 static void
vnet_res_start_task(void * arg)1776 vnet_res_start_task(void *arg)
1777 {
1778 vnet_t *vnetp = arg;
1779
1780 WRITE_ENTER(&vnetp->vrwlock);
1781 if (vnetp->flags & VNET_STARTED) {
1782 vnet_start_resources(vnetp);
1783 }
1784 RW_EXIT(&vnetp->vrwlock);
1785 }
1786
1787 /*
1788 * vnet_start_resources -- starts all resources associated with
1789 * a vnet.
1790 */
1791 static void
vnet_start_resources(vnet_t * vnetp)1792 vnet_start_resources(vnet_t *vnetp)
1793 {
1794 mac_register_t *macp;
1795 mac_callbacks_t *cbp;
1796 vnet_res_t *vresp;
1797 int rv;
1798
1799 DBG1(vnetp, "enter\n");
1800
1801 ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));
1802
1803 for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
1804 /* skip if it is already started */
1805 if (vresp->flags & VNET_STARTED) {
1806 continue;
1807 }
1808 macp = &vresp->macreg;
1809 cbp = macp->m_callbacks;
1810 rv = cbp->mc_start(macp->m_driver);
1811 if (rv == 0) {
1812 /*
1813 * Successfully started the resource, so now
1814 * add it to the fdb.
1815 */
1816 vresp->flags |= VNET_STARTED;
1817 vnet_fdbe_add(vnetp, vresp);
1818 }
1819 }
1820
1821 DBG1(vnetp, "exit\n");
1822
1823 }
1824
/*
 * vnet_stop_resources -- stop all resources associated with a vnet.
 * Caller must hold vnetp->vrwlock as writer; the lock is temporarily
 * dropped around each mc_stop() call (see the comment in the loop).
 */
static void
vnet_stop_resources(vnet_t *vnetp)
{
	vnet_res_t	*vresp;
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;

	DBG1(vnetp, "enter\n");

	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));

	for (vresp = vnetp->vres_list; vresp != NULL; ) {
		if (vresp->flags & VNET_STARTED) {
			/*
			 * Release the lock while invoking mc_stop() of the
			 * underlying resource. We hold a reference to this
			 * resource to prevent being removed from the list in
			 * vio_net_resource_unreg(). Note that new resources
			 * can be added to the head of the list while the lock
			 * is released, but they won't be started, as
			 * VNET_STARTED flag has been cleared for the vnet
			 * device in vnet_m_stop(). Also, while the lock is
			 * released a resource could be removed from the list
			 * in vio_net_resource_unreg(); but that is ok, as we
			 * re-acquire the lock and only then access the forward
			 * link (vresp->nextp) to continue with the next
			 * resource.
			 */
			vresp->flags &= ~VNET_STARTED;
			/* VNET_STOPPING marks the resource as mid-stop */
			vresp->flags |= VNET_STOPPING;
			macp = &vresp->macreg;
			cbp = macp->m_callbacks;
			VNET_FDBE_REFHOLD(vresp);
			RW_EXIT(&vnetp->vrwlock);

			cbp->mc_stop(macp->m_driver);

			WRITE_ENTER(&vnetp->vrwlock);
			vresp->flags &= ~VNET_STOPPING;
			VNET_FDBE_REFRELE(vresp);
		}
		vresp = vresp->nextp;
	}
	DBG1(vnetp, "exit\n");
}
1873
1874 /*
1875 * Setup kstats for the HIO statistics.
1876 * NOTE: the synchronization for the statistics is the
1877 * responsibility of the caller.
1878 */
1879 kstat_t *
vnet_hio_setup_kstats(char * ks_mod,char * ks_name,vnet_res_t * vresp)1880 vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
1881 {
1882 kstat_t *ksp;
1883 vnet_t *vnetp = vresp->vnetp;
1884 vnet_hio_kstats_t *hiokp;
1885 size_t size;
1886
1887 ASSERT(vnetp != NULL);
1888 size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
1889 ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
1890 KSTAT_TYPE_NAMED, size, 0);
1891 if (ksp == NULL) {
1892 return (NULL);
1893 }
1894
1895 hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
1896 kstat_named_init(&hiokp->ipackets, "ipackets",
1897 KSTAT_DATA_ULONG);
1898 kstat_named_init(&hiokp->ierrors, "ierrors",
1899 KSTAT_DATA_ULONG);
1900 kstat_named_init(&hiokp->opackets, "opackets",
1901 KSTAT_DATA_ULONG);
1902 kstat_named_init(&hiokp->oerrors, "oerrors",
1903 KSTAT_DATA_ULONG);
1904
1905
1906 /* MIB II kstat variables */
1907 kstat_named_init(&hiokp->rbytes, "rbytes",
1908 KSTAT_DATA_ULONG);
1909 kstat_named_init(&hiokp->obytes, "obytes",
1910 KSTAT_DATA_ULONG);
1911 kstat_named_init(&hiokp->multircv, "multircv",
1912 KSTAT_DATA_ULONG);
1913 kstat_named_init(&hiokp->multixmt, "multixmt",
1914 KSTAT_DATA_ULONG);
1915 kstat_named_init(&hiokp->brdcstrcv, "brdcstrcv",
1916 KSTAT_DATA_ULONG);
1917 kstat_named_init(&hiokp->brdcstxmt, "brdcstxmt",
1918 KSTAT_DATA_ULONG);
1919 kstat_named_init(&hiokp->norcvbuf, "norcvbuf",
1920 KSTAT_DATA_ULONG);
1921 kstat_named_init(&hiokp->noxmtbuf, "noxmtbuf",
1922 KSTAT_DATA_ULONG);
1923
1924 ksp->ks_update = vnet_hio_update_kstats;
1925 ksp->ks_private = (void *)vresp;
1926 kstat_install(ksp);
1927 return (ksp);
1928 }
1929
/*
 * Destroy kstats.  Safe to call with a NULL kstat pointer (resources
 * without kstats, e.g. non-Hybrid ones, pass NULL here).
 */
static void
vnet_hio_destroy_kstats(kstat_t *ksp)
{
	if (ksp != NULL)
		kstat_delete(ksp);
}
1939
/*
 * Update the kstats.  ks_update callback installed by
 * vnet_hio_setup_kstats(): on KSTAT_READ it snapshots the current stats
 * from the underlying Hybrid resource into ks_data; writes are rejected
 * with EACCES.  Returns 0 if the device has no Hybrid resource cached.
 */
static int
vnet_hio_update_kstats(kstat_t *ksp, int rw)
{
	vnet_t			*vnetp;
	vnet_res_t		*vresp;
	vnet_hio_stats_t	statsp;
	vnet_hio_kstats_t	*hiokp;

	vresp = (vnet_res_t *)ksp->ks_private;
	vnetp = vresp->vnetp;

	bzero(&statsp, sizeof (vnet_hio_stats_t));

	READ_ENTER(&vnetp->vsw_fp_rw);
	if (vnetp->hio_fp == NULL) {
		/* not using hio resources, just return */
		RW_EXIT(&vnetp->vsw_fp_rw);
		return (0);
	}
	/*
	 * Hold a reference so vio_net_resource_unreg() cannot free the
	 * Hybrid resource while we are reading its stats; release the
	 * rwlock before the potentially slow stats query.
	 */
	VNET_FDBE_REFHOLD(vnetp->hio_fp);
	RW_EXIT(&vnetp->vsw_fp_rw);
	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
	VNET_FDBE_REFRELE(vnetp->hio_fp);

	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;

	if (rw == KSTAT_READ) {
		/* Link Input/Output stats */
		hiokp->ipackets.value.ul = (uint32_t)statsp.ipackets;
		hiokp->ipackets64.value.ull = statsp.ipackets;
		hiokp->ierrors.value.ul = statsp.ierrors;
		hiokp->opackets.value.ul = (uint32_t)statsp.opackets;
		hiokp->opackets64.value.ull = statsp.opackets;
		hiokp->oerrors.value.ul = statsp.oerrors;

		/* MIB II kstat variables */
		hiokp->rbytes.value.ul = (uint32_t)statsp.rbytes;
		hiokp->rbytes64.value.ull = statsp.rbytes;
		hiokp->obytes.value.ul = (uint32_t)statsp.obytes;
		hiokp->obytes64.value.ull = statsp.obytes;
		hiokp->multircv.value.ul = statsp.multircv;
		hiokp->multixmt.value.ul = statsp.multixmt;
		hiokp->brdcstrcv.value.ul = statsp.brdcstrcv;
		hiokp->brdcstxmt.value.ul = statsp.brdcstxmt;
		hiokp->norcvbuf.value.ul = statsp.norcvbuf;
		hiokp->noxmtbuf.value.ul = statsp.noxmtbuf;
	} else {
		/* these kstats are read-only */
		return (EACCES);
	}

	return (0);
}
1995
/*
 * vnet_hio_get_stats -- query the underlying Hybrid device through its
 * mc_getstat callback for each MAC statistic in [MAC_STAT_MIN,
 * MAC_STAT_OVERFLOWS) and copy the ones of interest into *statsp.
 * Stats the callback fails to return, and stats not listed in the
 * switch, are left at whatever value the caller initialized them to.
 */
static void
vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
{
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	uint64_t	val;
	int		stat;

	/*
	 * get the specified statistics from the underlying nxge.
	 */
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
			switch (stat) {
			case MAC_STAT_IPACKETS:
				statsp->ipackets = val;
				break;

			case MAC_STAT_IERRORS:
				statsp->ierrors = val;
				break;

			case MAC_STAT_OPACKETS:
				statsp->opackets = val;
				break;

			case MAC_STAT_OERRORS:
				statsp->oerrors = val;
				break;

			case MAC_STAT_RBYTES:
				statsp->rbytes = val;
				break;

			case MAC_STAT_OBYTES:
				statsp->obytes = val;
				break;

			case MAC_STAT_MULTIRCV:
				statsp->multircv = val;
				break;

			case MAC_STAT_MULTIXMT:
				statsp->multixmt = val;
				break;

			case MAC_STAT_BRDCSTRCV:
				statsp->brdcstrcv = val;
				break;

			case MAC_STAT_BRDCSTXMT:
				statsp->brdcstxmt = val;
				break;

			case MAC_STAT_NOXMTBUF:
				statsp->noxmtbuf = val;
				break;

			case MAC_STAT_NORCVBUF:
				statsp->norcvbuf = val;
				break;

			default:
				/*
				 * parameters not interested.
				 */
				break;
			}
		}
	}
}
2069
2070 static boolean_t
vnet_m_capab(void * arg,mac_capab_t cap,void * cap_data)2071 vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
2072 {
2073 vnet_t *vnetp = (vnet_t *)arg;
2074
2075 if (vnetp == NULL) {
2076 return (0);
2077 }
2078
2079 switch (cap) {
2080
2081 case MAC_CAPAB_RINGS: {
2082
2083 mac_capab_rings_t *cap_rings = cap_data;
2084 /*
2085 * Rings Capability Notes:
2086 * We advertise rings to make use of the rings framework in
2087 * gldv3 mac layer, to improve the performance. This is
2088 * specifically needed when a Hybrid resource (with multiple
2089 * tx/rx hardware rings) is assigned to a vnet device. We also
2090 * leverage this for the normal case when no Hybrid resource is
2091 * assigned.
2092 *
2093 * Ring Allocation:
2094 * - TX path:
2095 * We expose a pseudo ring group with 2 pseudo tx rings (as
2096 * currently HybridIO exports only 2 rings) In the normal case,
2097 * transmit traffic that comes down to the driver through the
2098 * mri_tx (vnet_tx_ring_send()) entry point goes through the
2099 * distributed switching algorithm in vnet and gets transmitted
2100 * over a port/LDC in the vgen layer to either the vswitch or a
2101 * peer vnet. If and when a Hybrid resource is assigned to the
2102 * vnet, we obtain the tx ring information of the Hybrid device
2103 * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings.
2104 * Traffic being sent over the Hybrid resource by the mac layer
2105 * gets spread across both hw rings, as they are mapped to the
2106 * 2 pseudo tx rings in vnet.
2107 *
2108 * - RX path:
2109 * We expose a pseudo ring group with 3 pseudo rx rings (static
2110 * rings) initially. The first (default) pseudo rx ring is
2111 * reserved for the resource that connects to the vswitch
2112 * service. The next 2 rings are reserved for a Hybrid resource
2113 * that may be assigned to the vnet device. If and when a
2114 * Hybrid resource is assigned to the vnet, we obtain the rx
2115 * ring information of the Hybrid device (nxge) and map these
2116 * pseudo rings 1:1 to the 2 hw rx rings. For each additional
2117 * resource that connects to a peer vnet, we dynamically
2118 * allocate a pseudo rx ring and map it to that resource, when
2119 * the resource gets added; and the pseudo rx ring is
2120 * dynamically registered with the upper mac layer. We do the
2121 * reverse and unregister the ring with the mac layer when
2122 * the resource gets removed.
2123 *
2124 * Synchronization notes:
2125 * We don't need any lock to protect members of ring structure,
2126 * specifically ringp->hw_rh, in either the TX or the RX ring,
2127 * as explained below.
2128 * - TX ring:
2129 * ring->hw_rh is initialized only when a Hybrid resource is
2130 * associated; and gets referenced only in vnet_hio_tx(). The
2131 * Hybrid resource itself is available in fdb only after tx
2132 * hwrings are found and mapped; i.e, in vio_net_resource_reg()
2133 * we call vnet_bind_rings() first and then call
2134 * vnet_start_resources() which adds an entry to fdb. For
2135 * traffic going over LDC resources, we don't reference
2136 * ring->hw_rh at all.
2137 * - RX ring:
2138 * For rings mapped to Hybrid resource ring->hw_rh is
2139 * initialized and only then do we add the rx callback for
2140 * the underlying Hybrid resource; we disable callbacks before
2141 * we unmap ring->hw_rh. For rings mapped to LDC resources, we
2142 * stop the rx callbacks (in vgen) before we remove ring->hw_rh
2143 * (vio_net_resource_unreg()).
2144 * Also, we access ring->hw_rh in vnet_rx_ring_stat().
2145 * Note that for rings mapped to Hybrid resource, though the
2146 * rings are statically registered with the mac layer, its
2147 * hardware ring mapping (ringp->hw_rh) can be torn down in
2148 * vnet_unbind_hwrings() while the kstat operation is in
2149 * progress. To protect against this, we hold a reference to
2150 * the resource in FDB; this ensures that the thread in
2151 * vio_net_resource_unreg() waits for the reference to be
2152 * dropped before unbinding the ring.
2153 *
2154 * We don't need to do this for rings mapped to LDC resources.
2155 * These rings are registered/unregistered dynamically with
2156 * the mac layer and so any attempt to unregister the ring
2157 * while kstat operation is in progress will block in
2158 * mac_group_rem_ring(). Thus implicitly protects the
2159 * resource (ringp->hw_rh) from disappearing.
2160 */
2161
2162 if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
2163 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2164
2165 /*
2166 * The ring_cnt for rx grp is initialized in
2167 * vnet_ring_grp_init(). Later, the ring_cnt gets
2168 * updated dynamically whenever LDC resources are added
2169 * or removed.
2170 */
2171 cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
2172 cap_rings->mr_rget = vnet_get_ring;
2173
2174 cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
2175 cap_rings->mr_gget = vnet_get_group;
2176 cap_rings->mr_gaddring = NULL;
2177 cap_rings->mr_gremring = NULL;
2178 } else {
2179 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2180
2181 /*
2182 * The ring_cnt for tx grp is initialized in
2183 * vnet_ring_grp_init() and remains constant, as we
2184 * do not support dymanic tx rings for now.
2185 */
2186 cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
2187 cap_rings->mr_rget = vnet_get_ring;
2188
2189 /*
2190 * Transmit rings are not grouped; i.e, the number of
2191 * transmit ring groups advertised should be set to 0.
2192 */
2193 cap_rings->mr_gnum = 0;
2194
2195 cap_rings->mr_gget = vnet_get_group;
2196 cap_rings->mr_gaddring = NULL;
2197 cap_rings->mr_gremring = NULL;
2198 }
2199 return (B_TRUE);
2200
2201 }
2202
2203 default:
2204 break;
2205
2206 }
2207
2208 return (B_FALSE);
2209 }
2210
2211 /*
2212 * Callback funtion for MAC layer to get ring information.
2213 */
2214 static void
vnet_get_ring(void * arg,mac_ring_type_t rtype,const int g_index,const int r_index,mac_ring_info_t * infop,mac_ring_handle_t r_handle)2215 vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
2216 const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle)
2217 {
2218 vnet_t *vnetp = arg;
2219
2220 switch (rtype) {
2221
2222 case MAC_RING_TYPE_RX: {
2223
2224 vnet_pseudo_rx_group_t *rx_grp;
2225 vnet_pseudo_rx_ring_t *rx_ringp;
2226 mac_intr_t *mintr;
2227
2228 /* We advertised only one RX group */
2229 ASSERT(g_index == 0);
2230 rx_grp = &vnetp->rx_grp[g_index];
2231
2232 /* Check the current # of rings in the rx group */
2233 ASSERT((r_index >= 0) && (r_index < rx_grp->max_ring_cnt));
2234
2235 /* Get the ring based on the index */
2236 rx_ringp = &rx_grp->rings[r_index];
2237
2238 rx_ringp->handle = r_handle;
2239 /*
2240 * Note: we don't need to save the incoming r_index in rx_ring,
2241 * as vnet_ring_grp_init() would have initialized the index for
2242 * each ring in the array.
2243 */
2244 rx_ringp->grp = rx_grp;
2245 rx_ringp->vnetp = vnetp;
2246
2247 mintr = &infop->mri_intr;
2248 mintr->mi_handle = (mac_intr_handle_t)rx_ringp;
2249 mintr->mi_enable = (mac_intr_enable_t)vnet_ring_enable_intr;
2250 mintr->mi_disable = (mac_intr_disable_t)vnet_ring_disable_intr;
2251
2252 infop->mri_driver = (mac_ring_driver_t)rx_ringp;
2253 infop->mri_start = vnet_rx_ring_start;
2254 infop->mri_stop = vnet_rx_ring_stop;
2255 infop->mri_stat = vnet_rx_ring_stat;
2256
2257 /* Set the poll function, as this is an rx ring */
2258 infop->mri_poll = vnet_rx_poll;
2259 /*
2260 * MAC_RING_RX_ENQUEUE bit needed to be set for nxge
2261 * which was not sending packet chains in interrupt
2262 * context. For such drivers, packets are queued in
2263 * Rx soft rings so that we get a chance to switch
2264 * into a polling mode under backlog. This bug (not
2265 * sending packet chains) has now been fixed. Once
2266 * the performance impact is measured, this change
2267 * will be removed.
2268 */
2269 infop->mri_flags = (vnet_mac_rx_queuing ?
2270 MAC_RING_RX_ENQUEUE : 0);
2271 break;
2272 }
2273
2274 case MAC_RING_TYPE_TX: {
2275 vnet_pseudo_tx_group_t *tx_grp;
2276 vnet_pseudo_tx_ring_t *tx_ringp;
2277
2278 /*
2279 * No need to check grp index; mac layer passes -1 for it.
2280 */
2281 tx_grp = &vnetp->tx_grp[0];
2282
2283 /* Check the # of rings in the tx group */
2284 ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));
2285
2286 /* Get the ring based on the index */
2287 tx_ringp = &tx_grp->rings[r_index];
2288
2289 tx_ringp->handle = r_handle;
2290 tx_ringp->index = r_index;
2291 tx_ringp->grp = tx_grp;
2292 tx_ringp->vnetp = vnetp;
2293
2294 infop->mri_driver = (mac_ring_driver_t)tx_ringp;
2295 infop->mri_start = vnet_tx_ring_start;
2296 infop->mri_stop = vnet_tx_ring_stop;
2297 infop->mri_stat = vnet_tx_ring_stat;
2298
2299 /* Set the transmit function, as this is a tx ring */
2300 infop->mri_tx = vnet_tx_ring_send;
2301 /*
2302 * MAC_RING_TX_SERIALIZE bit needs to be set while
2303 * hybridIO is enabled to workaround tx lock
2304 * contention issues in nxge.
2305 */
2306 infop->mri_flags = (vnet_mac_tx_serialize ?
2307 MAC_RING_TX_SERIALIZE : 0);
2308 break;
2309 }
2310
2311 default:
2312 break;
2313 }
2314 }
2315
2316 /*
2317 * Callback funtion for MAC layer to get group information.
2318 */
2319 static void
vnet_get_group(void * arg,mac_ring_type_t type,const int index,mac_group_info_t * infop,mac_group_handle_t handle)2320 vnet_get_group(void *arg, mac_ring_type_t type, const int index,
2321 mac_group_info_t *infop, mac_group_handle_t handle)
2322 {
2323 vnet_t *vnetp = (vnet_t *)arg;
2324
2325 switch (type) {
2326
2327 case MAC_RING_TYPE_RX:
2328 {
2329 vnet_pseudo_rx_group_t *rx_grp;
2330
2331 /* We advertised only one RX group */
2332 ASSERT(index == 0);
2333
2334 rx_grp = &vnetp->rx_grp[index];
2335 rx_grp->handle = handle;
2336 rx_grp->index = index;
2337 rx_grp->vnetp = vnetp;
2338
2339 infop->mgi_driver = (mac_group_driver_t)rx_grp;
2340 infop->mgi_start = NULL;
2341 infop->mgi_stop = NULL;
2342 infop->mgi_addmac = vnet_addmac;
2343 infop->mgi_remmac = vnet_remmac;
2344 infop->mgi_count = rx_grp->ring_cnt;
2345
2346 break;
2347 }
2348
2349 case MAC_RING_TYPE_TX:
2350 {
2351 vnet_pseudo_tx_group_t *tx_grp;
2352
2353 /* We advertised only one TX group */
2354 ASSERT(index == 0);
2355
2356 tx_grp = &vnetp->tx_grp[index];
2357 tx_grp->handle = handle;
2358 tx_grp->index = index;
2359 tx_grp->vnetp = vnetp;
2360
2361 infop->mgi_driver = (mac_group_driver_t)tx_grp;
2362 infop->mgi_start = NULL;
2363 infop->mgi_stop = NULL;
2364 infop->mgi_addmac = NULL;
2365 infop->mgi_remmac = NULL;
2366 infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;
2367
2368 break;
2369 }
2370
2371 default:
2372 break;
2373
2374 }
2375 }
2376
2377 static int
vnet_rx_ring_start(mac_ring_driver_t arg,uint64_t mr_gen_num)2378 vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
2379 {
2380 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2381 int err;
2382
2383 /*
2384 * If this ring is mapped to a LDC resource, simply mark the state to
2385 * indicate the ring is started and return.
2386 */
2387 if ((rx_ringp->state &
2388 (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
2389 rx_ringp->gen_num = mr_gen_num;
2390 rx_ringp->state |= VNET_RXRING_STARTED;
2391 return (0);
2392 }
2393
2394 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2395
2396 /*
2397 * This must be a ring reserved for a hwring. If the hwring is not
2398 * bound yet, simply mark the state to indicate the ring is started and
2399 * return. If and when a hybrid resource is activated for this vnet
2400 * device, we will bind the hwring and start it then. If a hwring is
2401 * already bound, start it now.
2402 */
2403 if (rx_ringp->hw_rh == NULL) {
2404 rx_ringp->gen_num = mr_gen_num;
2405 rx_ringp->state |= VNET_RXRING_STARTED;
2406 return (0);
2407 }
2408
2409 err = mac_hwring_activate(rx_ringp->hw_rh);
2410 if (err == 0) {
2411 rx_ringp->gen_num = mr_gen_num;
2412 rx_ringp->state |= VNET_RXRING_STARTED;
2413 } else {
2414 err = ENXIO;
2415 }
2416
2417 return (err);
2418 }
2419
2420 static void
vnet_rx_ring_stop(mac_ring_driver_t arg)2421 vnet_rx_ring_stop(mac_ring_driver_t arg)
2422 {
2423 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2424
2425 /*
2426 * If this ring is mapped to a LDC resource, simply mark the state to
2427 * indicate the ring is now stopped and return.
2428 */
2429 if ((rx_ringp->state &
2430 (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
2431 rx_ringp->state &= ~VNET_RXRING_STARTED;
2432 return;
2433 }
2434
2435 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2436
2437 /*
2438 * This must be a ring reserved for a hwring. If the hwring is not
2439 * bound yet, simply mark the state to indicate the ring is stopped and
2440 * return. If a hwring is already bound, stop it now.
2441 */
2442 if (rx_ringp->hw_rh == NULL) {
2443 rx_ringp->state &= ~VNET_RXRING_STARTED;
2444 return;
2445 }
2446
2447 mac_hwring_quiesce(rx_ringp->hw_rh);
2448 rx_ringp->state &= ~VNET_RXRING_STARTED;
2449 }
2450
/*
 * Ring-stat callback for a pseudo rx ring.  For a ring mapped to the
 * Hybrid resource, forwards the request to the underlying hwring; for a
 * ring mapped to a LDC resource, forwards it to vgen via the resource's
 * registered callbacks.  Always returns 0.
 */
static int
vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)rdriver;
	vnet_t			*vnetp = (vnet_t *)rx_ringp->vnetp;
	vnet_res_t		*vresp;
	mac_register_t		*macp;
	mac_callbacks_t		*cbp;

	/*
	 * Refer to vnet_m_capab() function for detailed comments on ring
	 * synchronization.
	 */
	if ((rx_ringp->state & VNET_RXRING_HYBRID) != 0) {
		READ_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->hio_fp == NULL) {
			/* No Hybrid resource in fdb; nothing to report */
			RW_EXIT(&vnetp->vsw_fp_rw);
			return (0);
		}

		/*
		 * Hold a reference to the Hybrid resource so that
		 * vio_net_resource_unreg() cannot tear down hw_rh
		 * (in vnet_unbind_hwrings()) while we query it; the
		 * lock is dropped before the potentially-slow stat call.
		 */
		VNET_FDBE_REFHOLD(vnetp->hio_fp);
		RW_EXIT(&vnetp->vsw_fp_rw);
		(void) mac_hwring_getstat(rx_ringp->hw_rh, stat, val);
		VNET_FDBE_REFRELE(vnetp->hio_fp);
		return (0);
	}

	ASSERT((rx_ringp->state &
	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0);
	/* LDC-backed ring: hw_rh holds the vnet_res_t for the channel */
	vresp = (vnet_res_t *)rx_ringp->hw_rh;
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;

	(void) cbp->mc_getstat(macp->m_driver, stat, val);

	return (0);
}
2488
2489 /* ARGSUSED */
2490 static int
vnet_tx_ring_start(mac_ring_driver_t arg,uint64_t mr_gen_num)2491 vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
2492 {
2493 vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2494
2495 tx_ringp->state |= VNET_TXRING_STARTED;
2496 return (0);
2497 }
2498
2499 static void
vnet_tx_ring_stop(mac_ring_driver_t arg)2500 vnet_tx_ring_stop(mac_ring_driver_t arg)
2501 {
2502 vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
2503
2504 tx_ringp->state &= ~VNET_TXRING_STARTED;
2505 }
2506
2507 static int
vnet_tx_ring_stat(mac_ring_driver_t rdriver,uint_t stat,uint64_t * val)2508 vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
2509 {
2510 vnet_pseudo_tx_ring_t *tx_ringp = (vnet_pseudo_tx_ring_t *)rdriver;
2511 vnet_tx_ring_stats_t *statsp;
2512
2513 statsp = &tx_ringp->tx_ring_stats;
2514
2515 switch (stat) {
2516 case MAC_STAT_OPACKETS:
2517 *val = statsp->opackets;
2518 break;
2519
2520 case MAC_STAT_OBYTES:
2521 *val = statsp->obytes;
2522 break;
2523
2524 default:
2525 *val = 0;
2526 return (ENOTSUP);
2527 }
2528
2529 return (0);
2530 }
2531
2532 /*
2533 * Disable polling for a ring and enable its interrupt.
2534 */
2535 static int
vnet_ring_enable_intr(void * arg)2536 vnet_ring_enable_intr(void *arg)
2537 {
2538 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2539 vnet_res_t *vresp;
2540
2541 if (rx_ringp->hw_rh == NULL) {
2542 /*
2543 * Ring enable intr func is being invoked, but the ring is
2544 * not bound to any underlying resource ? This must be a ring
2545 * reserved for Hybrid resource and no such resource has been
2546 * assigned to this vnet device yet. We simply return success.
2547 */
2548 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2549 return (0);
2550 }
2551
2552 /*
2553 * The rx ring has been bound to either a LDC or a Hybrid resource.
2554 * Call the appropriate function to enable interrupts for the ring.
2555 */
2556 if (rx_ringp->state & VNET_RXRING_HYBRID) {
2557 return (mac_hwring_enable_intr(rx_ringp->hw_rh));
2558 } else {
2559 vresp = (vnet_res_t *)rx_ringp->hw_rh;
2560 return (vgen_enable_intr(vresp->macreg.m_driver));
2561 }
2562 }
2563
2564 /*
2565 * Enable polling for a ring and disable its interrupt.
2566 */
2567 static int
vnet_ring_disable_intr(void * arg)2568 vnet_ring_disable_intr(void *arg)
2569 {
2570 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2571 vnet_res_t *vresp;
2572
2573 if (rx_ringp->hw_rh == NULL) {
2574 /*
2575 * Ring disable intr func is being invoked, but the ring is
2576 * not bound to any underlying resource ? This must be a ring
2577 * reserved for Hybrid resource and no such resource has been
2578 * assigned to this vnet device yet. We simply return success.
2579 */
2580 ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
2581 return (0);
2582 }
2583
2584 /*
2585 * The rx ring has been bound to either a LDC or a Hybrid resource.
2586 * Call the appropriate function to disable interrupts for the ring.
2587 */
2588 if (rx_ringp->state & VNET_RXRING_HYBRID) {
2589 return (mac_hwring_disable_intr(rx_ringp->hw_rh));
2590 } else {
2591 vresp = (vnet_res_t *)rx_ringp->hw_rh;
2592 return (vgen_disable_intr(vresp->macreg.m_driver));
2593 }
2594 }
2595
2596 /*
2597 * Poll 'bytes_to_pickup' bytes of message from the rx ring.
2598 */
2599 static mblk_t *
vnet_rx_poll(void * arg,int bytes_to_pickup)2600 vnet_rx_poll(void *arg, int bytes_to_pickup)
2601 {
2602 vnet_pseudo_rx_ring_t *rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
2603 mblk_t *mp = NULL;
2604 vnet_res_t *vresp;
2605 vnet_t *vnetp = rx_ringp->vnetp;
2606
2607 if (rx_ringp->hw_rh == NULL) {
2608 return (NULL);
2609 }
2610
2611 if (rx_ringp->state & VNET_RXRING_HYBRID) {
2612 mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
2613 /*
2614 * Packets received over a hybrid resource need additional
2615 * processing to remove the tag, for the pvid case. The
2616 * underlying resource is not aware of the vnet's pvid and thus
2617 * packets are received with the vlan tag in the header; unlike
2618 * packets that are received over a ldc channel in which case
2619 * the peer vnet/vsw would have already removed the tag.
2620 */
2621 if (vnetp->pvid != vnetp->default_vlan_id) {
2622 vnet_rx_frames_untag(vnetp->pvid, &mp);
2623 }
2624 } else {
2625 vresp = (vnet_res_t *)rx_ringp->hw_rh;
2626 mp = vgen_rx_poll(vresp->macreg.m_driver, bytes_to_pickup);
2627 }
2628 return (mp);
2629 }
2630
2631 /* ARGSUSED */
2632 void
vnet_hio_rx_cb(void * arg,mac_resource_handle_t mrh,mblk_t * mp,boolean_t loopback)2633 vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
2634 boolean_t loopback)
2635 {
2636 vnet_t *vnetp = (vnet_t *)arg;
2637 vnet_pseudo_rx_ring_t *ringp = (vnet_pseudo_rx_ring_t *)mrh;
2638
2639 /*
2640 * Packets received over a hybrid resource need additional processing
2641 * to remove the tag, for the pvid case. The underlying resource is
2642 * not aware of the vnet's pvid and thus packets are received with the
2643 * vlan tag in the header; unlike packets that are received over a ldc
2644 * channel in which case the peer vnet/vsw would have already removed
2645 * the tag.
2646 */
2647 if (vnetp->pvid != vnetp->default_vlan_id) {
2648 vnet_rx_frames_untag(vnetp->pvid, &mp);
2649 if (mp == NULL) {
2650 return;
2651 }
2652 }
2653 mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
2654 }
2655
2656 static int
vnet_addmac(void * arg,const uint8_t * mac_addr)2657 vnet_addmac(void *arg, const uint8_t *mac_addr)
2658 {
2659 vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg;
2660 vnet_t *vnetp;
2661
2662 vnetp = rx_grp->vnetp;
2663
2664 if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
2665 return (0);
2666 }
2667
2668 cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
2669 vnetp->instance, __func__);
2670 return (EINVAL);
2671 }
2672
2673 static int
vnet_remmac(void * arg,const uint8_t * mac_addr)2674 vnet_remmac(void *arg, const uint8_t *mac_addr)
2675 {
2676 vnet_pseudo_rx_group_t *rx_grp = (vnet_pseudo_rx_group_t *)arg;
2677 vnet_t *vnetp;
2678
2679 vnetp = rx_grp->vnetp;
2680
2681 if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
2682 return (0);
2683 }
2684
2685 cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
2686 vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
2687 return (EINVAL);
2688 }
2689
/*
 * Initialize the mac-client interfaces to the Hybrid device 'ifname':
 * open the device (retrying while it has not yet attached), open an
 * exclusive mac client on it, add the vnet's unicast address, register
 * the device as a VIO_NET_RES_HYBRID resource, and install the receive
 * callback.  Returns 0 on success; on failure returns either the mac
 * open error or 1, after undoing any partial setup via
 * vnet_hio_mac_cleanup().
 */
int
vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
{
	mac_handle_t		mh;
	mac_client_handle_t	mch = NULL;
	mac_unicast_handle_t	muh = NULL;
	mac_diag_t		diag;
	mac_register_t		*macp;
	char			client_name[MAXNAMELEN];
	int			rv;
	/* Keep vlan tagging/stripping under vnet's control, not the mac's */
	uint16_t		mac_flags = MAC_UNICAST_TAG_DISABLE |
	    MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
	vio_net_callbacks_t	vcb;
	ether_addr_t		rem_addr =
	    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
	uint32_t		retries = 0;

	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
		return (EAGAIN);
	}

	/*
	 * The Hybrid device may not have attached yet; retry an ENOENT
	 * open up to vnet_mac_open_retries times, waiting
	 * vnet_mac_open_delay microseconds between attempts.
	 */
	do {
		rv = mac_open_by_linkname(ifname, &mh);
		if (rv == 0) {
			break;
		}
		if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
			mac_free(macp);
			return (rv);
		}
		drv_usecwait(vnet_mac_open_delay);
	} while (rv == ENOENT);

	vnetp->hio_mh = mh;

	(void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
	    ifname);
	rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
	if (rv != 0) {
		goto fail;
	}
	vnetp->hio_mch = mch;

	rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
	    &diag);
	if (rv != 0) {
		goto fail;
	}
	vnetp->hio_muh = muh;

	/* Describe the Hybrid device as a vio net resource */
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vnetp;
	macp->m_dip = NULL;
	macp->m_src_addr = NULL;
	macp->m_callbacks = &vnet_hio_res_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;

	rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
	    vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
	if (rv != 0) {
		goto fail;
	}
	mac_free(macp);

	/* add the recv callback */
	mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);

	return (0);

fail:
	/* Undo whatever was set up; cleanup checks each handle for NULL */
	mac_free(macp);
	vnet_hio_mac_cleanup(vnetp);
	return (1);
}
2765
/*
 * Tear down the mac-client interfaces to the Hybrid device, in the
 * reverse order of vnet_hio_mac_init(): unregister the vio net
 * resource, remove the unicast address, close the mac client, and
 * finally close the device.  Each handle is checked for NULL so this is
 * safe to call after a partial init.
 */
void
vnet_hio_mac_cleanup(vnet_t *vnetp)
{
	if (vnetp->hio_vhp != NULL) {
		vio_net_resource_unreg(vnetp->hio_vhp);
		vnetp->hio_vhp = NULL;
	}

	if (vnetp->hio_muh != NULL) {
		(void) mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
		vnetp->hio_muh = NULL;
	}

	if (vnetp->hio_mch != NULL) {
		mac_client_close(vnetp->hio_mch, 0);
		vnetp->hio_mch = NULL;
	}

	if (vnetp->hio_mh != NULL) {
		mac_close(vnetp->hio_mh);
		vnetp->hio_mh = NULL;
	}
}
2789
/*
 * Bind the vnet's pseudo rings to the Hybrid device's hardware rings.
 * The RX hwrings are mapped 1:1 to the reserved pseudo rx rings
 * (starting at VNET_HYBRID_RXRING_INDEX) and the TX hwrings 1:1 to the
 * pseudo tx rings.  Runs under the Hybrid device's mac perimeter.
 * Returns 0 on success; on failure returns 1 after undoing partial
 * bindings via vnet_unbind_hwrings().
 */
static int
vnet_bind_hwrings(vnet_t *vnetp)
{
	mac_ring_handle_t	hw_rh[VNET_NUM_HYBRID_RINGS];
	mac_perim_handle_t	mph1;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			hw_ring_cnt;
	int			i;
	int			rv;

	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

	/* Get the list of the underlying RX rings. */
	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
	    MAC_RING_TYPE_RX);

	/* We expect the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
		cmn_err(CE_WARN,
		    "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
		    vnetp->instance, hw_ring_cnt);
		goto fail;
	}

	if (vnetp->rx_hwgh != NULL) {
		/*
		 * Quiesce the HW ring and the mac srs on the ring. Note
		 * that the HW ring will be restarted when the pseudo ring
		 * is started. At that time all the packets will be
		 * directly passed up to the pseudo RX ring and handled
		 * by mac srs created over the pseudo RX ring.
		 */
		mac_rx_client_quiesce(vnetp->hio_mch);
		mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
	}

	/*
	 * Bind the pseudo rings to the hwrings and start the hwrings.
	 * Note we don't need to register these with the upper mac, as we have
	 * statically exported these pseudo rxrings which are reserved for
	 * rxrings of Hybrid resource.
	 */
	rx_grp = &vnetp->rx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		/* Pick the rxrings reserved for Hybrid resource */
		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];

		/* Store the hw ring handle */
		rx_ringp->hw_rh = hw_rh[i];

		/* Bind the pseudo ring to the underlying hwring */
		mac_hwring_setup(rx_ringp->hw_rh,
		    (mac_resource_handle_t)rx_ringp, NULL);

		/* Start the hwring if the pseudo ring is already started */
		if (rx_ringp->state & VNET_RXRING_STARTED) {
			rv = mac_hwring_activate(rx_ringp->hw_rh);
			if (rv != 0) {
				mac_hwring_teardown(rx_ringp->hw_rh);
				rx_ringp->hw_rh = NULL;
				goto fail;
			}
		}
	}

	/* Get the list of the underlying TX rings. */
	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
	    MAC_RING_TYPE_TX);

	/* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
		cmn_err(CE_WARN,
		    "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
		    vnetp->instance, hw_ring_cnt);
		goto fail;
	}

	/*
	 * Now map the pseudo txrings to the hw txrings. Note we don't need
	 * to register these with the upper mac, as we have statically exported
	 * these rings. Note that these rings will continue to be used for LDC
	 * resources to peer vnets and vswitch (shared ring).
	 */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < tx_grp->ring_cnt; i++) {
		tx_ringp = &tx_grp->rings[i];
		tx_ringp->hw_rh = hw_rh[i];
		tx_ringp->state |= VNET_TXRING_HYBRID;
	}
	/* Get tx-ring update notifications from the Hybrid device */
	tx_grp->tx_notify_handle =
	    mac_client_tx_notify(vnetp->hio_mch, vnet_tx_ring_update, vnetp);

	mac_perim_exit(mph1);
	return (0);

fail:
	/* Exit the perimeter first; vnet_unbind_hwrings() re-enters it */
	mac_perim_exit(mph1);
	vnet_unbind_hwrings(vnetp);
	return (1);
}
2894
/*
 * Unbind the vnet's pseudo rings from the Hybrid device's hardware
 * rings, undoing vnet_bind_hwrings(): clear the tx mappings and the tx
 * notify callback, quiesce and tear down each bound rx hwring, then
 * restart the Hybrid mac client's rx path.  Safe to call after a
 * partial bind (each mapping is checked before teardown).  Runs under
 * the Hybrid device's mac perimeter.
 */
static void
vnet_unbind_hwrings(vnet_t *vnetp)
{
	mac_perim_handle_t	mph1;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			i;

	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

	/* Unmap the pseudo tx rings from the hw tx rings */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		tx_ringp = &tx_grp->rings[i];
		if (tx_ringp->state & VNET_TXRING_HYBRID) {
			tx_ringp->state &= ~VNET_TXRING_HYBRID;
			tx_ringp->hw_rh = NULL;
		}
	}
	/* Passing NULL removes the tx notify callback */
	(void) mac_client_tx_notify(vnetp->hio_mch, NULL,
	    tx_grp->tx_notify_handle);

	/* Stop and tear down each bound rx hwring */
	rx_grp = &vnetp->rx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
		if (rx_ringp->hw_rh != NULL) {
			/* Stop the hwring */
			mac_hwring_quiesce(rx_ringp->hw_rh);

			/* Teardown the hwring */
			mac_hwring_teardown(rx_ringp->hw_rh);
			rx_ringp->hw_rh = NULL;
		}
	}

	if (vnetp->rx_hwgh != NULL) {
		vnetp->rx_hwgh = NULL;
		/*
		 * First clear the permanent-quiesced flag of the RX srs then
		 * restart the HW ring and the mac srs on the ring.
		 */
		mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
		mac_rx_client_restart(vnetp->hio_mch);
	}

	mac_perim_exit(mph1);
}
2944
/*
 * Bind a pseudo rx ring to a LDC resource.  For LDC_SERVICE the
 * statically-allocated default ring (index 0) is used; for LDC_GUEST a
 * pseudo ring is allocated and dynamically registered with the upper
 * mac layer, under the vnet's mac perimeter.  Returns 0 on success,
 * 1 on failure.
 */
static int
vnet_bind_vgenring(vnet_res_t *vresp)
{
	vnet_t			*vnetp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	mac_perim_handle_t	mph1;
	int			rv;
	int			type;

	vnetp = vresp->vnetp;
	type = vresp->type;
	rx_grp = &vnetp->rx_grp[0];

	if (type == VIO_NET_RES_LDC_SERVICE) {
		/*
		 * Ring Index 0 is the default ring in the group and is
		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
		 * is allocated statically and is reported to the mac layer
		 * in vnet_m_capab(). So, all we need to do here, is save a
		 * reference to the associated vresp.
		 */
		rx_ringp = &rx_grp->rings[0];
		rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
		vresp->rx_ringp = (void *)rx_ringp;
		return (0);
	}
	ASSERT(type == VIO_NET_RES_LDC_GUEST);

	mac_perim_enter_by_mh(vnetp->mh, &mph1);

	rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
	if (rx_ringp == NULL) {
		cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
		    vnetp->instance);
		goto fail;
	}

	/* Store the LDC resource itself as the ring handle */
	rx_ringp->hw_rh = (mac_ring_handle_t)vresp;

	/*
	 * Save a reference to the ring in the resource for lookup during
	 * unbind. Note this is only done for LDC resources. We don't need this
	 * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its
	 * rx rings are mapped to reserved pseudo rx rings (index 1 and 2).
	 */
	vresp->rx_ringp = (void *)rx_ringp;
	rx_ringp->state |= VNET_RXRING_LDC_GUEST;

	/* Register the pseudo ring with upper-mac */
	rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
	if (rv != 0) {
		/* Undo the partial setup and free the ring */
		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
		rx_ringp->hw_rh = NULL;
		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
		goto fail;
	}

	mac_perim_exit(mph1);
	return (0);
fail:
	mac_perim_exit(mph1);
	return (1);
}
3011
/*
 * Unbind a pseudo rx ring from a LDC resource, undoing
 * vnet_bind_vgenring().  For LDC_SERVICE the static default ring's
 * reference is simply cleared; for LDC_GUEST the dynamically-allocated
 * ring is unregistered from the upper mac layer and freed, under the
 * vnet's mac perimeter.  A no-op if the resource has no bound ring.
 */
static void
vnet_unbind_vgenring(vnet_res_t *vresp)
{
	vnet_t			*vnetp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	mac_perim_handle_t	mph1;
	int			type;

	vnetp = vresp->vnetp;
	type = vresp->type;
	rx_grp = &vnetp->rx_grp[0];

	if (vresp->rx_ringp == NULL) {
		/* Nothing was bound */
		return;
	}

	if (type == VIO_NET_RES_LDC_SERVICE) {
		/*
		 * Ring Index 0 is the default ring in the group and is
		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
		 * is allocated statically and is reported to the mac layer
		 * in vnet_m_capab(). So, all we need to do here, is remove its
		 * reference to the associated vresp.
		 */
		rx_ringp = &rx_grp->rings[0];
		rx_ringp->hw_rh = NULL;
		vresp->rx_ringp = NULL;
		return;
	}
	ASSERT(type == VIO_NET_RES_LDC_GUEST);

	mac_perim_enter_by_mh(vnetp->mh, &mph1);

	rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
	vresp->rx_ringp = NULL;

	if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
		/* Unregister the pseudo ring with upper-mac */
		mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);

		rx_ringp->hw_rh = NULL;
		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;

		/* Free the pseudo rx ring */
		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
	}

	mac_perim_exit(mph1);
}
3063
3064 static void
vnet_unbind_rings(vnet_res_t * vresp)3065 vnet_unbind_rings(vnet_res_t *vresp)
3066 {
3067 switch (vresp->type) {
3068
3069 case VIO_NET_RES_LDC_SERVICE:
3070 case VIO_NET_RES_LDC_GUEST:
3071 vnet_unbind_vgenring(vresp);
3072 break;
3073
3074 case VIO_NET_RES_HYBRID:
3075 vnet_unbind_hwrings(vresp->vnetp);
3076 break;
3077
3078 default:
3079 break;
3080
3081 }
3082 }
3083
3084 static int
vnet_bind_rings(vnet_res_t * vresp)3085 vnet_bind_rings(vnet_res_t *vresp)
3086 {
3087 int rv;
3088
3089 switch (vresp->type) {
3090
3091 case VIO_NET_RES_LDC_SERVICE:
3092 case VIO_NET_RES_LDC_GUEST:
3093 rv = vnet_bind_vgenring(vresp);
3094 break;
3095
3096 case VIO_NET_RES_HYBRID:
3097 rv = vnet_bind_hwrings(vresp->vnetp);
3098 break;
3099
3100 default:
3101 rv = 1;
3102 break;
3103
3104 }
3105
3106 return (rv);
3107 }
3108
3109 /* ARGSUSED */
3110 int
vnet_hio_stat(void * arg,uint_t stat,uint64_t * val)3111 vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
3112 {
3113 vnet_t *vnetp = (vnet_t *)arg;
3114
3115 *val = mac_stat_get(vnetp->hio_mh, stat);
3116 return (0);
3117 }
3118
3119 /*
3120 * The start() and stop() routines for the Hybrid resource below, are just
3121 * dummy functions. This is provided to avoid resource type specific code in
3122 * vnet_start_resources() and vnet_stop_resources(). The starting and stopping
3123 * of the Hybrid resource happens in the context of the mac_client interfaces
3124 * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup().
3125 */
/* ARGSUSED */
static int
vnet_hio_start(void *arg)
{
	/*
	 * Nothing to do; the Hybrid resource is actually started via the
	 * mac_client interfaces invoked in vnet_hio_mac_init().
	 */
	return (0);
}
3132
/* ARGSUSED */
static void
vnet_hio_stop(void *arg)
{
	/*
	 * Nothing to do; the Hybrid resource is actually stopped via the
	 * mac_client interfaces invoked in vnet_hio_mac_cleanup().
	 */
}
3138
3139 mblk_t *
vnet_hio_tx(void * arg,mblk_t * mp)3140 vnet_hio_tx(void *arg, mblk_t *mp)
3141 {
3142 vnet_pseudo_tx_ring_t *tx_ringp;
3143 mblk_t *nextp;
3144 mblk_t *ret_mp;
3145
3146 tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
3147 for (;;) {
3148 nextp = mp->b_next;
3149 mp->b_next = NULL;
3150
3151 ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
3152 if (ret_mp != NULL) {
3153 ret_mp->b_next = nextp;
3154 mp = ret_mp;
3155 break;
3156 }
3157
3158 if ((mp = nextp) == NULL)
3159 break;
3160 }
3161 return (mp);
3162 }
3163
3164 #ifdef VNET_IOC_DEBUG
3165
3166 /*
3167 * The ioctl entry point is used only for debugging for now. The ioctl commands
3168 * can be used to force the link state of the channel connected to vsw.
3169 */
3170 static void
vnet_m_ioctl(void * arg,queue_t * q,mblk_t * mp)3171 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
3172 {
3173 struct iocblk *iocp;
3174 vnet_t *vnetp;
3175
3176 iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
3177 iocp->ioc_error = 0;
3178 vnetp = (vnet_t *)arg;
3179
3180 if (vnetp == NULL) {
3181 miocnak(q, mp, 0, EINVAL);
3182 return;
3183 }
3184
3185 switch (iocp->ioc_cmd) {
3186
3187 case VNET_FORCE_LINK_DOWN:
3188 case VNET_FORCE_LINK_UP:
3189 vnet_force_link_state(vnetp, q, mp);
3190 break;
3191
3192 default:
3193 iocp->ioc_error = EINVAL;
3194 miocnak(q, mp, 0, iocp->ioc_error);
3195 break;
3196
3197 }
3198 }
3199
/*
 * Forward a force-link-state ioctl to the resource connected to the
 * vswitch, via its registered mc_ioctl callback.  The vsw_fp_rw reader
 * lock is held across the callback so the resource cannot be removed
 * while in use; silently does nothing if no vswitch resource exists.
 */
static void
vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
{
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	vnet_res_t	*vresp;

	READ_ENTER(&vnetp->vsw_fp_rw);

	vresp = vnetp->vsw_fp;
	if (vresp == NULL) {
		/* No vswitch resource; nothing to forward to */
		RW_EXIT(&vnetp->vsw_fp_rw);
		return;
	}

	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	cbp->mc_ioctl(macp->m_driver, q, mp);

	RW_EXIT(&vnetp->vsw_fp_rw);
}
3221
3222 #else
3223
3224 static void
vnet_m_ioctl(void * arg,queue_t * q,mblk_t * mp)3225 vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
3226 {
3227 vnet_t *vnetp;
3228
3229 vnetp = (vnet_t *)arg;
3230
3231 if (vnetp == NULL) {
3232 miocnak(q, mp, 0, EINVAL);
3233 return;
3234 }
3235
3236 /* ioctl support only for debugging */
3237 miocnak(q, mp, 0, ENOTSUP);
3238 }
3239
3240 #endif
3241