1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <sys/types.h>
27 #include <sys/errno.h>
28 #include <sys/debug.h>
29 #include <sys/time.h>
30 #include <sys/sysmacros.h>
31 #include <sys/systm.h>
32 #include <sys/user.h>
33 #include <sys/stropts.h>
34 #include <sys/stream.h>
35 #include <sys/strlog.h>
36 #include <sys/strsubr.h>
37 #include <sys/cmn_err.h>
38 #include <sys/cpu.h>
39 #include <sys/kmem.h>
40 #include <sys/conf.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/ksynch.h>
44 #include <sys/stat.h>
45 #include <sys/kstat.h>
46 #include <sys/vtrace.h>
47 #include <sys/strsun.h>
48 #include <sys/dlpi.h>
49 #include <sys/ethernet.h>
50 #include <net/if.h>
51 #include <sys/varargs.h>
52 #include <sys/machsystm.h>
53 #include <sys/modctl.h>
54 #include <sys/modhash.h>
55 #include <sys/mac_provider.h>
56 #include <sys/mac_ether.h>
57 #include <sys/taskq.h>
58 #include <sys/note.h>
59 #include <sys/mach_descrip.h>
60 #include <sys/mac_provider.h>
61 #include <sys/mdeg.h>
62 #include <sys/ldc.h>
63 #include <sys/vsw_fdb.h>
64 #include <sys/vsw.h>
65 #include <sys/vio_mailbox.h>
66 #include <sys/vnet_mailbox.h>
67 #include <sys/vnet_common.h>
68 #include <sys/vio_util.h>
69 #include <sys/sdt.h>
70 #include <sys/atomic.h>
71 #include <sys/callb.h>
72 #include <sys/vlan.h>
73
74 /*
75 * Function prototypes.
76 */
77 static int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
78 static int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
79 static int vsw_unattach(vsw_t *vswp);
80 static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
81 static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *);
82 void vsw_destroy_rxpools(void *);
83
84 /* MDEG routines */
85 static int vsw_mdeg_register(vsw_t *vswp);
86 static void vsw_mdeg_unregister(vsw_t *vswp);
87 static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
88 static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
89 static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
90 static int vsw_read_mdprops(vsw_t *vswp);
91 static void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
92 mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp,
93 uint16_t *nvidsp, uint16_t *default_idp);
94 static void vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp,
95 mde_cookie_t node, uint64_t *bw);
96 static int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
97 md_t *mdp, mde_cookie_t *node);
98 static void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
99 mde_cookie_t node);
100 static void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
101 uint32_t *mtu);
102 static int vsw_mtu_update(vsw_t *vswp, uint32_t mtu);
103 static void vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
104 boolean_t *pls);
105 static void vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
106 uint64_t *bw);
107 static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
108 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
109 static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1,
110 vsw_vlanid_t *vids2, int nvids);
111
112 /* Mac driver related routines */
113 static int vsw_mac_register(vsw_t *);
114 static int vsw_mac_unregister(vsw_t *);
115 static int vsw_m_stat(void *, uint_t, uint64_t *);
116 static void vsw_m_stop(void *arg);
117 static int vsw_m_start(void *arg);
118 static int vsw_m_unicst(void *arg, const uint8_t *);
119 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
120 static int vsw_m_promisc(void *arg, boolean_t);
121 static mblk_t *vsw_m_tx(void *arg, mblk_t *);
122 void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state);
123 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
124 mblk_t *mp, vsw_macrx_flags_t flags);
125 void vsw_physlink_state_update(vsw_t *vswp);
126
127 /*
128 * Functions imported from other files.
129 */
130 extern void vsw_setup_switching_thread(void *arg);
131 extern int vsw_setup_switching_start(vsw_t *vswp);
132 extern void vsw_setup_switching_stop(vsw_t *vswp);
133 extern int vsw_setup_switching(vsw_t *);
134 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
135 vsw_port_t *port, mac_resource_handle_t mrh);
136 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
137 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
138 extern void vsw_del_mcst_vsw(vsw_t *);
139 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
140 extern void vsw_detach_ports(vsw_t *vswp);
141 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
142 extern int vsw_port_detach(vsw_t *vswp, int p_instance);
143 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
144 md_t *prev_mdp, mde_cookie_t prev_mdex);
145 extern int vsw_port_attach(vsw_port_t *port);
146 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
147 extern int vsw_mac_open(vsw_t *vswp);
148 extern void vsw_mac_close(vsw_t *vswp);
149 extern void vsw_mac_cleanup_ports(vsw_t *vswp);
150 extern void vsw_unset_addrs(vsw_t *vswp);
151 extern void vsw_setup_switching_post_process(vsw_t *vswp);
152 extern void vsw_create_vlans(void *arg, int type);
153 extern void vsw_destroy_vlans(void *arg, int type);
154 extern void vsw_vlan_add_ids(void *arg, int type);
155 extern void vsw_vlan_remove_ids(void *arg, int type);
156 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
157 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
158 mblk_t **npt);
159 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
160 extern void vsw_hio_cleanup(vsw_t *vswp);
161 extern void vsw_hio_start_ports(vsw_t *vswp);
162 extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
163 extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
164 extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
165 extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid,
166 vsw_vlanid_t *new_vids, int new_nvids);
167 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
168 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
169 extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans,
170 uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids);
171 extern void vsw_reset_ports(vsw_t *vswp);
172 extern void vsw_port_reset(vsw_port_t *portp);
173 extern void vsw_physlink_update_ports(vsw_t *vswp);
174 extern void vsw_update_bandwidth(vsw_t *vswp, vsw_port_t *port, int type,
175 uint64_t maxbw);
176
177 /*
178 * Internal tunables.
179 */
180 int vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
181 int vsw_wretries = 100; /* # of write attempts */
182 int vsw_setup_switching_delay = 3; /* setup sw timeout interval in sec */
183 int vsw_mac_open_retries = 300; /* max # of mac_open() retries */
184 /* 300*3 = 900sec(15min) of max tmout */
185 int vsw_ldc_tx_delay = 5; /* delay(ticks) for tx retries */
186 int vsw_ldc_tx_retries = 10; /* # of ldc tx retries */
187 int vsw_ldc_retries = 5; /* # of ldc_close() retries */
188 int vsw_ldc_delay = 1000; /* 1 ms delay for ldc_close() */
189 boolean_t vsw_ldc_rxthr_enabled = B_TRUE; /* LDC Rx thread enabled */
190 boolean_t vsw_ldc_txthr_enabled = B_TRUE; /* LDC Tx thread enabled */
191 int vsw_rxpool_cleanup_delay = 100000; /* 100ms */
192
193
194 uint32_t vsw_fdb_nchains = 8; /* # of chains in fdb hash table */
195 uint32_t vsw_vlan_nchains = 4; /* # of chains in vlan id hash table */
196 uint32_t vsw_ethermtu = 1500; /* mtu of the device */
197
198 /* delay in usec to wait for all references on a fdb entry to be dropped */
199 uint32_t vsw_fdbe_refcnt_delay = 10;
200
201 /*
202 * Default vlan id. This is only used internally when the "default-vlan-id"
203 * property is not present in the MD device node. Therefore, this should not be
204 * used as a tunable; if this value is changed, the corresponding variable
205 * should be updated to the same value in all vnets connected to this vsw.
206 */
207 uint16_t vsw_default_vlan_id = 1;
208
209 /*
210 * Workaround for a version handshake bug in obp's vnet.
211 * If vsw initiates version negotiation starting from the highest version,
212 * obp sends a nack and terminates version handshake. To workaround
213 * this, we do not initiate version handshake when the channel comes up.
214 * Instead, we wait for the peer to send its version info msg and go through
215 * the version protocol exchange. If we successfully negotiate a version,
216 * before sending the ack, we send our version info msg to the peer
217 * using the <major,minor> version that we are about to ack.
218 */
219 boolean_t vsw_obp_ver_proto_workaround = B_TRUE;
220
221 /*
222 * In the absence of "priority-ether-types" property in MD, the following
223 * internal tunable can be set to specify a single priority ethertype.
224 */
225 uint64_t vsw_pri_eth_type = 0;
226
227 /*
228 * Number of transmit priority buffers that are preallocated per device.
229 * This number is chosen to be a small value to throttle transmission
230 * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
231 */
232 uint32_t vsw_pri_tx_nmblks = 64;
233
234 /*
235 * Number of RARP packets sent to announce macaddr to the physical switch,
236 * after vsw's physical device is changed dynamically or after a guest (client
237 * vnet) is live migrated in.
238 */
239 uint32_t vsw_publish_macaddr_count = 3;
240
241 /*
242 * Enable/disable HybridIO
243 */
244 boolean_t vsw_hio_enabled = B_TRUE;
245
246 /*
247 * Max retries for HybridIO cleanup
248 */
249 int vsw_hio_max_cleanup_retries = 10;
250
251 /*
252 * 10ms delay for HybridIO cleanup
253 */
254 int vsw_hio_cleanup_delay = 10000;
255
256 /*
257 * Descriptor ring modes of LDC data transfer:
258 *
259 * 1) TxDring mode:
260 * In versions < v1.6 of VIO Protocol, we support only TxDring mode. In this
261 * mode, we create a transmit descriptor ring and export it to the peer through
262 * dring registration process of handshake. The descriptor ring is exported
263 * using LDC shared memory. Each descriptor is associated with a data buffer.
264 * The data buffer is also exported over LDC and the cookies for this data
265 * buffer are provided in the descriptor. The peer maps this ring as its
266 * receive ring. Similarly, the peer exports a transmit descriptor ring which
267 * is mapped by this device as its receive ring. In this mode, in a given data
268 * transfer direction, the transmitter copies the data to the exported data
269 * buffer (owned by itself), bound to the descriptor. The receiver uses the LDC
270 * cookies specified in the descriptor to copy the data into the receiving
271 * guest through the hypervisor (ldc_mem_copy()).
272 *
273 * 2) RxDringData mode:
274 * In versions >= v1.6 of VIO Protocol, we also support RxDringData mode. In
275 * this mode, we create a receive descriptor ring and export it to the peer
276 * through dring registration process of handshake. In addition, we export a
277 * receive buffer area and provide that information also in the dring
278 * registration message. The descriptor ring and the data buffer area are
279 * exported using LDC shared memory. Each descriptor is associated with a data
280 * buffer in the data buffer area and the offset of the specific data buffer
281 * within this area is specified in the descriptor. The peer maps this ring
282 * along with the data buffer area as its transmit ring. Similarly, the peer
283 * exports a receive ring which is mapped by this device as its transmit ring,
284 * along with its buffer area. In this mode, in a given data transfer
285 * direction, the transmitter copies the data to the data buffer offset
286 * specified in the descriptor. The receiver simply picks up the data buffer
287 * (owned by itself) without any copy operation into the receiving guest.
288 *
289 * We enable RxDringData mode during handshake negotiations if LDC supports
290 * mapping in large areas of shared memory(see ldc_is_viotsb_configured() API),
291 * which is required to support RxDringData mode.
292 */
293
294 /*
295 * Number of descriptors; must be power of 2.
296 */
297 uint32_t vsw_num_descriptors = VSW_NUM_DESCRIPTORS;
298
299 /*
300 * In RxDringData mode, # of buffers is determined by multiplying the # of
301 * descriptors with the factor below. Note that the factor must be > 1; i.e,
302 * the # of buffers must always be > # of descriptors. This is needed because,
303 * while the shared memory buffers are sent up the stack on the receiver, the
304 * sender needs additional buffers that can be used for further transmits.
305 * See vsw_setup_rx_dring() for details.
306 */
307 uint32_t vsw_nrbufs_factor = 2;
308
309 /*
310 * Delay when rx descr not ready; used in both dring modes.
311 */
312 int vsw_recv_delay = 0;
313
314 /*
315 * Retry when rx descr not ready; used in both dring modes.
316 */
317 int vsw_recv_retries = 5;
318
319 /*
320 * Max number of mblks received in one receive operation.
321 */
322 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
323
324 /*
325 * Internal tunables for receive buffer pools, that is, the size and number of
326 * mblks for each pool. At least 3 sizes must be specified if these are used.
327 * The sizes must be specified in increasing order. Non-zero value of the first
328 * size will be used as a hint to use these values instead of the algorithm
329 * that determines the sizes based on MTU. Used in TxDring mode only.
330 */
331 uint32_t vsw_mblk_size1 = 0;
332 uint32_t vsw_mblk_size2 = 0;
333 uint32_t vsw_mblk_size3 = 0;
334 uint32_t vsw_mblk_size4 = 0;
335 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS; /* number of mblks for pool1 */
336 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS; /* number of mblks for pool2 */
337 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS; /* number of mblks for pool3 */
338 uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS; /* number of mblks for pool4 */
339
340 /*
341 * Set this to non-zero to enable additional internal receive buffer pools
342 * based on the MTU of the device for better performance at the cost of more
343 * memory consumption. This is turned off by default, to use allocb(9F) for
344 * receive buffer allocations of sizes > 2K.
345 */
346 boolean_t vsw_jumbo_rxpools = B_FALSE;
347
348 /*
349 * vsw_max_tx_qcount is the maximum # of packets that can be queued
350 * before the tx worker thread begins processing the queue. Its value
351 * is chosen to be 4x the default length of tx descriptor ring.
352 */
353 uint32_t vsw_max_tx_qcount = 4 * VSW_NUM_DESCRIPTORS;
354
355 /*
356 * MAC callbacks
357 */
358 static mac_callbacks_t vsw_m_callbacks = {
359 0,
360 vsw_m_stat,
361 vsw_m_start,
362 vsw_m_stop,
363 vsw_m_promisc,
364 vsw_m_multicst,
365 vsw_m_unicst,
366 vsw_m_tx
367 };
368
369 static struct cb_ops vsw_cb_ops = {
370 nulldev, /* cb_open */
371 nulldev, /* cb_close */
372 nodev, /* cb_strategy */
373 nodev, /* cb_print */
374 nodev, /* cb_dump */
375 nodev, /* cb_read */
376 nodev, /* cb_write */
377 nodev, /* cb_ioctl */
378 nodev, /* cb_devmap */
379 nodev, /* cb_mmap */
380 nodev, /* cb_segmap */
381 nochpoll, /* cb_chpoll */
382 ddi_prop_op, /* cb_prop_op */
383 NULL, /* cb_stream */
384 D_MP, /* cb_flag */
385 CB_REV, /* rev */
386 nodev, /* int (*cb_aread)() */
387 nodev /* int (*cb_awrite)() */
388 };
389
390 static struct dev_ops vsw_ops = {
391 DEVO_REV, /* devo_rev */
392 0, /* devo_refcnt */
393 NULL, /* devo_getinfo */
394 nulldev, /* devo_identify */
395 nulldev, /* devo_probe */
396 vsw_attach, /* devo_attach */
397 vsw_detach, /* devo_detach */
398 nodev, /* devo_reset */
399 &vsw_cb_ops, /* devo_cb_ops */
400 (struct bus_ops *)NULL, /* devo_bus_ops */
401 ddi_power /* devo_power */
402 };
403
404 extern struct mod_ops mod_driverops;
405 static struct modldrv vswmodldrv = {
406 &mod_driverops,
407 "sun4v Virtual Switch",
408 &vsw_ops,
409 };
410
/*
 * Take / drop all three per-channel locks as a unit.
 *
 * LDC_ENTER_LOCK acquires ldc_cblock, then ldc_rxlock, then ldc_txlock;
 * LDC_EXIT_LOCK releases them in the opposite order.
 *
 * Each macro expands to a single statement (do { } while (0)), so it is
 * safe as the body of an unbraced if/else; the caller supplies the
 * terminating semicolon. Note that 'ldcp' is evaluated three times, so it
 * must not have side effects.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {	\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {	\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
	} while (0)
419
420 /* Driver soft state ptr */
421 static void *vsw_state;
422
423 /*
424 * Linked list of "vsw_t" structures - one per instance.
425 */
426 vsw_t *vsw_head = NULL;
427 krwlock_t vsw_rw;
428
/*
 * Property names.
 *
 * Note that the vsw-level and port-level bandwidth properties
 * (vsw_maxbw_propname, port_maxbw_propname) are both spelled "maxbw".
 */
static char	vdev_propname[] = "virtual-device";
static char	vsw_propname[] = "virtual-network-switch";
static char	physdev_propname[] = "vsw-phys-dev";
static char	smode_propname[] = "vsw-switch-mode";
static char	macaddr_propname[] = "local-mac-address";
static char	remaddr_propname[] = "remote-mac-address";
static char	ldcids_propname[] = "ldc-ids";
static char	chan_propname[] = "channel-endpoint";
static char	id_propname[] = "id";
static char	reg_propname[] = "reg";
static char	pri_types_propname[] = "priority-ether-types";
static char	vsw_pvid_propname[] = "port-vlan-id";
static char	vsw_vid_propname[] = "vlan-id";
static char	vsw_dvid_propname[] = "default-vlan-id";
static char	port_pvid_propname[] = "remote-port-vlan-id";
static char	port_vid_propname[] = "remote-vlan-id";
static char	hybrid_propname[] = "hybrid";
static char	vsw_mtu_propname[] = "mtu";
static char	vsw_linkprop_propname[] = "linkprop";
static char	vsw_maxbw_propname[] = "maxbw";
static char	port_maxbw_propname[] = "maxbw";
453
454 /*
455 * Matching criteria passed to the MDEG to register interest
456 * in changes to 'virtual-device-port' nodes identified by their
457 * 'id' property.
458 */
459 static md_prop_match_t vport_prop_match[] = {
460 { MDET_PROP_VAL, "id" },
461 { MDET_LIST_END, NULL }
462 };
463
464 static mdeg_node_match_t vport_match = { "virtual-device-port",
465 vport_prop_match };
466
467 /*
468 * Matching criteria passed to the MDEG to register interest
469 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
470 * by their 'name' and 'cfg-handle' properties.
471 */
472 static md_prop_match_t vdev_prop_match[] = {
473 { MDET_PROP_STR, "name" },
474 { MDET_PROP_VAL, "cfg-handle" },
475 { MDET_LIST_END, NULL }
476 };
477
478 static mdeg_node_match_t vdev_match = { "virtual-device",
479 vdev_prop_match };
480
481
482 /*
483 * Specification of an MD node passed to the MDEG to filter any
484 * 'vport' nodes that do not belong to the specified node. This
485 * template is copied for each vsw instance and filled in with
486 * the appropriate 'cfg-handle' value before being passed to the MDEG.
487 */
488 static mdeg_prop_spec_t vsw_prop_template[] = {
489 { MDET_PROP_STR, "name", vsw_propname },
490 { MDET_PROP_VAL, "cfg-handle", NULL },
491 { MDET_LIST_END, NULL, NULL }
492 };
493
/*
 * Store 'val' in the ps_val member of entry 1 of a property spec array
 * (the "cfg-handle" entry in vsw_prop_template). Expands to a single
 * parenthesized expression with no trailing semicolon, so it can be used
 * anywhere a statement or expression is expected, including unbraced
 * if/else bodies.
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
495
496 #ifdef DEBUG
/*
 * Debug message mask. Each bit enables one class of message:
 *
 *	0x01: Function entry/exit tracing
 *	0x02: Internal function messages
 *	0x04: Verbose internal messages
 *	0x08: Warning messages
 *	0x10: Error messages
 *
 * Set to 0x1f to enable all msgs or 0x0 to turn all off.
 */
int vswdbg = 0x0;
511
512 void
vswdebug(vsw_t * vswp,const char * fmt,...)513 vswdebug(vsw_t *vswp, const char *fmt, ...)
514 {
515 char buf[512];
516 va_list ap;
517
518 va_start(ap, fmt);
519 (void) vsprintf(buf, fmt, ap);
520 va_end(ap);
521
522 if (vswp == NULL)
523 cmn_err(CE_CONT, "%s\n", buf);
524 else
525 cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
526 }
527
528 #endif /* DEBUG */
529
530 static struct modlinkage modlinkage = {
531 MODREV_1,
532 &vswmodldrv,
533 NULL
534 };
535
536 int
_init(void)537 _init(void)
538 {
539 int status;
540
541 rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
542
543 status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
544 if (status != 0) {
545 return (status);
546 }
547
548 mac_init_ops(&vsw_ops, DRV_NAME);
549 status = mod_install(&modlinkage);
550 if (status != 0) {
551 ddi_soft_state_fini(&vsw_state);
552 }
553 return (status);
554 }
555
556 int
_fini(void)557 _fini(void)
558 {
559 int status;
560
561 status = mod_remove(&modlinkage);
562 if (status != 0)
563 return (status);
564 mac_fini_ops(&vsw_ops);
565 ddi_soft_state_fini(&vsw_state);
566
567 rw_destroy(&vsw_rw);
568
569 return (status);
570 }
571
572 int
_info(struct modinfo * modinfop)573 _info(struct modinfo *modinfop)
574 {
575 return (mod_info(&modlinkage, modinfop));
576 }
577
578 static int
vsw_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)579 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
580 {
581 vsw_t *vswp;
582 int instance;
583 char hashname[MAXNAMELEN];
584 char qname[TASKQ_NAMELEN];
585 vsw_attach_progress_t progress = PROG_init;
586 int rv;
587
588 switch (cmd) {
589 case DDI_ATTACH:
590 break;
591 case DDI_RESUME:
592 /* nothing to do for this non-device */
593 return (DDI_SUCCESS);
594 case DDI_PM_RESUME:
595 default:
596 return (DDI_FAILURE);
597 }
598
599 instance = ddi_get_instance(dip);
600 if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
601 DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
602 return (DDI_FAILURE);
603 }
604 vswp = ddi_get_soft_state(vsw_state, instance);
605
606 if (vswp == NULL) {
607 DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
608 goto vsw_attach_fail;
609 }
610
611 vswp->dip = dip;
612 vswp->instance = instance;
613 vswp->phys_link_state = LINK_STATE_UNKNOWN;
614 ddi_set_driver_private(dip, (caddr_t)vswp);
615
616 mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
617 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
618 mutex_init(&vswp->sw_thr_lock, NULL, MUTEX_DRIVER, NULL);
619 cv_init(&vswp->sw_thr_cv, NULL, CV_DRIVER, NULL);
620 rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL);
621 rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
622 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
623 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
624
625 progress |= PROG_locks;
626
627 rv = vsw_read_mdprops(vswp);
628 if (rv != 0)
629 goto vsw_attach_fail;
630
631 progress |= PROG_readmd;
632
633 /* setup the unicast forwarding database */
634 (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
635 vswp->instance);
636 D2(vswp, "creating unicast hash table (%s)...", hashname);
637 vswp->fdb_nchains = vsw_fdb_nchains;
638 vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
639 mod_hash_null_valdtor, sizeof (void *));
640 vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
641 progress |= PROG_fdb;
642
643 /* setup the multicast fowarding database */
644 (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
645 vswp->instance);
646 D2(vswp, "creating multicast hash table %s)...", hashname);
647 vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
648 mod_hash_null_valdtor, sizeof (void *));
649
650 progress |= PROG_mfdb;
651
652 /*
653 * Create the taskq which will process all the VIO
654 * control messages.
655 */
656 (void) snprintf(qname, TASKQ_NAMELEN, "taskq%d", vswp->instance);
657 if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
658 TASKQ_DEFAULTPRI, 0)) == NULL) {
659 cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
660 vswp->instance);
661 goto vsw_attach_fail;
662 }
663
664 progress |= PROG_taskq;
665
666 (void) snprintf(qname, TASKQ_NAMELEN, "rxpool_taskq%d",
667 vswp->instance);
668 if ((vswp->rxp_taskq = ddi_taskq_create(vswp->dip, qname, 1,
669 TASKQ_DEFAULTPRI, 0)) == NULL) {
670 cmn_err(CE_WARN, "!vsw%d: Unable to create rxp task queue",
671 vswp->instance);
672 goto vsw_attach_fail;
673 }
674
675 progress |= PROG_rxp_taskq;
676
677 /* prevent auto-detaching */
678 if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
679 DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
680 cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
681 "instance %u", DDI_NO_AUTODETACH, instance);
682 }
683
684 /*
685 * The null switching function is set to avoid panic until
686 * switch mode is setup.
687 */
688 vswp->vsw_switch_frame = vsw_switch_frame_nop;
689
690 /*
691 * Setup the required switching mode, based on the mdprops that we read
692 * earlier. We start a thread to do this, to avoid calling mac_open()
693 * directly from attach().
694 */
695 rv = vsw_setup_switching_start(vswp);
696 if (rv != 0) {
697 goto vsw_attach_fail;
698 }
699
700 progress |= PROG_swmode;
701
702 /* Register with mac layer as a provider */
703 rv = vsw_mac_register(vswp);
704 if (rv != 0)
705 goto vsw_attach_fail;
706
707 progress |= PROG_macreg;
708
709 /*
710 * Now we have everything setup, register an interest in
711 * specific MD nodes.
712 *
713 * The callback is invoked in 2 cases, firstly if upon mdeg
714 * registration there are existing nodes which match our specified
715 * criteria, and secondly if the MD is changed (and again, there
716 * are nodes which we are interested in present within it. Note
717 * that our callback will be invoked even if our specified nodes
718 * have not actually changed).
719 *
720 */
721 rv = vsw_mdeg_register(vswp);
722 if (rv != 0)
723 goto vsw_attach_fail;
724
725 progress |= PROG_mdreg;
726
727 vswp->attach_progress = progress;
728
729 WRITE_ENTER(&vsw_rw);
730 vswp->next = vsw_head;
731 vsw_head = vswp;
732 RW_EXIT(&vsw_rw);
733
734 ddi_report_dev(vswp->dip);
735 return (DDI_SUCCESS);
736
737 vsw_attach_fail:
738 DERR(NULL, "vsw_attach: failed");
739
740 vswp->attach_progress = progress;
741 (void) vsw_unattach(vswp);
742 ddi_soft_state_free(vsw_state, instance);
743 return (DDI_FAILURE);
744 }
745
746 static int
vsw_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)747 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
748 {
749 vsw_t **vswpp, *vswp;
750 int instance;
751
752 instance = ddi_get_instance(dip);
753 vswp = ddi_get_soft_state(vsw_state, instance);
754
755 if (vswp == NULL) {
756 return (DDI_FAILURE);
757 }
758
759 switch (cmd) {
760 case DDI_DETACH:
761 break;
762 case DDI_SUSPEND:
763 case DDI_PM_SUSPEND:
764 default:
765 return (DDI_FAILURE);
766 }
767
768 D2(vswp, "detaching instance %d", instance);
769
770 if (vsw_unattach(vswp) != 0) {
771 return (DDI_FAILURE);
772 }
773
774 ddi_remove_minor_node(dip, NULL);
775
776 WRITE_ENTER(&vsw_rw);
777 for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
778 if (*vswpp == vswp) {
779 *vswpp = vswp->next;
780 break;
781 }
782 }
783 RW_EXIT(&vsw_rw);
784
785 ddi_soft_state_free(vsw_state, instance);
786
787 return (DDI_SUCCESS);
788 }
789
790 /*
791 * Common routine to handle vsw_attach() failure and vsw_detach(). Note that
792 * the only reason this function could fail is if mac_unregister() fails.
793 * Otherwise, this function must ensure that all resources are freed and return
794 * success.
795 */
796 static int
vsw_unattach(vsw_t * vswp)797 vsw_unattach(vsw_t *vswp)
798 {
799 vsw_attach_progress_t progress;
800
801 progress = vswp->attach_progress;
802
803 /*
804 * Unregister from the gldv3 subsystem. This can fail, in particular
805 * if there are still any open references to this mac device; in which
806 * case we just return failure without continuing to detach further.
807 */
808 if (progress & PROG_macreg) {
809 if (vsw_mac_unregister(vswp) != 0) {
810 cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
811 "MAC layer", vswp->instance);
812 return (1);
813 }
814 progress &= ~PROG_macreg;
815 }
816
817 /*
818 * Now that we have unregistered from gldv3, we must finish all other
819 * steps and successfully return from this function; otherwise we will
820 * end up leaving the device in a broken/unusable state.
821 *
822 * If we have registered with mdeg, unregister now to stop further
823 * callbacks to this vsw device and/or its ports. Then, detach any
824 * existing ports.
825 */
826 if (progress & PROG_mdreg) {
827 vsw_mdeg_unregister(vswp);
828 vsw_detach_ports(vswp);
829 progress &= ~PROG_mdreg;
830 }
831
832 /*
833 * If we have started a thread to setup the switching mode, stop it, if
834 * it is still running. If it has finished setting up the switching
835 * mode, then we need to clean up some additional things if we are
836 * running in L2 mode: first free up any hybrid resources; then stop
837 * and close the underlying physical device. Note that we would have
838 * already released all per mac_client resources (ucast, mcast addrs,
839 * hio-shares etc) as all the ports are detached and if the vsw device
840 * itself was in use as an interface, it has been unplumbed (otherwise
841 * mac_unregister() above would fail).
842 */
843 if (progress & PROG_swmode) {
844
845 vsw_setup_switching_stop(vswp);
846
847 if (vswp->hio_capable == B_TRUE) {
848 vsw_hio_cleanup(vswp);
849 vswp->hio_capable = B_FALSE;
850 }
851
852 mutex_enter(&vswp->mac_lock);
853 vsw_mac_close(vswp);
854 mutex_exit(&vswp->mac_lock);
855
856 progress &= ~PROG_swmode;
857 }
858
859 /*
860 * We now destroy the taskq used to clean up rx mblk pools that
861 * couldn't be destroyed when the ports/channels were detached.
862 * We implicitly wait for those tasks to complete in
863 * ddi_taskq_destroy().
864 */
865 if (progress & PROG_rxp_taskq) {
866 ddi_taskq_destroy(vswp->rxp_taskq);
867 progress &= ~PROG_rxp_taskq;
868 }
869
870 /*
871 * By now any pending tasks have finished and the underlying
872 * ldc's have been destroyed, so its safe to delete the control
873 * message taskq.
874 */
875 if (progress & PROG_taskq) {
876 ddi_taskq_destroy(vswp->taskq_p);
877 progress &= ~PROG_taskq;
878 }
879
880 /* Destroy the multicast hash table */
881 if (progress & PROG_mfdb) {
882 mod_hash_destroy_hash(vswp->mfdb);
883 progress &= ~PROG_mfdb;
884 }
885
886 /* Destroy the vlan hash table and fdb */
887 if (progress & PROG_fdb) {
888 vsw_destroy_vlans(vswp, VSW_LOCALDEV);
889 mod_hash_destroy_hash(vswp->fdb_hashp);
890 progress &= ~PROG_fdb;
891 }
892
893 if (progress & PROG_readmd) {
894 if (VSW_PRI_ETH_DEFINED(vswp)) {
895 kmem_free(vswp->pri_types,
896 sizeof (uint16_t) * vswp->pri_num_types);
897 (void) vio_destroy_mblks(vswp->pri_tx_vmp);
898 }
899 progress &= ~PROG_readmd;
900 }
901
902 if (progress & PROG_locks) {
903 rw_destroy(&vswp->plist.lockrw);
904 rw_destroy(&vswp->mfdbrw);
905 rw_destroy(&vswp->if_lockrw);
906 rw_destroy(&vswp->maccl_rwlock);
907 cv_destroy(&vswp->sw_thr_cv);
908 mutex_destroy(&vswp->sw_thr_lock);
909 mutex_destroy(&vswp->mca_lock);
910 mutex_destroy(&vswp->mac_lock);
911 progress &= ~PROG_locks;
912 }
913
914 vswp->attach_progress = progress;
915
916 return (0);
917 }
918
919 void
vsw_destroy_rxpools(void * arg)920 vsw_destroy_rxpools(void *arg)
921 {
922 vio_mblk_pool_t *poolp = (vio_mblk_pool_t *)arg;
923 vio_mblk_pool_t *npoolp;
924
925 while (poolp != NULL) {
926 npoolp = poolp->nextp;
927 while (vio_destroy_mblks(poolp) != 0) {
928 delay(drv_usectohz(vsw_rxpool_cleanup_delay));
929 }
930 poolp = npoolp;
931 }
932 }
933
934 /*
935 * Get the value of the "vsw-phys-dev" property in the specified
936 * node. This property is the name of the physical device that
937 * the virtual switch will use to talk to the outside world.
938 *
939 * Note it is valid for this property to be NULL (but the property
940 * itself must exist). Callers of this routine should verify that
941 * the value returned is what they expected (i.e. either NULL or non NULL).
942 *
943 * On success returns value of the property in region pointed to by
944 * the 'name' argument, and with return value of 0. Otherwise returns 1.
945 */
946 static int
vsw_get_md_physname(vsw_t * vswp,md_t * mdp,mde_cookie_t node,char * name)947 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
948 {
949 int len = 0;
950 int instance;
951 char *physname = NULL;
952 char *dev;
953 const char *dev_name;
954 char myname[MAXNAMELEN];
955
956 dev_name = ddi_driver_name(vswp->dip);
957 instance = ddi_get_instance(vswp->dip);
958 (void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
959
960 if (md_get_prop_data(mdp, node, physdev_propname,
961 (uint8_t **)(&physname), &len) != 0) {
962 cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
963 "device(s) from MD", vswp->instance);
964 return (1);
965 } else if ((strlen(physname) + 1) > LIFNAMSIZ) {
966 cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
967 vswp->instance, physname);
968 return (1);
969 } else if (strcmp(myname, physname) == 0) {
970 /*
971 * Prevent the vswitch from opening itself as the
972 * network device.
973 */
974 cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
975 vswp->instance, physname);
976 return (1);
977 } else {
978 (void) strncpy(name, physname, strlen(physname) + 1);
979 D2(vswp, "%s: using first device specified (%s)",
980 __func__, physname);
981 }
982
983 #ifdef DEBUG
984 /*
985 * As a temporary measure to aid testing we check to see if there
986 * is a vsw.conf file present. If there is we use the value of the
987 * vsw_physname property in the file as the name of the physical
988 * device, overriding the value from the MD.
989 *
990 * There may be multiple devices listed, but for the moment
991 * we just use the first one.
992 */
993 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
994 "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
995 if ((strlen(dev) + 1) > LIFNAMSIZ) {
996 cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
997 vswp->instance, dev);
998 ddi_prop_free(dev);
999 return (1);
1000 } else {
1001 cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
1002 "config file", vswp->instance, dev);
1003
1004 (void) strncpy(name, dev, strlen(dev) + 1);
1005 }
1006
1007 ddi_prop_free(dev);
1008 }
1009 #endif
1010
1011 return (0);
1012 }
1013
1014 /*
1015 * Read the 'vsw-switch-mode' property from the specified MD node.
1016 *
1017 * Returns 0 on success, otherwise returns 1.
1018 */
1019 static int
vsw_get_md_smodes(vsw_t * vswp,md_t * mdp,mde_cookie_t node,uint8_t * mode)1020 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode)
1021 {
1022 int len = 0;
1023 char *smode = NULL;
1024 char *curr_mode = NULL;
1025
1026 D1(vswp, "%s: enter", __func__);
1027
1028 /*
1029 * Get the switch-mode property. The modes are listed in
1030 * decreasing order of preference, i.e. prefered mode is
1031 * first item in list.
1032 */
1033 len = 0;
1034 if (md_get_prop_data(mdp, node, smode_propname,
1035 (uint8_t **)(&smode), &len) != 0) {
1036 /*
1037 * Unable to get switch-mode property from MD, nothing
1038 * more we can do.
1039 */
1040 cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
1041 " from the MD", vswp->instance);
1042 return (1);
1043 }
1044
1045 curr_mode = smode;
1046 /*
1047 * Modes of operation:
1048 * 'switched' - layer 2 switching, underlying HW in
1049 * programmed mode.
1050 * 'promiscuous' - layer 2 switching, underlying HW in
1051 * promiscuous mode.
1052 * 'routed' - layer 3 (i.e. IP) routing, underlying HW
1053 * in non-promiscuous mode.
1054 */
1055 while (curr_mode < (smode + len)) {
1056 D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
1057 if (strcmp(curr_mode, "switched") == 0) {
1058 *mode = VSW_LAYER2;
1059 } else if (strcmp(curr_mode, "promiscuous") == 0) {
1060 *mode = VSW_LAYER2 | VSW_LAYER2_PROMISC;
1061 } else if (strcmp(curr_mode, "routed") == 0) {
1062 *mode = VSW_LAYER3;
1063 } else {
1064 cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
1065 "setting to default switched mode",
1066 vswp->instance, curr_mode);
1067 *mode = VSW_LAYER2;
1068 }
1069 curr_mode += strlen(curr_mode) + 1;
1070 }
1071
1072 D2(vswp, "%s: %d mode", __func__, *mode);
1073
1074 D1(vswp, "%s: exit", __func__);
1075
1076 return (0);
1077 }
1078
1079 /*
1080 * Register with the MAC layer as a network device, so we
1081 * can be plumbed if necessary.
1082 */
1083 static int
vsw_mac_register(vsw_t * vswp)1084 vsw_mac_register(vsw_t *vswp)
1085 {
1086 mac_register_t *macp;
1087 int rv;
1088
1089 D1(vswp, "%s: enter", __func__);
1090
1091 if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1092 return (EINVAL);
1093 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1094 macp->m_driver = vswp;
1095 macp->m_dip = vswp->dip;
1096 macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1097 macp->m_callbacks = &vsw_m_callbacks;
1098 macp->m_min_sdu = 0;
1099 macp->m_max_sdu = vswp->mtu;
1100 macp->m_margin = VLAN_TAGSZ;
1101 rv = mac_register(macp, &vswp->if_mh);
1102 mac_free(macp);
1103 if (rv != 0) {
1104 /*
1105 * Treat this as a non-fatal error as we may be
1106 * able to operate in some other mode.
1107 */
1108 cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
1109 "a provider with MAC layer", vswp->instance);
1110 return (rv);
1111 }
1112
1113 vswp->if_state |= VSW_IF_REG;
1114
1115 D1(vswp, "%s: exit", __func__);
1116
1117 return (rv);
1118 }
1119
1120 static int
vsw_mac_unregister(vsw_t * vswp)1121 vsw_mac_unregister(vsw_t *vswp)
1122 {
1123 int rv = 0;
1124
1125 D1(vswp, "%s: enter", __func__);
1126
1127 WRITE_ENTER(&vswp->if_lockrw);
1128
1129 if (vswp->if_state & VSW_IF_REG) {
1130 rv = mac_unregister(vswp->if_mh);
1131 if (rv != 0) {
1132 DWARN(vswp, "%s: unable to unregister from MAC "
1133 "framework", __func__);
1134
1135 RW_EXIT(&vswp->if_lockrw);
1136 D1(vswp, "%s: fail exit", __func__);
1137 return (rv);
1138 }
1139
1140 /* mark i/f as down and unregistered */
1141 vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1142 }
1143 RW_EXIT(&vswp->if_lockrw);
1144
1145 D1(vswp, "%s: exit", __func__);
1146
1147 return (rv);
1148 }
1149
1150 static int
vsw_m_stat(void * arg,uint_t stat,uint64_t * val)1151 vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1152 {
1153 vsw_t *vswp = (vsw_t *)arg;
1154
1155 D1(vswp, "%s: enter", __func__);
1156
1157 mutex_enter(&vswp->mac_lock);
1158 if (vswp->mh == NULL) {
1159 mutex_exit(&vswp->mac_lock);
1160 return (EINVAL);
1161 }
1162
1163 /* return stats from underlying device */
1164 *val = mac_stat_get(vswp->mh, stat);
1165
1166 mutex_exit(&vswp->mac_lock);
1167
1168 return (0);
1169 }
1170
1171 static void
vsw_m_stop(void * arg)1172 vsw_m_stop(void *arg)
1173 {
1174 vsw_t *vswp = (vsw_t *)arg;
1175
1176 D1(vswp, "%s: enter", __func__);
1177
1178 WRITE_ENTER(&vswp->if_lockrw);
1179 vswp->if_state &= ~VSW_IF_UP;
1180 RW_EXIT(&vswp->if_lockrw);
1181
1182 /* Cleanup and close the mac client */
1183 vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV);
1184
1185 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1186 }
1187
1188 static int
vsw_m_start(void * arg)1189 vsw_m_start(void *arg)
1190 {
1191 int rv;
1192 vsw_t *vswp = (vsw_t *)arg;
1193
1194 D1(vswp, "%s: enter", __func__);
1195
1196 WRITE_ENTER(&vswp->if_lockrw);
1197
1198 vswp->if_state |= VSW_IF_UP;
1199
1200 if (vswp->switching_setup_done == B_FALSE) {
1201 /*
1202 * If the switching mode has not been setup yet, just
1203 * return. The unicast address will be programmed
1204 * after the physical device is successfully setup by the
1205 * timeout handler.
1206 */
1207 RW_EXIT(&vswp->if_lockrw);
1208 return (0);
1209 }
1210
1211 /* if in layer2 mode, program unicast address. */
1212 if (vswp->mh != NULL) {
1213 /* Init a mac client and program addresses */
1214 rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV);
1215 if (rv != 0) {
1216 cmn_err(CE_NOTE,
1217 "!vsw%d: failed to program interface "
1218 "unicast address\n", vswp->instance);
1219 }
1220 }
1221
1222 RW_EXIT(&vswp->if_lockrw);
1223
1224 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1225 return (0);
1226 }
1227
1228 /*
1229 * Change the local interface address.
1230 *
1231 * Note: we don't support this entry point. The local
1232 * mac address of the switch can only be changed via its
1233 * MD node properties.
1234 */
1235 static int
vsw_m_unicst(void * arg,const uint8_t * macaddr)1236 vsw_m_unicst(void *arg, const uint8_t *macaddr)
1237 {
1238 _NOTE(ARGUNUSED(arg, macaddr))
1239
1240 return (DDI_FAILURE);
1241 }
1242
1243 static int
vsw_m_multicst(void * arg,boolean_t add,const uint8_t * mca)1244 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1245 {
1246 vsw_t *vswp = (vsw_t *)arg;
1247 mcst_addr_t *mcst_p = NULL;
1248 uint64_t addr = 0x0;
1249 int i, ret = 0;
1250
1251 D1(vswp, "%s: enter", __func__);
1252
1253 /*
1254 * Convert address into form that can be used
1255 * as hash table key.
1256 */
1257 for (i = 0; i < ETHERADDRL; i++) {
1258 addr = (addr << 8) | mca[i];
1259 }
1260
1261 D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1262
1263 if (add) {
1264 D2(vswp, "%s: adding multicast", __func__);
1265 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1266 /*
1267 * Update the list of multicast addresses
1268 * contained within the vsw_t structure to
1269 * include this new one.
1270 */
1271 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1272 if (mcst_p == NULL) {
1273 DERR(vswp, "%s unable to alloc mem", __func__);
1274 (void) vsw_del_mcst(vswp,
1275 VSW_LOCALDEV, addr, NULL);
1276 return (1);
1277 }
1278 mcst_p->addr = addr;
1279 ether_copy(mca, &mcst_p->mca);
1280
1281 /*
1282 * Call into the underlying driver to program the
1283 * address into HW.
1284 */
1285 ret = vsw_mac_multicast_add(vswp, NULL, mcst_p,
1286 VSW_LOCALDEV);
1287 if (ret != 0) {
1288 (void) vsw_del_mcst(vswp,
1289 VSW_LOCALDEV, addr, NULL);
1290 kmem_free(mcst_p, sizeof (*mcst_p));
1291 return (ret);
1292 }
1293
1294 mutex_enter(&vswp->mca_lock);
1295 mcst_p->nextp = vswp->mcap;
1296 vswp->mcap = mcst_p;
1297 mutex_exit(&vswp->mca_lock);
1298 } else {
1299 cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
1300 "address", vswp->instance);
1301 }
1302 return (ret);
1303 }
1304
1305 D2(vswp, "%s: removing multicast", __func__);
1306 /*
1307 * Remove the address from the hash table..
1308 */
1309 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1310
1311 /*
1312 * ..and then from the list maintained in the
1313 * vsw_t structure.
1314 */
1315 mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1316 ASSERT(mcst_p != NULL);
1317
1318 vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV);
1319 kmem_free(mcst_p, sizeof (*mcst_p));
1320 }
1321
1322 D1(vswp, "%s: exit", __func__);
1323
1324 return (0);
1325 }
1326
1327 static int
vsw_m_promisc(void * arg,boolean_t on)1328 vsw_m_promisc(void *arg, boolean_t on)
1329 {
1330 vsw_t *vswp = (vsw_t *)arg;
1331
1332 D1(vswp, "%s: enter", __func__);
1333
1334 WRITE_ENTER(&vswp->if_lockrw);
1335 if (on)
1336 vswp->if_state |= VSW_IF_PROMISC;
1337 else
1338 vswp->if_state &= ~VSW_IF_PROMISC;
1339 RW_EXIT(&vswp->if_lockrw);
1340
1341 D1(vswp, "%s: exit", __func__);
1342
1343 return (0);
1344 }
1345
1346 static mblk_t *
vsw_m_tx(void * arg,mblk_t * mp)1347 vsw_m_tx(void *arg, mblk_t *mp)
1348 {
1349 vsw_t *vswp = (vsw_t *)arg;
1350
1351 D1(vswp, "%s: enter", __func__);
1352
1353 mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
1354
1355 if (mp == NULL) {
1356 return (NULL);
1357 }
1358
1359 vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1360
1361 D1(vswp, "%s: exit", __func__);
1362
1363 return (NULL);
1364 }
1365
1366 /*
1367 * Register for machine description (MD) updates.
1368 *
1369 * Returns 0 on success, 1 on failure.
1370 */
1371 static int
vsw_mdeg_register(vsw_t * vswp)1372 vsw_mdeg_register(vsw_t *vswp)
1373 {
1374 mdeg_prop_spec_t *pspecp;
1375 mdeg_node_spec_t *inst_specp;
1376 mdeg_handle_t mdeg_hdl, mdeg_port_hdl;
1377 size_t templatesz;
1378 int rv;
1379
1380 D1(vswp, "%s: enter", __func__);
1381
1382 /*
1383 * Allocate and initialize a per-instance copy
1384 * of the global property spec array that will
1385 * uniquely identify this vsw instance.
1386 */
1387 templatesz = sizeof (vsw_prop_template);
1388 pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1389
1390 bcopy(vsw_prop_template, pspecp, templatesz);
1391
1392 VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1393
1394 /* initialize the complete prop spec structure */
1395 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1396 inst_specp->namep = "virtual-device";
1397 inst_specp->specp = pspecp;
1398
1399 D2(vswp, "%s: instance %d registering with mdeg", __func__,
1400 vswp->regprop);
1401 /*
1402 * Register an interest in 'virtual-device' nodes with a
1403 * 'name' property of 'virtual-network-switch'
1404 */
1405 rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1406 (void *)vswp, &mdeg_hdl);
1407 if (rv != MDEG_SUCCESS) {
1408 DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1409 __func__, rv);
1410 goto mdeg_reg_fail;
1411 }
1412
1413 /*
1414 * Register an interest in 'vsw-port' nodes.
1415 */
1416 rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1417 (void *)vswp, &mdeg_port_hdl);
1418 if (rv != MDEG_SUCCESS) {
1419 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1420 (void) mdeg_unregister(mdeg_hdl);
1421 goto mdeg_reg_fail;
1422 }
1423
1424 /* save off data that will be needed later */
1425 vswp->inst_spec = inst_specp;
1426 vswp->mdeg_hdl = mdeg_hdl;
1427 vswp->mdeg_port_hdl = mdeg_port_hdl;
1428
1429 D1(vswp, "%s: exit", __func__);
1430 return (0);
1431
1432 mdeg_reg_fail:
1433 cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1434 vswp->instance);
1435 kmem_free(pspecp, templatesz);
1436 kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1437
1438 vswp->mdeg_hdl = NULL;
1439 vswp->mdeg_port_hdl = NULL;
1440
1441 return (1);
1442 }
1443
1444 static void
vsw_mdeg_unregister(vsw_t * vswp)1445 vsw_mdeg_unregister(vsw_t *vswp)
1446 {
1447 D1(vswp, "vsw_mdeg_unregister: enter");
1448
1449 if (vswp->mdeg_hdl != NULL)
1450 (void) mdeg_unregister(vswp->mdeg_hdl);
1451
1452 if (vswp->mdeg_port_hdl != NULL)
1453 (void) mdeg_unregister(vswp->mdeg_port_hdl);
1454
1455 if (vswp->inst_spec != NULL) {
1456 if (vswp->inst_spec->specp != NULL) {
1457 (void) kmem_free(vswp->inst_spec->specp,
1458 sizeof (vsw_prop_template));
1459 vswp->inst_spec->specp = NULL;
1460 }
1461
1462 (void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1463 vswp->inst_spec = NULL;
1464 }
1465
1466 D1(vswp, "vsw_mdeg_unregister: exit");
1467 }
1468
1469 /*
1470 * Mdeg callback invoked for the vsw node itself.
1471 */
1472 static int
vsw_mdeg_cb(void * cb_argp,mdeg_result_t * resp)1473 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1474 {
1475 vsw_t *vswp;
1476 md_t *mdp;
1477 mde_cookie_t node;
1478 uint64_t inst;
1479 char *node_name = NULL;
1480
1481 if (resp == NULL)
1482 return (MDEG_FAILURE);
1483
1484 vswp = (vsw_t *)cb_argp;
1485
1486 D1(vswp, "%s: added %d : removed %d : curr matched %d"
1487 " : prev matched %d", __func__, resp->added.nelem,
1488 resp->removed.nelem, resp->match_curr.nelem,
1489 resp->match_prev.nelem);
1490
1491 /*
1492 * We get an initial callback for this node as 'added'
1493 * after registering with mdeg. Note that we would have
1494 * already gathered information about this vsw node by
1495 * walking MD earlier during attach (in vsw_read_mdprops()).
1496 * So, there is a window where the properties of this
1497 * node might have changed when we get this initial 'added'
1498 * callback. We handle this as if an update occured
1499 * and invoke the same function which handles updates to
1500 * the properties of this vsw-node if any.
1501 *
1502 * A non-zero 'match' value indicates that the MD has been
1503 * updated and that a virtual-network-switch node is
1504 * present which may or may not have been updated. It is
1505 * up to the clients to examine their own nodes and
1506 * determine if they have changed.
1507 */
1508 if (resp->added.nelem != 0) {
1509
1510 if (resp->added.nelem != 1) {
1511 cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1512 "invalid: %d\n", vswp->instance, resp->added.nelem);
1513 return (MDEG_FAILURE);
1514 }
1515
1516 mdp = resp->added.mdp;
1517 node = resp->added.mdep[0];
1518
1519 } else if (resp->match_curr.nelem != 0) {
1520
1521 if (resp->match_curr.nelem != 1) {
1522 cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1523 "invalid: %d\n", vswp->instance,
1524 resp->match_curr.nelem);
1525 return (MDEG_FAILURE);
1526 }
1527
1528 mdp = resp->match_curr.mdp;
1529 node = resp->match_curr.mdep[0];
1530
1531 } else {
1532 return (MDEG_FAILURE);
1533 }
1534
1535 /* Validate name and instance */
1536 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1537 DERR(vswp, "%s: unable to get node name\n", __func__);
1538 return (MDEG_FAILURE);
1539 }
1540
1541 /* is this a virtual-network-switch? */
1542 if (strcmp(node_name, vsw_propname) != 0) {
1543 DERR(vswp, "%s: Invalid node name: %s\n",
1544 __func__, node_name);
1545 return (MDEG_FAILURE);
1546 }
1547
1548 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1549 DERR(vswp, "%s: prop(cfg-handle) not found\n",
1550 __func__);
1551 return (MDEG_FAILURE);
1552 }
1553
1554 /* is this the right instance of vsw? */
1555 if (inst != vswp->regprop) {
1556 DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1557 __func__, inst);
1558 return (MDEG_FAILURE);
1559 }
1560
1561 vsw_update_md_prop(vswp, mdp, node);
1562
1563 return (MDEG_SUCCESS);
1564 }
1565
1566 /*
1567 * Mdeg callback invoked for changes to the vsw-port nodes
1568 * under the vsw node.
1569 */
1570 static int
vsw_port_mdeg_cb(void * cb_argp,mdeg_result_t * resp)1571 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1572 {
1573 vsw_t *vswp;
1574 int idx;
1575 md_t *mdp;
1576 mde_cookie_t node;
1577 uint64_t inst;
1578 int rv;
1579
1580 if ((resp == NULL) || (cb_argp == NULL))
1581 return (MDEG_FAILURE);
1582
1583 vswp = (vsw_t *)cb_argp;
1584
1585 D2(vswp, "%s: added %d : removed %d : curr matched %d"
1586 " : prev matched %d", __func__, resp->added.nelem,
1587 resp->removed.nelem, resp->match_curr.nelem,
1588 resp->match_prev.nelem);
1589
1590 /* process added ports */
1591 for (idx = 0; idx < resp->added.nelem; idx++) {
1592 mdp = resp->added.mdp;
1593 node = resp->added.mdep[idx];
1594
1595 D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1596
1597 if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1598 cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1599 "(0x%lx), err=%d", vswp->instance, node, rv);
1600 }
1601 }
1602
1603 /* process removed ports */
1604 for (idx = 0; idx < resp->removed.nelem; idx++) {
1605 mdp = resp->removed.mdp;
1606 node = resp->removed.mdep[idx];
1607
1608 if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1609 DERR(vswp, "%s: prop(%s) not found in port(%d)",
1610 __func__, id_propname, idx);
1611 continue;
1612 }
1613
1614 D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1615
1616 if (vsw_port_detach(vswp, inst) != 0) {
1617 cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1618 vswp->instance, inst);
1619 }
1620 }
1621
1622 for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1623 (void) vsw_port_update(vswp, resp->match_curr.mdp,
1624 resp->match_curr.mdep[idx],
1625 resp->match_prev.mdp,
1626 resp->match_prev.mdep[idx]);
1627 }
1628
1629 D1(vswp, "%s: exit", __func__);
1630
1631 return (MDEG_SUCCESS);
1632 }
1633
1634 /*
1635 * Scan the machine description for this instance of vsw
1636 * and read its properties. Called only from vsw_attach().
1637 * Returns: 0 on success, 1 on failure.
1638 */
1639 static int
vsw_read_mdprops(vsw_t * vswp)1640 vsw_read_mdprops(vsw_t *vswp)
1641 {
1642 md_t *mdp = NULL;
1643 mde_cookie_t rootnode;
1644 mde_cookie_t *listp = NULL;
1645 uint64_t inst;
1646 uint64_t cfgh;
1647 char *name;
1648 int rv = 1;
1649 int num_nodes = 0;
1650 int num_devs = 0;
1651 int listsz = 0;
1652 int i;
1653
1654 /*
1655 * In each 'virtual-device' node in the MD there is a
1656 * 'cfg-handle' property which is the MD's concept of
1657 * an instance number (this may be completely different from
1658 * the device drivers instance #). OBP reads that value and
1659 * stores it in the 'reg' property of the appropriate node in
1660 * the device tree. We first read this reg property and use this
1661 * to compare against the 'cfg-handle' property of vsw nodes
1662 * in MD to get to this specific vsw instance and then read
1663 * other properties that we are interested in.
1664 * We also cache the value of 'reg' property and use it later
1665 * to register callbacks with mdeg (see vsw_mdeg_register())
1666 */
1667 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1668 DDI_PROP_DONTPASS, reg_propname, -1);
1669 if (inst == -1) {
1670 cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1671 "OBP device tree", vswp->instance, reg_propname);
1672 return (rv);
1673 }
1674
1675 vswp->regprop = inst;
1676
1677 if ((mdp = md_get_handle()) == NULL) {
1678 DWARN(vswp, "%s: cannot init MD\n", __func__);
1679 return (rv);
1680 }
1681
1682 num_nodes = md_node_count(mdp);
1683 ASSERT(num_nodes > 0);
1684
1685 listsz = num_nodes * sizeof (mde_cookie_t);
1686 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1687
1688 rootnode = md_root_node(mdp);
1689
1690 /* search for all "virtual_device" nodes */
1691 num_devs = md_scan_dag(mdp, rootnode,
1692 md_find_name(mdp, vdev_propname),
1693 md_find_name(mdp, "fwd"), listp);
1694 if (num_devs <= 0) {
1695 DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1696 goto vsw_readmd_exit;
1697 }
1698
1699 /*
1700 * Now loop through the list of virtual-devices looking for
1701 * devices with name "virtual-network-switch" and for each
1702 * such device compare its instance with what we have from
1703 * the 'reg' property to find the right node in MD and then
1704 * read all its properties.
1705 */
1706 for (i = 0; i < num_devs; i++) {
1707
1708 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1709 DWARN(vswp, "%s: name property not found\n",
1710 __func__);
1711 goto vsw_readmd_exit;
1712 }
1713
1714 /* is this a virtual-network-switch? */
1715 if (strcmp(name, vsw_propname) != 0)
1716 continue;
1717
1718 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1719 DWARN(vswp, "%s: cfg-handle property not found\n",
1720 __func__);
1721 goto vsw_readmd_exit;
1722 }
1723
1724 /* is this the required instance of vsw? */
1725 if (inst != cfgh)
1726 continue;
1727
1728 /* now read all properties of this vsw instance */
1729 rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1730 break;
1731 }
1732
1733 vsw_readmd_exit:
1734
1735 kmem_free(listp, listsz);
1736 (void) md_fini_handle(mdp);
1737 return (rv);
1738 }
1739
1740 /*
1741 * Read the initial start-of-day values from the specified MD node.
1742 */
1743 static int
vsw_get_initial_md_properties(vsw_t * vswp,md_t * mdp,mde_cookie_t node)1744 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1745 {
1746 uint64_t macaddr = 0;
1747
1748 D1(vswp, "%s: enter", __func__);
1749
1750 if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1751 return (1);
1752 }
1753
1754 /* mac address for vswitch device itself */
1755 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1756 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1757 vswp->instance);
1758 return (1);
1759 }
1760
1761 vsw_save_lmacaddr(vswp, macaddr);
1762
1763 if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) {
1764 DWARN(vswp, "%s: Unable to read %s property from MD, "
1765 "defaulting to 'switched' mode",
1766 __func__, smode_propname);
1767
1768 vswp->smode = VSW_LAYER2;
1769 }
1770
1771 /*
1772 * Read the 'linkprop' property to know if this
1773 * vsw device wants to get physical link updates.
1774 */
1775 vsw_linkprop_read(vswp, mdp, node, &vswp->pls_update);
1776
1777 /* read mtu */
1778 vsw_mtu_read(vswp, mdp, node, &vswp->mtu);
1779 if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) {
1780 vswp->mtu = ETHERMTU;
1781 }
1782 vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) +
1783 VLAN_TAGSZ;
1784
1785 /* read vlan id properties of this vsw instance */
1786 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
1787 &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
1788
1789 /* read priority-ether-types */
1790 vsw_read_pri_eth_types(vswp, mdp, node);
1791
1792 /* read bandwidth property of this vsw instance */
1793 vsw_bandwidth_read(vswp, mdp, node, &vswp->bandwidth);
1794
1795 D1(vswp, "%s: exit", __func__);
1796 return (0);
1797 }
1798
1799 /*
1800 * Read vlan id properties of the given MD node.
1801 * Arguments:
1802 * arg: device argument(vsw device or a port)
1803 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
1804 * mdp: machine description
1805 * node: md node cookie
1806 *
1807 * Returns:
1808 * pvidp: port-vlan-id of the node
1809 * vidspp: list of vlan-ids of the node
1810 * nvidsp: # of vlan-ids in the list
1811 * default_idp: default-vlan-id of the node(if node is vsw device)
1812 */
1813 static void
vsw_vlan_read_ids(void * arg,int type,md_t * mdp,mde_cookie_t node,uint16_t * pvidp,vsw_vlanid_t ** vidspp,uint16_t * nvidsp,uint16_t * default_idp)1814 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1815 uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp,
1816 uint16_t *default_idp)
1817 {
1818 vsw_t *vswp;
1819 vsw_port_t *portp;
1820 char *pvid_propname;
1821 char *vid_propname;
1822 uint_t nvids = 0;
1823 uint32_t vids_size;
1824 int rv;
1825 int i;
1826 uint64_t *data;
1827 uint64_t val;
1828 int size;
1829 int inst;
1830
1831 if (type == VSW_LOCALDEV) {
1832
1833 vswp = (vsw_t *)arg;
1834 pvid_propname = vsw_pvid_propname;
1835 vid_propname = vsw_vid_propname;
1836 inst = vswp->instance;
1837
1838 } else if (type == VSW_VNETPORT) {
1839
1840 portp = (vsw_port_t *)arg;
1841 vswp = portp->p_vswp;
1842 pvid_propname = port_pvid_propname;
1843 vid_propname = port_vid_propname;
1844 inst = portp->p_instance;
1845
1846 } else {
1847 return;
1848 }
1849
1850 if (type == VSW_LOCALDEV && default_idp != NULL) {
1851 rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
1852 if (rv != 0) {
1853 DWARN(vswp, "%s: prop(%s) not found", __func__,
1854 vsw_dvid_propname);
1855
1856 *default_idp = vsw_default_vlan_id;
1857 } else {
1858 *default_idp = val & 0xFFF;
1859 D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1860 vsw_dvid_propname, inst, *default_idp);
1861 }
1862 }
1863
1864 rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1865 if (rv != 0) {
1866 DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
1867 *pvidp = vsw_default_vlan_id;
1868 } else {
1869
1870 *pvidp = val & 0xFFF;
1871 D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1872 pvid_propname, inst, *pvidp);
1873 }
1874
1875 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1876 &size);
1877 if (rv != 0) {
1878 D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
1879 size = 0;
1880 } else {
1881 size /= sizeof (uint64_t);
1882 }
1883 nvids = size;
1884
1885 if (nvids != 0) {
1886 D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
1887 vids_size = sizeof (vsw_vlanid_t) * nvids;
1888 *vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1889 for (i = 0; i < nvids; i++) {
1890 (*vidspp)[i].vl_vid = data[i] & 0xFFFF;
1891 (*vidspp)[i].vl_set = B_FALSE;
1892 D2(vswp, " %d ", (*vidspp)[i].vl_vid);
1893 }
1894 D2(vswp, "\n");
1895 }
1896
1897 *nvidsp = nvids;
1898 }
1899
1900 static void
vsw_port_read_bandwidth(vsw_port_t * portp,md_t * mdp,mde_cookie_t node,uint64_t * bw)1901 vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp, mde_cookie_t node,
1902 uint64_t *bw)
1903 {
1904 int rv;
1905 uint64_t val;
1906 vsw_t *vswp;
1907
1908 vswp = portp->p_vswp;
1909
1910 rv = md_get_prop_val(mdp, node, port_maxbw_propname, &val);
1911
1912 if (rv != 0) {
1913 *bw = 0;
1914 D3(vswp, "%s: prop(%s) not found\n", __func__,
1915 port_maxbw_propname);
1916 } else {
1917 *bw = val;
1918 D3(vswp, "%s: %s nodes found", __func__, port_maxbw_propname);
1919 }
1920 }
1921
1922 /*
1923 * This function reads "priority-ether-types" property from md. This property
1924 * is used to enable support for priority frames. Applications which need
1925 * guaranteed and timely delivery of certain high priority frames to/from
1926 * a vnet or vsw within ldoms, should configure this property by providing
1927 * the ether type(s) for which the priority facility is needed.
1928 * Normal data frames are delivered over a ldc channel using the descriptor
1929 * ring mechanism which is constrained by factors such as descriptor ring size,
1930 * the rate at which the ring is processed at the peer ldc end point, etc.
1931 * The priority mechanism provides an Out-Of-Band path to send/receive frames
1932 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1933 * descriptor ring path and enables a more reliable and timely delivery of
1934 * frames to the peer.
1935 */
1936 static void
vsw_read_pri_eth_types(vsw_t * vswp,md_t * mdp,mde_cookie_t node)1937 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1938 {
1939 int rv;
1940 uint16_t *types;
1941 uint64_t *data;
1942 int size;
1943 int i;
1944 size_t mblk_sz;
1945
1946 rv = md_get_prop_data(mdp, node, pri_types_propname,
1947 (uint8_t **)&data, &size);
1948 if (rv != 0) {
1949 /*
1950 * Property may not exist if we are running pre-ldoms1.1 f/w.
1951 * Check if 'vsw_pri_eth_type' has been set in that case.
1952 */
1953 if (vsw_pri_eth_type != 0) {
1954 size = sizeof (vsw_pri_eth_type);
1955 data = &vsw_pri_eth_type;
1956 } else {
1957 D3(vswp, "%s: prop(%s) not found", __func__,
1958 pri_types_propname);
1959 size = 0;
1960 }
1961 }
1962
1963 if (size == 0) {
1964 vswp->pri_num_types = 0;
1965 return;
1966 }
1967
1968 /*
1969 * we have some priority-ether-types defined;
1970 * allocate a table of these types and also
1971 * allocate a pool of mblks to transmit these
1972 * priority packets.
1973 */
1974 size /= sizeof (uint64_t);
1975 vswp->pri_num_types = size;
1976 vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1977 for (i = 0, types = vswp->pri_types; i < size; i++) {
1978 types[i] = data[i] & 0xFFFF;
1979 }
1980 mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1981 (void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, NULL,
1982 &vswp->pri_tx_vmp);
1983 }
1984
1985 static void
vsw_mtu_read(vsw_t * vswp,md_t * mdp,mde_cookie_t node,uint32_t * mtu)1986 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1987 {
1988 int rv;
1989 int inst;
1990 uint64_t val;
1991 char *mtu_propname;
1992
1993 mtu_propname = vsw_mtu_propname;
1994 inst = vswp->instance;
1995
1996 rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1997 if (rv != 0) {
1998 D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname);
1999 *mtu = vsw_ethermtu;
2000 } else {
2001
2002 *mtu = val & 0xFFFF;
2003 D2(vswp, "%s: %s(%d): (%d)\n", __func__,
2004 mtu_propname, inst, *mtu);
2005 }
2006 }
2007
2008 /*
2009 * Update the mtu of the vsw device. We first check if the device has been
2010 * plumbed and if so fail the mtu update. Otherwise, we continue to update the
2011 * new mtu and reset all ports to initiate handshake re-negotiation with peers
2012 * using the new mtu.
2013 */
2014 static int
vsw_mtu_update(vsw_t * vswp,uint32_t mtu)2015 vsw_mtu_update(vsw_t *vswp, uint32_t mtu)
2016 {
2017 int rv;
2018
2019 WRITE_ENTER(&vswp->if_lockrw);
2020
2021 if (vswp->if_state & VSW_IF_UP) {
2022
2023 RW_EXIT(&vswp->if_lockrw);
2024
2025 cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
2026 " as the device is plumbed\n", vswp->instance);
2027 return (EBUSY);
2028
2029 } else {
2030
2031 D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n",
2032 __func__, vswp->mtu, mtu);
2033
2034 vswp->mtu = mtu;
2035 vswp->max_frame_size = vswp->mtu +
2036 sizeof (struct ether_header) + VLAN_TAGSZ;
2037
2038 rv = mac_maxsdu_update(vswp->if_mh, mtu);
2039 if (rv != 0) {
2040 cmn_err(CE_NOTE,
2041 "!vsw%d: Unable to update mtu with mac"
2042 " layer\n", vswp->instance);
2043 }
2044
2045 RW_EXIT(&vswp->if_lockrw);
2046
2047 /* Reset ports to renegotiate with the new mtu */
2048 vsw_reset_ports(vswp);
2049
2050 }
2051
2052 return (0);
2053 }
2054
2055 static void
vsw_linkprop_read(vsw_t * vswp,md_t * mdp,mde_cookie_t node,boolean_t * pls)2056 vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
2057 boolean_t *pls)
2058 {
2059 int rv;
2060 uint64_t val;
2061 char *linkpropname;
2062
2063 linkpropname = vsw_linkprop_propname;
2064
2065 rv = md_get_prop_val(mdp, node, linkpropname, &val);
2066 if (rv != 0) {
2067 D3(vswp, "%s: prop(%s) not found", __func__, linkpropname);
2068 *pls = B_FALSE;
2069 } else {
2070
2071 *pls = (val & 0x1) ? B_TRUE : B_FALSE;
2072 D2(vswp, "%s: %s(%d): (%d)\n", __func__, linkpropname,
2073 vswp->instance, *pls);
2074 }
2075 }
2076
2077 void
vsw_mac_link_update(vsw_t * vswp,link_state_t link_state)2078 vsw_mac_link_update(vsw_t *vswp, link_state_t link_state)
2079 {
2080 READ_ENTER(&vswp->if_lockrw);
2081
2082 if (vswp->if_state & VSW_IF_REG) {
2083 mac_link_update(vswp->if_mh, link_state);
2084 }
2085
2086 RW_EXIT(&vswp->if_lockrw);
2087 }
2088
2089 void
vsw_physlink_state_update(vsw_t * vswp)2090 vsw_physlink_state_update(vsw_t *vswp)
2091 {
2092 if (vswp->pls_update == B_TRUE) {
2093 vsw_mac_link_update(vswp, vswp->phys_link_state);
2094 }
2095 vsw_physlink_update_ports(vswp);
2096 }
2097
2098 static void
vsw_bandwidth_read(vsw_t * vswp,md_t * mdp,mde_cookie_t node,uint64_t * bw)2099 vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint64_t *bw)
2100 {
2101 /* read the vsw bandwidth from md */
2102 int rv;
2103 uint64_t val;
2104
2105 rv = md_get_prop_val(mdp, node, vsw_maxbw_propname, &val);
2106 if (rv != 0) {
2107 *bw = 0;
2108 D3(vswp, "%s: prop(%s) not found", __func__,
2109 vsw_maxbw_propname);
2110 } else {
2111 *bw = val;
2112 D3(vswp, "%s: %s(%d): (%ld)\n", __func__,
2113 vsw_maxbw_propname, vswp->instance, *bw);
2114 }
2115 }
2116
2117 /*
2118 * Check to see if the relevant properties in the specified node have
2119 * changed, and if so take the appropriate action.
2120 *
2121 * If any of the properties are missing or invalid we don't take
2122 * any action, as this function should only be invoked when modifications
2123 * have been made to what we assume is a working configuration, which
2124 * we leave active.
2125 *
2126 * Note it is legal for this routine to be invoked even if none of the
2127 * properties in the port node within the MD have actually changed.
2128 */
2129 static void
vsw_update_md_prop(vsw_t * vswp,md_t * mdp,mde_cookie_t node)2130 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
2131 {
2132 char physname[LIFNAMSIZ];
2133 char drv[LIFNAMSIZ];
2134 uint_t ddi_instance;
2135 uint8_t new_smode;
2136 int i;
2137 uint64_t macaddr = 0;
2138 enum {MD_init = 0x1,
2139 MD_physname = 0x2,
2140 MD_macaddr = 0x4,
2141 MD_smode = 0x8,
2142 MD_vlans = 0x10,
2143 MD_mtu = 0x20,
2144 MD_pls = 0x40,
2145 MD_bw = 0x80} updated;
2146 int rv;
2147 uint16_t pvid;
2148 vsw_vlanid_t *vids;
2149 uint16_t nvids;
2150 uint32_t mtu;
2151 boolean_t pls_update;
2152 uint64_t maxbw;
2153
2154 updated = MD_init;
2155
2156 D1(vswp, "%s: enter", __func__);
2157
2158 /*
2159 * Check if name of physical device in MD has changed.
2160 */
2161 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
2162 /*
2163 * Do basic sanity check on new device name/instance,
2164 * if its non NULL. It is valid for the device name to
2165 * have changed from a non NULL to a NULL value, i.e.
2166 * the vsw is being changed to 'routed' mode.
2167 */
2168 if ((strlen(physname) != 0) &&
2169 (ddi_parse(physname, drv,
2170 &ddi_instance) != DDI_SUCCESS)) {
2171 cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
2172 " a valid device name/instance",
2173 vswp->instance, physname);
2174 goto fail_reconf;
2175 }
2176
2177 if (strcmp(physname, vswp->physname)) {
2178 D2(vswp, "%s: device name changed from %s to %s",
2179 __func__, vswp->physname, physname);
2180
2181 updated |= MD_physname;
2182 } else {
2183 D2(vswp, "%s: device name unchanged at %s",
2184 __func__, vswp->physname);
2185 }
2186 } else {
2187 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
2188 "device from updated MD.", vswp->instance);
2189 goto fail_reconf;
2190 }
2191
2192 /*
2193 * Check if MAC address has changed.
2194 */
2195 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
2196 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
2197 vswp->instance);
2198 goto fail_reconf;
2199 } else {
2200 uint64_t maddr = macaddr;
2201 READ_ENTER(&vswp->if_lockrw);
2202 for (i = ETHERADDRL - 1; i >= 0; i--) {
2203 if (vswp->if_addr.ether_addr_octet[i]
2204 != (macaddr & 0xFF)) {
2205 D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
2206 __func__, i,
2207 vswp->if_addr.ether_addr_octet[i],
2208 (macaddr & 0xFF));
2209 updated |= MD_macaddr;
2210 macaddr = maddr;
2211 break;
2212 }
2213 macaddr >>= 8;
2214 }
2215 RW_EXIT(&vswp->if_lockrw);
2216 if (updated & MD_macaddr) {
2217 vsw_save_lmacaddr(vswp, macaddr);
2218 }
2219 }
2220
2221 /*
2222 * Check if switching modes have changed.
2223 */
2224 if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) {
2225 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
2226 vswp->instance, smode_propname);
2227 goto fail_reconf;
2228 } else {
2229 if (new_smode != vswp->smode) {
2230 D2(vswp, "%s: switching mode changed from %d to %d",
2231 __func__, vswp->smode, new_smode);
2232
2233 updated |= MD_smode;
2234 }
2235 }
2236
2237 /* Read the vlan ids */
2238 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
2239 &nvids, NULL);
2240
2241 /* Determine if there are any vlan id updates */
2242 if ((pvid != vswp->pvid) || /* pvid changed? */
2243 (nvids != vswp->nvids) || /* # of vids changed? */
2244 ((nvids != 0) && (vswp->nvids != 0) && /* vids changed? */
2245 !vsw_cmp_vids(vids, vswp->vids, nvids))) {
2246 updated |= MD_vlans;
2247 }
2248
2249 /* Read mtu */
2250 vsw_mtu_read(vswp, mdp, node, &mtu);
2251 if (mtu != vswp->mtu) {
2252 if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2253 updated |= MD_mtu;
2254 } else {
2255 cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
2256 " as the specified value:%d is invalid\n",
2257 vswp->instance, mtu);
2258 }
2259 }
2260
2261 /*
2262 * Read the 'linkprop' property.
2263 */
2264 vsw_linkprop_read(vswp, mdp, node, &pls_update);
2265 if (pls_update != vswp->pls_update) {
2266 updated |= MD_pls;
2267 }
2268
2269 /* Read bandwidth */
2270 vsw_bandwidth_read(vswp, mdp, node, &maxbw);
2271 if (maxbw != vswp->bandwidth) {
2272 if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) {
2273 updated |= MD_bw;
2274 } else {
2275 cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth"
2276 " update as the specified value:%ld is invalid\n",
2277 vswp->instance, maxbw);
2278 }
2279 }
2280
2281 /*
2282 * Now make any changes which are needed...
2283 */
2284 if (updated & MD_pls) {
2285
2286 /* save the updated property. */
2287 vswp->pls_update = pls_update;
2288
2289 if (pls_update == B_FALSE) {
2290 /*
2291 * Phys link state update is now disabled for this vsw
2292 * interface. If we had previously reported a link-down
2293 * to the stack, undo that by sending a link-up.
2294 */
2295 if (vswp->phys_link_state == LINK_STATE_DOWN) {
2296 vsw_mac_link_update(vswp, LINK_STATE_UP);
2297 }
2298 } else {
2299 /*
2300 * Phys link state update is now enabled. Send up an
2301 * update based on the current phys link state.
2302 */
2303 if (vswp->smode & VSW_LAYER2) {
2304 vsw_mac_link_update(vswp,
2305 vswp->phys_link_state);
2306 }
2307 }
2308
2309 }
2310
2311 if (updated & (MD_physname | MD_smode | MD_mtu)) {
2312
2313 /*
2314 * Stop any pending thread to setup switching mode.
2315 */
2316 vsw_setup_switching_stop(vswp);
2317
2318 /* Cleanup HybridIO */
2319 vsw_hio_cleanup(vswp);
2320
2321 /*
2322 * Remove unicst, mcst addrs of vsw interface
2323 * and ports from the physdev. This also closes
2324 * the corresponding mac clients.
2325 */
2326 vsw_unset_addrs(vswp);
2327
2328 /*
2329 * Stop, detach and close the old device..
2330 */
2331 mutex_enter(&vswp->mac_lock);
2332 vsw_mac_close(vswp);
2333 mutex_exit(&vswp->mac_lock);
2334
2335 /*
2336 * Update phys name.
2337 */
2338 if (updated & MD_physname) {
2339 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
2340 vswp->instance, vswp->physname, physname);
2341 (void) strncpy(vswp->physname,
2342 physname, strlen(physname) + 1);
2343 }
2344
2345 /*
2346 * Update array with the new switch mode values.
2347 */
2348 if (updated & MD_smode) {
2349 vswp->smode = new_smode;
2350 }
2351
2352 /* Update mtu */
2353 if (updated & MD_mtu) {
2354 rv = vsw_mtu_update(vswp, mtu);
2355 if (rv != 0) {
2356 goto fail_update;
2357 }
2358 }
2359
2360 /*
2361 * ..and attach, start the new device.
2362 */
2363 rv = vsw_setup_switching(vswp);
2364 if (rv == EAGAIN) {
2365 /*
2366 * Unable to setup switching mode.
2367 * As the error is EAGAIN, schedule a thread to retry
2368 * and return. Programming addresses of ports and
2369 * vsw interface will be done by the thread when the
2370 * switching setup completes successfully.
2371 */
2372 if (vsw_setup_switching_start(vswp) != 0) {
2373 goto fail_update;
2374 }
2375 return;
2376
2377 } else if (rv) {
2378 goto fail_update;
2379 }
2380
2381 vsw_setup_switching_post_process(vswp);
2382 } else if (updated & MD_macaddr) {
2383 /*
2384 * We enter here if only MD_macaddr is exclusively updated.
2385 * If MD_physname and/or MD_smode are also updated, then
2386 * as part of that, we would have implicitly processed
2387 * MD_macaddr update (above).
2388 */
2389 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
2390 vswp->instance, macaddr);
2391
2392 READ_ENTER(&vswp->if_lockrw);
2393 if (vswp->if_state & VSW_IF_UP) {
2394 /* reconfigure with new address */
2395 vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0);
2396
2397 /*
2398 * Notify the MAC layer of the changed address.
2399 */
2400 mac_unicst_update(vswp->if_mh,
2401 (uint8_t *)&vswp->if_addr);
2402
2403 }
2404 RW_EXIT(&vswp->if_lockrw);
2405
2406 }
2407
2408 if (updated & MD_vlans) {
2409 /* Remove existing vlan ids from the hash table. */
2410 vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
2411
2412 if (vswp->if_state & VSW_IF_UP) {
2413 vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids);
2414 } else {
2415 if (vswp->nvids != 0) {
2416 kmem_free(vswp->vids,
2417 sizeof (vsw_vlanid_t) * vswp->nvids);
2418 }
2419 vswp->vids = vids;
2420 vswp->nvids = nvids;
2421 vswp->pvid = pvid;
2422 }
2423
2424 /* add these new vlan ids into hash table */
2425 vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
2426 } else {
2427 if (nvids != 0) {
2428 kmem_free(vids, sizeof (vsw_vlanid_t) * nvids);
2429 }
2430 }
2431
2432 if (updated & MD_bw) {
2433 vsw_update_bandwidth(vswp, NULL, VSW_LOCALDEV, maxbw);
2434 }
2435
2436 return;
2437
2438 fail_reconf:
2439 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
2440 return;
2441
2442 fail_update:
2443 cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
2444 vswp->instance);
2445 }
2446
2447 /*
2448 * Read the port's md properties.
2449 */
2450 static int
vsw_port_read_props(vsw_port_t * portp,vsw_t * vswp,md_t * mdp,mde_cookie_t * node)2451 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
2452 md_t *mdp, mde_cookie_t *node)
2453 {
2454 uint64_t ldc_id;
2455 uint8_t *addrp;
2456 int i, addrsz;
2457 int num_nodes = 0, nchan = 0;
2458 int listsz = 0;
2459 mde_cookie_t *listp = NULL;
2460 struct ether_addr ea;
2461 uint64_t macaddr;
2462 uint64_t inst = 0;
2463 uint64_t val;
2464
2465 if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2466 DWARN(vswp, "%s: prop(%s) not found", __func__,
2467 id_propname);
2468 return (1);
2469 }
2470
2471 /*
2472 * Find the channel endpoint node(s) (which should be under this
2473 * port node) which contain the channel id(s).
2474 */
2475 if ((num_nodes = md_node_count(mdp)) <= 0) {
2476 DERR(vswp, "%s: invalid number of nodes found (%d)",
2477 __func__, num_nodes);
2478 return (1);
2479 }
2480
2481 D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2482
2483 /* allocate enough space for node list */
2484 listsz = num_nodes * sizeof (mde_cookie_t);
2485 listp = kmem_zalloc(listsz, KM_SLEEP);
2486
2487 nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2488 md_find_name(mdp, "fwd"), listp);
2489
2490 if (nchan <= 0) {
2491 DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2492 kmem_free(listp, listsz);
2493 return (1);
2494 }
2495
2496 D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2497
2498 /* use property from first node found */
2499 if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2500 DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2501 id_propname);
2502 kmem_free(listp, listsz);
2503 return (1);
2504 }
2505
2506 /* don't need list any more */
2507 kmem_free(listp, listsz);
2508
2509 D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2510
2511 /* read mac-address property */
2512 if (md_get_prop_data(mdp, *node, remaddr_propname,
2513 &addrp, &addrsz)) {
2514 DWARN(vswp, "%s: prop(%s) not found",
2515 __func__, remaddr_propname);
2516 return (1);
2517 }
2518
2519 if (addrsz < ETHERADDRL) {
2520 DWARN(vswp, "%s: invalid address size", __func__);
2521 return (1);
2522 }
2523
2524 macaddr = *((uint64_t *)addrp);
2525 D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2526
2527 for (i = ETHERADDRL - 1; i >= 0; i--) {
2528 ea.ether_addr_octet[i] = macaddr & 0xFF;
2529 macaddr >>= 8;
2530 }
2531
2532 /* now update all properties into the port */
2533 portp->p_vswp = vswp;
2534 portp->p_instance = inst;
2535 portp->addr_set = B_FALSE;
2536 ether_copy(&ea, &portp->p_macaddr);
2537 if (nchan > VSW_PORT_MAX_LDCS) {
2538 D2(vswp, "%s: using first of %d ldc ids",
2539 __func__, nchan);
2540 nchan = VSW_PORT_MAX_LDCS;
2541 }
2542 portp->num_ldcs = nchan;
2543 portp->ldc_ids =
2544 kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
2545 bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
2546
2547 /* read vlan id properties of this port node */
2548 vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
2549 &portp->vids, &portp->nvids, NULL);
2550
2551 /* Check if hybrid property is present */
2552 if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
2553 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2554 portp->p_hio_enabled = B_TRUE;
2555 } else {
2556 portp->p_hio_enabled = B_FALSE;
2557 }
2558 /*
2559 * Port hio capability determined after version
2560 * negotiation, i.e., when we know the peer is HybridIO capable.
2561 */
2562 portp->p_hio_capable = B_FALSE;
2563
2564 /* Read bandwidth of this port */
2565 vsw_port_read_bandwidth(portp, mdp, *node, &portp->p_bandwidth);
2566
2567 return (0);
2568 }
2569
2570 /*
2571 * Add a new port to the system.
2572 *
2573 * Returns 0 on success, 1 on failure.
2574 */
2575 int
vsw_port_add(vsw_t * vswp,md_t * mdp,mde_cookie_t * node)2576 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
2577 {
2578 vsw_port_t *portp;
2579 int rv;
2580
2581 portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
2582
2583 rv = vsw_port_read_props(portp, vswp, mdp, node);
2584 if (rv != 0) {
2585 kmem_free(portp, sizeof (*portp));
2586 return (1);
2587 }
2588
2589 rv = vsw_port_attach(portp);
2590 if (rv != 0) {
2591 DERR(vswp, "%s: failed to attach port", __func__);
2592 return (1);
2593 }
2594
2595 return (0);
2596 }
2597
2598 static int
vsw_port_update(vsw_t * vswp,md_t * curr_mdp,mde_cookie_t curr_mdex,md_t * prev_mdp,mde_cookie_t prev_mdex)2599 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2600 md_t *prev_mdp, mde_cookie_t prev_mdex)
2601 {
2602 uint64_t cport_num;
2603 uint64_t pport_num;
2604 vsw_port_list_t *plistp;
2605 vsw_port_t *portp;
2606 uint16_t pvid;
2607 vsw_vlanid_t *vids;
2608 uint16_t nvids;
2609 uint64_t val;
2610 boolean_t hio_enabled = B_FALSE;
2611 uint64_t maxbw;
2612 enum {P_MD_init = 0x1,
2613 P_MD_vlans = 0x2,
2614 P_MD_hio = 0x4,
2615 P_MD_maxbw = 0x8} updated;
2616
2617 updated = P_MD_init;
2618
2619 /*
2620 * For now, we get port updates only if vlan ids changed.
2621 * We read the port num and do some sanity check.
2622 */
2623 if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2624 return (1);
2625 }
2626
2627 if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2628 return (1);
2629 }
2630 if (cport_num != pport_num)
2631 return (1);
2632
2633 plistp = &(vswp->plist);
2634
2635 READ_ENTER(&plistp->lockrw);
2636
2637 portp = vsw_lookup_port(vswp, cport_num);
2638 if (portp == NULL) {
2639 RW_EXIT(&plistp->lockrw);
2640 return (1);
2641 }
2642
2643 /* Read the vlan ids */
2644 vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
2645 &vids, &nvids, NULL);
2646
2647 /* Determine if there are any vlan id updates */
2648 if ((pvid != portp->pvid) || /* pvid changed? */
2649 (nvids != portp->nvids) || /* # of vids changed? */
2650 ((nvids != 0) && (portp->nvids != 0) && /* vids changed? */
2651 !vsw_cmp_vids(vids, portp->vids, nvids))) {
2652 updated |= P_MD_vlans;
2653 }
2654
2655 /* Check if hybrid property is present */
2656 if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
2657 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2658 hio_enabled = B_TRUE;
2659 }
2660
2661 if (portp->p_hio_enabled != hio_enabled) {
2662 updated |= P_MD_hio;
2663 }
2664
2665 /* Check if maxbw property is present */
2666 vsw_port_read_bandwidth(portp, curr_mdp, curr_mdex, &maxbw);
2667 if (maxbw != portp->p_bandwidth) {
2668 if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) {
2669 updated |= P_MD_maxbw;
2670 } else {
2671 cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth"
2672 " update for port %d as the specified value:%ld"
2673 " is invalid\n",
2674 vswp->instance, portp->p_instance, maxbw);
2675 }
2676 }
2677
2678 if (updated & P_MD_vlans) {
2679 /* Remove existing vlan ids from the hash table. */
2680 vsw_vlan_remove_ids(portp, VSW_VNETPORT);
2681
2682 /* Reconfigure vlans with network device */
2683 vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids);
2684
2685 /* add these new vlan ids into hash table */
2686 vsw_vlan_add_ids(portp, VSW_VNETPORT);
2687
2688 /* reset the port if it is vlan unaware (ver < 1.3) */
2689 vsw_vlan_unaware_port_reset(portp);
2690 }
2691
2692 if (updated & P_MD_hio) {
2693 vsw_hio_port_update(portp, hio_enabled);
2694 }
2695
2696 if (updated & P_MD_maxbw) {
2697 vsw_update_bandwidth(NULL, portp, VSW_VNETPORT, maxbw);
2698 }
2699
2700 RW_EXIT(&plistp->lockrw);
2701
2702 return (0);
2703 }
2704
2705 /*
2706 * vsw_mac_rx -- A common function to send packets to the interface.
2707 * By default this function check if the interface is UP or not, the
2708 * rest of the behaviour depends on the flags as below:
2709 *
2710 * VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2711 * VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2712 * VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2713 */
2714 void
vsw_mac_rx(vsw_t * vswp,mac_resource_handle_t mrh,mblk_t * mp,vsw_macrx_flags_t flags)2715 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2716 mblk_t *mp, vsw_macrx_flags_t flags)
2717 {
2718 mblk_t *mpt;
2719
2720 D1(vswp, "%s:enter\n", __func__);
2721 READ_ENTER(&vswp->if_lockrw);
2722 /* Check if the interface is up */
2723 if (!(vswp->if_state & VSW_IF_UP)) {
2724 RW_EXIT(&vswp->if_lockrw);
2725 /* Free messages only if FREEMSG flag specified */
2726 if (flags & VSW_MACRX_FREEMSG) {
2727 freemsgchain(mp);
2728 }
2729 D1(vswp, "%s:exit\n", __func__);
2730 return;
2731 }
2732 /*
2733 * If PROMISC flag is passed, then check if
2734 * the interface is in the PROMISC mode.
2735 * If not, drop the messages.
2736 */
2737 if (flags & VSW_MACRX_PROMISC) {
2738 if (!(vswp->if_state & VSW_IF_PROMISC)) {
2739 RW_EXIT(&vswp->if_lockrw);
2740 /* Free messages only if FREEMSG flag specified */
2741 if (flags & VSW_MACRX_FREEMSG) {
2742 freemsgchain(mp);
2743 }
2744 D1(vswp, "%s:exit\n", __func__);
2745 return;
2746 }
2747 }
2748 RW_EXIT(&vswp->if_lockrw);
2749 /*
2750 * If COPYMSG flag is passed, then make a copy
2751 * of the message chain and send up the copy.
2752 */
2753 if (flags & VSW_MACRX_COPYMSG) {
2754 mp = copymsgchain(mp);
2755 if (mp == NULL) {
2756 D1(vswp, "%s:exit\n", __func__);
2757 return;
2758 }
2759 }
2760
2761 D2(vswp, "%s: sending up stack", __func__);
2762
2763 mpt = NULL;
2764 (void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
2765 if (mp != NULL) {
2766 mac_rx(vswp->if_mh, mrh, mp);
2767 }
2768 D1(vswp, "%s:exit\n", __func__);
2769 }
2770
2771 /* copy mac address of vsw into soft state structure */
2772 static void
vsw_save_lmacaddr(vsw_t * vswp,uint64_t macaddr)2773 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2774 {
2775 int i;
2776
2777 WRITE_ENTER(&vswp->if_lockrw);
2778 for (i = ETHERADDRL - 1; i >= 0; i--) {
2779 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2780 macaddr >>= 8;
2781 }
2782 RW_EXIT(&vswp->if_lockrw);
2783 }
2784
2785 /* Compare VLAN ids, array size expected to be same. */
2786 static boolean_t
vsw_cmp_vids(vsw_vlanid_t * vids1,vsw_vlanid_t * vids2,int nvids)2787 vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids)
2788 {
2789 int i, j;
2790 uint16_t vid;
2791
2792 for (i = 0; i < nvids; i++) {
2793 vid = vids1[i].vl_vid;
2794 for (j = 0; j < nvids; j++) {
2795 if (vid == vids2[i].vl_vid)
2796 break;
2797 }
2798 if (j == nvids) {
2799 return (B_FALSE);
2800 }
2801 }
2802 return (B_TRUE);
2803 }
2804