1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <sys/types.h>
27 #include <sys/errno.h>
28 #include <sys/sysmacros.h>
29 #include <sys/param.h>
30 #include <sys/machsystm.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <net/if.h>
51 #include <sys/vnet.h>
52 #include <sys/vio_mailbox.h>
53 #include <sys/vio_common.h>
54 #include <sys/vnet_common.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vio_util.h>
57 #include <sys/vnet_gen.h>
58 #include <sys/atomic.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 #include <sys/vlan.h>
64
65 /*
66 * Implementation of the mac provider functionality for vnet using the
 67  * generic (default) transport layer over sun4v Logical Domain Channels (LDCs).
68 */
69
70 /* Entry Points */
71 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
72 const uint8_t *macaddr, void **vgenhdl);
73 int vgen_init_mdeg(void *arg);
74 void vgen_uninit(void *arg);
75 int vgen_dds_tx(void *arg, void *dmsg);
76 int vgen_enable_intr(void *arg);
77 int vgen_disable_intr(void *arg);
78 mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);
79 static int vgen_start(void *arg);
80 static void vgen_stop(void *arg);
81 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
82 static int vgen_multicst(void *arg, boolean_t add,
83 const uint8_t *mca);
84 static int vgen_promisc(void *arg, boolean_t on);
85 static int vgen_unicst(void *arg, const uint8_t *mca);
86 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
87 static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp);
88 #ifdef VNET_IOC_DEBUG
89 static int vgen_force_link_state(vgen_port_t *portp, int link_state);
90 #endif
91
92 /* Port/LDC Configuration */
93 static int vgen_read_mdprops(vgen_t *vgenp);
94 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
95 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
96 mde_cookie_t node);
97 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
98 uint32_t *mtu);
99 static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
100 boolean_t *pls);
101 static void vgen_detach_ports(vgen_t *vgenp);
102 static void vgen_port_detach(vgen_port_t *portp);
103 static void vgen_port_list_insert(vgen_port_t *portp);
104 static void vgen_port_list_remove(vgen_port_t *portp);
105 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
106 int port_num);
107 static int vgen_mdeg_reg(vgen_t *vgenp);
108 static void vgen_mdeg_unreg(vgen_t *vgenp);
109 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
110 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
111 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
112 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
113 mde_cookie_t mdex);
114 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
115 static int vgen_port_attach(vgen_port_t *portp);
116 static void vgen_port_detach_mdeg(vgen_port_t *portp);
117 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
118 mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
119 static uint64_t vgen_port_stat(vgen_port_t *portp, uint_t stat);
120 static void vgen_port_reset(vgen_port_t *portp);
121 static void vgen_reset_vsw_port(vgen_t *vgenp);
122 static int vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
123 static void vgen_ldc_up(vgen_ldc_t *ldcp);
124 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
125 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
126 static void vgen_port_init(vgen_port_t *portp);
127 static void vgen_port_uninit(vgen_port_t *portp);
128 static int vgen_ldc_init(vgen_ldc_t *ldcp);
129 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
130 static uint64_t vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
131
132 /* I/O Processing */
133 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
134 static int vgen_ldcsend(void *arg, mblk_t *mp);
135 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
136 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
137 static void vgen_tx_watchdog(void *arg);
138
139 /* Dring Configuration */
140 static int vgen_create_dring(vgen_ldc_t *ldcp);
141 static void vgen_destroy_dring(vgen_ldc_t *ldcp);
142 static int vgen_map_dring(vgen_ldc_t *ldcp, void *pkt);
143 static void vgen_unmap_dring(vgen_ldc_t *ldcp);
144 static int vgen_mapin_avail(vgen_ldc_t *ldcp);
145
146 /* VIO Message Processing */
147 static int vgen_handshake(vgen_ldc_t *ldcp);
148 static int vgen_handshake_done(vgen_ldc_t *ldcp);
149 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
150 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
151 static int vgen_handshake_phase3(vgen_ldc_t *ldcp);
152 static void vgen_setup_handshake_params(vgen_ldc_t *ldcp);
153 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
154 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
155 static int vgen_send_rx_dring_reg(vgen_ldc_t *ldcp);
156 static int vgen_send_tx_dring_reg(vgen_ldc_t *ldcp);
157 static void vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
158 uint8_t option);
159 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
160 static int vgen_send_dringdata(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
161 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
162 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
163 vio_msg_tag_t *tagp);
164 static int vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
165 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
166 static int vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
167 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static int vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
169 static int vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
170 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
172 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
173 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
174 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
175 uint32_t msglen);
176 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
177 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
178 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
179 static int vgen_process_reset(vgen_ldc_t *ldcp, int flags);
180 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
181 static void vgen_hwatchdog(void *arg);
182 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
183 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
184 static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
185
186 /* VLANs */
187 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
188 mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
189 uint16_t *nvidsp, uint16_t *default_idp);
190 static void vgen_vlan_create_hash(vgen_port_t *portp);
191 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
192 static void vgen_vlan_add_ids(vgen_port_t *portp);
193 static void vgen_vlan_remove_ids(vgen_port_t *portp);
194 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
195 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
196 uint16_t *vidp);
197 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
198 boolean_t is_tagged, uint16_t vid);
199 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
200 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
201
202 /* Exported functions */
203 int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller);
204 int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
205 void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
206 void vgen_destroy_rxpools(void *arg);
207
208 /* Externs */
209 extern void vnet_dds_rx(void *arg, void *dmsg);
210 extern void vnet_dds_cleanup_hio(vnet_t *vnetp);
211 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
212 extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
213 extern int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen,
214 boolean_t caller_holds_lock);
215 extern void vgen_stop_msg_thread(vgen_ldc_t *ldcp);
216 extern int vgen_create_tx_dring(vgen_ldc_t *ldcp);
217 extern void vgen_destroy_tx_dring(vgen_ldc_t *ldcp);
218 extern int vgen_map_rx_dring(vgen_ldc_t *ldcp, void *pkt);
219 extern void vgen_unmap_rx_dring(vgen_ldc_t *ldcp);
220 extern int vgen_create_rx_dring(vgen_ldc_t *ldcp);
221 extern void vgen_destroy_rx_dring(vgen_ldc_t *ldcp);
222 extern int vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt);
223 extern void vgen_unmap_tx_dring(vgen_ldc_t *ldcp);
224 extern int vgen_map_data(vgen_ldc_t *ldcp, void *pkt);
225 extern int vgen_handle_dringdata_shm(void *arg1, void *arg2);
226 extern int vgen_handle_dringdata(void *arg1, void *arg2);
227 extern int vgen_dringsend_shm(void *arg, mblk_t *mp);
228 extern int vgen_dringsend(void *arg, mblk_t *mp);
229 extern void vgen_ldc_msg_worker(void *arg);
230 extern int vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
231 uint32_t start, int32_t end, uint8_t pstate);
232 extern mblk_t *vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup);
233 extern mblk_t *vgen_poll_rcv(vgen_ldc_t *ldcp, int bytes_to_pickup);
234 extern int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
235
236 #define VGEN_PRI_ETH_DEFINED(vgenp) ((vgenp)->pri_num_types != 0)
237
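/*
 * LDC_LOCK acquires the per-channel locks in the order
 * cblock -> rxlock -> wrlock -> txlock -> tclock; LDC_UNLOCK releases them
 * in the reverse order. Code that takes only a subset of these locks is
 * expected to follow the same ordering to avoid deadlock.
 */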
238 #define LDC_LOCK(ldcp) \
239 mutex_enter(&((ldcp)->cblock));\
240 mutex_enter(&((ldcp)->rxlock));\
241 mutex_enter(&((ldcp)->wrlock));\
242 mutex_enter(&((ldcp)->txlock));\
243 mutex_enter(&((ldcp)->tclock));
244 #define LDC_UNLOCK(ldcp) \
245 mutex_exit(&((ldcp)->tclock));\
246 mutex_exit(&((ldcp)->txlock));\
247 mutex_exit(&((ldcp)->wrlock));\
248 mutex_exit(&((ldcp)->rxlock));\
249 mutex_exit(&((ldcp)->cblock));
250
251 #define VGEN_VER_EQ(ldcp, major, minor) \
252 ((ldcp)->local_hparams.ver_major == (major) && \
253 (ldcp)->local_hparams.ver_minor == (minor))
254
255 #define VGEN_VER_LT(ldcp, major, minor) \
256 (((ldcp)->local_hparams.ver_major < (major)) || \
257 ((ldcp)->local_hparams.ver_major == (major) && \
258 (ldcp)->local_hparams.ver_minor < (minor)))
259
260 #define VGEN_VER_GTEQ(ldcp, major, minor) \
261 (((ldcp)->local_hparams.ver_major > (major)) || \
262 ((ldcp)->local_hparams.ver_major == (major) && \
263 (ldcp)->local_hparams.ver_minor >= (minor)))
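/*
 * Illustrative usage (not part of the original source): feature-specific
 * code paths are typically gated on the negotiated version; for example,
 * jumbo frame handling (v1.4 in the table below) could be checked as:
 *
 *	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
 *		... jumbo frame specific handling ...
 *	}
 */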
264
265 /*
266 * Property names
267 */
268 static char macaddr_propname[] = "mac-address";
269 static char rmacaddr_propname[] = "remote-mac-address";
270 static char channel_propname[] = "channel-endpoint";
271 static char reg_propname[] = "reg";
272 static char port_propname[] = "port";
273 static char swport_propname[] = "switch-port";
274 static char id_propname[] = "id";
275 static char vdev_propname[] = "virtual-device";
276 static char vnet_propname[] = "network";
277 static char pri_types_propname[] = "priority-ether-types";
278 static char vgen_pvid_propname[] = "port-vlan-id";
279 static char vgen_vid_propname[] = "vlan-id";
280 static char vgen_dvid_propname[] = "default-vlan-id";
281 static char port_pvid_propname[] = "remote-port-vlan-id";
282 static char port_vid_propname[] = "remote-vlan-id";
283 static char vgen_mtu_propname[] = "mtu";
284 static char vgen_linkprop_propname[] = "linkprop";
285
286 /*
287 * VIO Protocol Version Info:
288 *
289 * The version specified below represents the version of protocol currently
290 * supported in the driver. It means the driver can negotiate with peers with
291 * versions <= this version. Here is a summary of the feature(s) that are
292 * supported at each version of the protocol:
293 *
294 * 1.0 Basic VIO protocol.
295 * 1.1 vDisk protocol update (no virtual network update).
296 * 1.2 Support for priority frames (priority-ether-types).
297 * 1.3 VLAN and HybridIO support.
298 * 1.4 Jumbo Frame support.
299 * 1.5 Link State Notification support with optional support
300 * for Physical Link information.
301 * 1.6 Support for RxDringData mode.
302 */
303 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 6} };
304
305 /* Tunables */
306 uint32_t vgen_hwd_interval = 5; /* handshake watchdog freq in sec */
307 uint32_t vgen_ldcwr_retries = 10; /* max # of ldc_write() retries */
308 uint32_t vgen_ldcup_retries = 5; /* max # of ldc_up() retries */
309 uint32_t vgen_ldccl_retries = 5; /* max # of ldc_close() retries */
310 uint32_t vgen_tx_delay = 0x30; /* delay when tx descr not available */
311 uint32_t vgen_ldc_mtu = VGEN_LDC_MTU; /* ldc mtu */
312 uint32_t vgen_txwd_interval = VGEN_TXWD_INTERVAL; /* watchdog freq in msec */
313 uint32_t vgen_txwd_timeout = VGEN_TXWD_TIMEOUT; /* tx timeout in msec */
314
315 /*
316 * Max # of channel resets allowed during handshake.
317 */
318 uint32_t vgen_ldc_max_resets = 5;
319
320 /*
321 * See comments in vsw.c for details on the dring modes supported.
322 * In RxDringData mode, # of buffers is determined by multiplying the # of
 323  * descriptors with the factor below. Note that the factor must be > 1; i.e.,
324 * the # of buffers must always be > # of descriptors. This is needed because,
325 * while the shared memory buffers are sent up the stack on the receiver, the
326 * sender needs additional buffers that can be used for further transmits.
327 * See vgen_create_rx_dring() for details.
328 */
329 uint32_t vgen_nrbufs_factor = 2;
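/*
 * Worked example (illustrative): with N descriptors in the rx dring and
 * vgen_nrbufs_factor of 2, 2 * N buffers are created, so that buffers
 * loaned up the receiver's stack do not leave the sender without buffers
 * for further transmits (see vgen_create_rx_dring()).
 */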
330
331 /*
332 * Retry delay used while destroying rx mblk pools. Used in both Dring modes.
333 */
334 int vgen_rxpool_cleanup_delay = 100000; /* 100ms */
335
336 /*
337 * Delay when rx descr not ready; used in TxDring mode only.
338 */
339 uint32_t vgen_recv_delay = 1;
340
341 /*
342 * Retry when rx descr not ready; used in TxDring mode only.
343 */
344 uint32_t vgen_recv_retries = 10;
345
346 /*
347 * Max # of packets accumulated prior to sending them up. It is best
348 * to keep this at 60% of the number of receive buffers. Used in TxDring mode
349 * by the msg worker thread. Used in RxDringData mode while in interrupt mode
350 * (not used in polled mode).
351 */
352 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
353
354 /*
355 * Internal tunables for receive buffer pools, that is, the size and number of
356 * mblks for each pool. At least 3 sizes must be specified if these are used.
357 * The sizes must be specified in increasing order. Non-zero value of the first
358 * size will be used as a hint to use these values instead of the algorithm
359 * that determines the sizes based on MTU. Used in TxDring mode only.
360 */
361 uint32_t vgen_rbufsz1 = 0;
362 uint32_t vgen_rbufsz2 = 0;
363 uint32_t vgen_rbufsz3 = 0;
364 uint32_t vgen_rbufsz4 = 0;
365
366 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
367 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
368 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
369 uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
370
371 /*
372 * In the absence of "priority-ether-types" property in MD, the following
373 * internal tunable can be set to specify a single priority ethertype.
374 */
375 uint64_t vgen_pri_eth_type = 0;
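/*
 * Illustrative example (assumption: this file is built into the "vnet"
 * module): to treat ARP frames as priority traffic when the MD property
 * is absent, the tunable could be set via /etc/system as:
 *
 *	set vnet:vgen_pri_eth_type = 0x806
 */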
376
377 /*
378 * Number of transmit priority buffers that are preallocated per device.
379 * This number is chosen to be a small value to throttle transmission
380 * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
381 */
382 uint32_t vgen_pri_tx_nmblks = 64;
383
384 uint32_t vgen_vlan_nchains = 4; /* # of chains in vlan id hash table */
385
386 /*
387 * Matching criteria passed to the MDEG to register interest
388 * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
389 * by their 'name' and 'cfg-handle' properties.
390 */
391 static md_prop_match_t vdev_prop_match[] = {
392 { MDET_PROP_STR, "name" },
393 { MDET_PROP_VAL, "cfg-handle" },
394 { MDET_LIST_END, NULL }
395 };
396
397 static mdeg_node_match_t vdev_match = { "virtual-device",
398 vdev_prop_match };
399
400 /* MD update matching structure */
401 static md_prop_match_t vport_prop_match[] = {
402 { MDET_PROP_VAL, "id" },
403 { MDET_LIST_END, NULL }
404 };
405
406 static mdeg_node_match_t vport_match = { "virtual-device-port",
407 vport_prop_match };
408
409 /* Template for matching a particular vnet instance */
410 static mdeg_prop_spec_t vgen_prop_template[] = {
411 { MDET_PROP_STR, "name", "network" },
412 { MDET_PROP_VAL, "cfg-handle", NULL },
413 { MDET_LIST_END, NULL, NULL }
414 };
415
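/*
 * Fills in the value of the "cfg-handle" entry (index 1) in a copy of
 * vgen_prop_template above, so that the MDEG registration matches only
 * this vnet instance (see vgen_mdeg_reg()).
 */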
416 #define VGEN_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val)
417
418 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
419
420 #ifdef VNET_IOC_DEBUG
421 #define VGEN_M_CALLBACK_FLAGS (MC_IOCTL)
422 #else
423 #define VGEN_M_CALLBACK_FLAGS (0)
424 #endif
425
426 static mac_callbacks_t vgen_m_callbacks = {
427 VGEN_M_CALLBACK_FLAGS,
428 vgen_stat,
429 vgen_start,
430 vgen_stop,
431 vgen_promisc,
432 vgen_multicst,
433 vgen_unicst,
434 vgen_tx,
435 NULL,
436 vgen_ioctl,
437 NULL,
438 NULL
439 };
440
441 /* Externs */
442 extern pri_t maxclsyspri;
443 extern proc_t p0;
444 extern uint32_t vnet_ethermtu;
445 extern uint16_t vnet_default_vlan_id;
446 extern uint32_t vnet_num_descriptors;
447
448 #ifdef DEBUG
449
450 #define DEBUG_PRINTF vgen_debug_printf
451
452 extern int vnet_dbglevel;
453
454 void vgen_debug_printf(const char *fname, vgen_t *vgenp,
455 vgen_ldc_t *ldcp, const char *fmt, ...);
456
457 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
458 int vgendbg_ldcid = -1;
459
460 /* Flags to simulate error conditions for debugging */
461 int vgen_inject_err_flag = 0;
462
463
464 boolean_t
 465 vgen_inject_error(vgen_ldc_t *ldcp, int error)
466 {
467 if ((vgendbg_ldcid == ldcp->ldc_id) &&
468 (vgen_inject_err_flag & error)) {
469 return (B_TRUE);
470 }
471 return (B_FALSE);
472 }
473
474 #endif
475
476 /*
477 * vgen_init() is called by an instance of vnet driver to initialize the
478 * corresponding generic transport layer. This layer uses Logical Domain
479 * Channels (LDCs) to communicate with the virtual switch in the service domain
480 * and also with peer vnets in other guest domains in the system.
481 *
482 * Arguments:
483 * vnetp: an opaque pointer to the vnet instance
 484  *	regprop: value of the 'reg' property of this vnet instance, used to locate its MD node
485 * vnetdip: dip of the vnet device
486 * macaddr: mac address of the vnet device
487 *
488 * Returns:
 489  *	Success: DDI_SUCCESS; a handle to the vgen instance (vgen_t) is returned via vgenhdl
 490  *	Failure: DDI_FAILURE
491 */
492 int
 493 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
494 const uint8_t *macaddr, void **vgenhdl)
495 {
496 vgen_t *vgenp;
497 int instance;
498 int rv;
499 char qname[TASKQ_NAMELEN];
500
501 if ((vnetp == NULL) || (vnetdip == NULL))
502 return (DDI_FAILURE);
503
504 instance = ddi_get_instance(vnetdip);
505
506 DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
507
508 vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
509
510 vgenp->vnetp = vnetp;
511 vgenp->instance = instance;
512 vgenp->regprop = regprop;
513 vgenp->vnetdip = vnetdip;
514 bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
515 vgenp->phys_link_state = LINK_STATE_UNKNOWN;
516
517 /* allocate multicast table */
518 vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
519 sizeof (struct ether_addr), KM_SLEEP);
520 vgenp->mccount = 0;
521 vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
522
523 mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
524 rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
525
526 (void) snprintf(qname, TASKQ_NAMELEN, "rxpool_taskq%d",
527 instance);
528 if ((vgenp->rxp_taskq = ddi_taskq_create(vnetdip, qname, 1,
529 TASKQ_DEFAULTPRI, 0)) == NULL) {
530 cmn_err(CE_WARN, "!vnet%d: Unable to create rx pool task queue",
531 instance);
532 goto vgen_init_fail;
533 }
534
535 rv = vgen_read_mdprops(vgenp);
536 if (rv != 0) {
537 goto vgen_init_fail;
538 }
539 *vgenhdl = (void *)vgenp;
540
541 DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
542 return (DDI_SUCCESS);
543
544 vgen_init_fail:
545 rw_destroy(&vgenp->vgenports.rwlock);
546 mutex_destroy(&vgenp->lock);
547 kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
548 sizeof (struct ether_addr));
549 if (VGEN_PRI_ETH_DEFINED(vgenp)) {
550 kmem_free(vgenp->pri_types,
551 sizeof (uint16_t) * vgenp->pri_num_types);
552 (void) vio_destroy_mblks(vgenp->pri_tx_vmp);
553 }
554 if (vgenp->rxp_taskq != NULL) {
555 ddi_taskq_destroy(vgenp->rxp_taskq);
556 vgenp->rxp_taskq = NULL;
557 }
558 KMEM_FREE(vgenp);
559 return (DDI_FAILURE);
560 }
561
562 int
 563 vgen_init_mdeg(void *arg)
564 {
565 vgen_t *vgenp = (vgen_t *)arg;
566
567 /* register with MD event generator */
568 return (vgen_mdeg_reg(vgenp));
569 }
570
571 /*
572 * Called by vnet to undo the initializations done by vgen_init().
573 * The handle provided by generic transport during vgen_init() is the argument.
574 */
575 void
 576 vgen_uninit(void *arg)
577 {
578 vgen_t *vgenp = (vgen_t *)arg;
579
580 if (vgenp == NULL) {
581 return;
582 }
583
584 DBG1(vgenp, NULL, "enter\n");
585
586 /* Unregister with MD event generator */
587 vgen_mdeg_unreg(vgenp);
588
589 mutex_enter(&vgenp->lock);
590
591 /*
592 * Detach all ports from the device; note that the device should have
593 * been unplumbed by this time (See vnet_unattach() for the sequence)
594 * and thus vgen_stop() has already been invoked on all the ports.
595 */
596 vgen_detach_ports(vgenp);
597
598 /*
599 * We now destroy the taskq used to clean up rx mblk pools that
600 * couldn't be destroyed when the ports/channels were detached.
601 * We implicitly wait for those tasks to complete in
602 * ddi_taskq_destroy().
603 */
604 if (vgenp->rxp_taskq != NULL) {
605 ddi_taskq_destroy(vgenp->rxp_taskq);
606 vgenp->rxp_taskq = NULL;
607 }
608
609 /* Free multicast table */
610 kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
611
612 /* Free pri_types table */
613 if (VGEN_PRI_ETH_DEFINED(vgenp)) {
614 kmem_free(vgenp->pri_types,
615 sizeof (uint16_t) * vgenp->pri_num_types);
616 (void) vio_destroy_mblks(vgenp->pri_tx_vmp);
617 }
618
619 mutex_exit(&vgenp->lock);
620 rw_destroy(&vgenp->vgenports.rwlock);
621 mutex_destroy(&vgenp->lock);
622
623 DBG1(vgenp, NULL, "exit\n");
624 KMEM_FREE(vgenp);
625 }
626
627 /* enable transmit/receive for the device */
628 int
 629 vgen_start(void *arg)
630 {
631 vgen_port_t *portp = (vgen_port_t *)arg;
632 vgen_t *vgenp = portp->vgenp;
633
634 DBG1(vgenp, NULL, "enter\n");
635 mutex_enter(&portp->lock);
636 vgen_port_init(portp);
637 portp->flags |= VGEN_STARTED;
638 mutex_exit(&portp->lock);
639 DBG1(vgenp, NULL, "exit\n");
640
641 return (DDI_SUCCESS);
642 }
643
644 /* stop transmit/receive */
645 void
 646 vgen_stop(void *arg)
647 {
648 vgen_port_t *portp = (vgen_port_t *)arg;
649 vgen_t *vgenp = portp->vgenp;
650
651 DBG1(vgenp, NULL, "enter\n");
652
653 mutex_enter(&portp->lock);
654 if (portp->flags & VGEN_STARTED) {
655 vgen_port_uninit(portp);
656 portp->flags &= ~(VGEN_STARTED);
657 }
658 mutex_exit(&portp->lock);
659 DBG1(vgenp, NULL, "exit\n");
660
661 }
662
663 /* vgen transmit function */
664 static mblk_t *
 665 vgen_tx(void *arg, mblk_t *mp)
666 {
667 vgen_port_t *portp;
668 int status;
669
670 portp = (vgen_port_t *)arg;
671 status = vgen_portsend(portp, mp);
672 if (status != VGEN_SUCCESS) {
673 /* failure */
674 return (mp);
675 }
676 /* success */
677 return (NULL);
678 }
679
680 /*
681 * This function provides any necessary tagging/untagging of the frames
682 * that are being transmitted over the port. It first verifies the vlan
683 * membership of the destination(port) and drops the packet if the
684 * destination doesn't belong to the given vlan.
685 *
686 * Arguments:
687 * portp: port over which the frames should be transmitted
688 * mp: frame to be transmitted
689 * is_tagged:
690 * B_TRUE: indicates frame header contains the vlan tag already.
691 * B_FALSE: indicates frame is untagged.
692 * vid: vlan in which the frame should be transmitted.
693 *
694 * Returns:
 695  *	Success: frame (mblk_t *) after doing the necessary tag/untag.
696 * Failure: NULL
697 */
698 static mblk_t *
 699 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
700 uint16_t vid)
701 {
702 vgen_t *vgenp;
703 boolean_t dst_tagged;
704 int rv;
705
706 vgenp = portp->vgenp;
707
708 /*
709 * If the packet is going to a vnet:
710 * Check if the destination vnet is in the same vlan.
711 * Check the frame header if tag or untag is needed.
712 *
713 * We do not check the above conditions if the packet is going to vsw:
714 * vsw must be present implicitly in all the vlans that a vnet device
715 * is configured into; even if vsw itself is not assigned to those
716 * vlans as an interface. For instance, the packet might be destined
717 * to another vnet(indirectly through vsw) or to an external host
718 * which is in the same vlan as this vnet and vsw itself may not be
719 * present in that vlan. Similarly packets going to vsw must be
720 * always tagged(unless in the default-vlan) if not already tagged,
721 * as we do not know the final destination. This is needed because
722 * vsw must always invoke its switching function only after tagging
723 * the packet; otherwise after switching function determines the
724 * destination we cannot figure out if the destination belongs to the
 725 	 * same vlan that the frame originated from and if it needs tag/
726 * untag. Note that vsw will tag the packet itself when it receives
727 * it over the channel from a client if needed. However, that is
728 * needed only in the case of vlan unaware clients such as obp or
729 * earlier versions of vnet.
730 *
731 */
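	/*
	 * In summary (as implemented below):
	 *  - dest is a vnet-port and vid is not in its vlan hash: drop;
	 *  - dest is a vnet-port and the frame's tag state matches the
	 *    destination's (tagged/untagged): send as is;
	 *  - dest is a vnet-port needing the opposite tag state: strip
	 *    or insert the tag accordingly;
	 *  - dest is the vsw-port: insert the tag unless already tagged.
	 */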
732 if (portp != vgenp->vsw_portp) {
733 /*
734 * Packet going to a vnet. Check if the destination vnet is in
735 * the same vlan. Then check the frame header if tag/untag is
736 * needed.
737 */
738 rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
739 if (rv == B_FALSE) {
740 /* drop the packet */
741 freemsg(mp);
742 return (NULL);
743 }
744
745 /* is the destination tagged or untagged in this vlan? */
746 (vid == portp->pvid) ? (dst_tagged = B_FALSE) :
747 (dst_tagged = B_TRUE);
748
749 if (is_tagged == dst_tagged) {
750 /* no tagging/untagging needed */
751 return (mp);
752 }
753
754 if (is_tagged == B_TRUE) {
755 /* frame is tagged; destination needs untagged */
756 mp = vnet_vlan_remove_tag(mp);
757 return (mp);
758 }
759
760 /* (is_tagged == B_FALSE): fallthru to tag tx packet: */
761 }
762
763 /*
764 * Packet going to a vnet needs tagging.
765 * OR
766 * If the packet is going to vsw, then it must be tagged in all cases:
767 * unknown unicast, broadcast/multicast or to vsw interface.
768 */
769
770 if (is_tagged == B_FALSE) {
771 mp = vnet_vlan_insert_tag(mp, vid);
772 }
773
774 return (mp);
775 }
776
777 /* transmit packets over the given port */
778 static int
 779 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
780 {
781 vgen_ldc_t *ldcp;
782 int status;
783 int rv = VGEN_SUCCESS;
784 vgen_t *vgenp;
785 vnet_t *vnetp;
786 boolean_t is_tagged;
787 boolean_t dec_refcnt = B_FALSE;
788 uint16_t vlan_id;
789 struct ether_header *ehp;
790
791 if (portp == NULL) {
792 return (VGEN_FAILURE);
793 }
794
795 vgenp = portp->vgenp;
796 vnetp = vgenp->vnetp;
797
798 if (portp->use_vsw_port) {
799 (void) atomic_inc_32(&vgenp->vsw_port_refcnt);
800 portp = portp->vgenp->vsw_portp;
801 ASSERT(portp != NULL);
802 dec_refcnt = B_TRUE;
803 }
804
805 /*
806 * Determine the vlan id that the frame belongs to.
807 */
808 ehp = (struct ether_header *)mp->b_rptr;
809 is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
810
811 if (vlan_id == vnetp->default_vlan_id) {
812
813 /* Frames in default vlan must be untagged */
814 ASSERT(is_tagged == B_FALSE);
815
816 /*
817 * If the destination is a vnet-port verify it belongs to the
818 * default vlan; otherwise drop the packet. We do not need
819 * this check for vsw-port, as it should implicitly belong to
820 * this vlan; see comments in vgen_vlan_frame_fixtag().
821 */
822 if (portp != vgenp->vsw_portp &&
823 portp->pvid != vnetp->default_vlan_id) {
824 freemsg(mp);
825 goto portsend_ret;
826 }
827
828 } else { /* frame not in default-vlan */
829
830 mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
831 if (mp == NULL) {
832 goto portsend_ret;
833 }
834
835 }
836
837 ldcp = portp->ldcp;
838 status = ldcp->tx(ldcp, mp);
839
840 if (status != VGEN_TX_SUCCESS) {
841 rv = VGEN_FAILURE;
842 }
843
844 portsend_ret:
845 if (dec_refcnt == B_TRUE) {
846 (void) atomic_dec_32(&vgenp->vsw_port_refcnt);
847 }
848 return (rv);
849 }
850
851 /*
852 * Wrapper function to transmit normal and/or priority frames over the channel.
853 */
854 static int
 855 vgen_ldcsend(void *arg, mblk_t *mp)
856 {
857 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
858 int status;
859 struct ether_header *ehp;
860 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
861 uint32_t num_types;
862 uint16_t *types;
863 int i;
864
865 ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
866
867 num_types = vgenp->pri_num_types;
868 types = vgenp->pri_types;
869 ehp = (struct ether_header *)mp->b_rptr;
870
871 for (i = 0; i < num_types; i++) {
872
873 if (ehp->ether_type == types[i]) {
874 /* priority frame, use pri tx function */
875 vgen_ldcsend_pkt(ldcp, mp);
876 return (VGEN_SUCCESS);
877 }
878
879 }
880
881 if (ldcp->tx_dringdata == NULL) {
882 freemsg(mp);
883 return (VGEN_SUCCESS);
884 }
885
886 status = ldcp->tx_dringdata(ldcp, mp);
887 return (status);
888 }
889
890 /*
891 * This function transmits the frame in the payload of a raw data
892 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
893 * send special frames with high priorities, without going through
894 * the normal data path which uses descriptor ring mechanism.
895 */
896 static void
 897 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
898 {
899 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
900 vio_raw_data_msg_t *pkt;
901 mblk_t *bp;
902 mblk_t *nmp = NULL;
903 vio_mblk_t *vmp;
904 caddr_t dst;
905 uint32_t mblksz;
906 uint32_t size;
907 uint32_t nbytes;
908 int rv;
909 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
910 vgen_stats_t *statsp = &ldcp->stats;
911
912 /* drop the packet if ldc is not up or handshake is not done */
913 if (ldcp->ldc_status != LDC_UP) {
914 (void) atomic_inc_32(&statsp->tx_pri_fail);
915 DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
916 ldcp->ldc_status);
917 goto send_pkt_exit;
918 }
919
920 if (ldcp->hphase != VH_DONE) {
921 (void) atomic_inc_32(&statsp->tx_pri_fail);
922 DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
923 ldcp->hphase);
924 goto send_pkt_exit;
925 }
926
927 size = msgsize(mp);
928
929 /* frame size bigger than available payload len of raw data msg ? */
930 if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
931 (void) atomic_inc_32(&statsp->tx_pri_fail);
932 DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
933 goto send_pkt_exit;
934 }
935
936 if (size < ETHERMIN)
937 size = ETHERMIN;
938
939 /* alloc space for a raw data message */
940 vmp = vio_allocb(vgenp->pri_tx_vmp);
941 if (vmp == NULL) {
942 (void) atomic_inc_32(&statsp->tx_pri_fail);
943 DWARN(vgenp, ldcp, "vio_allocb failed\n");
944 goto send_pkt_exit;
945 } else {
946 nmp = vmp->mp;
947 }
948 pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
949
950 /* copy frame into the payload of raw data message */
951 dst = (caddr_t)pkt->data;
952 for (bp = mp; bp != NULL; bp = bp->b_cont) {
953 mblksz = MBLKL(bp);
954 bcopy(bp->b_rptr, dst, mblksz);
955 dst += mblksz;
956 }
957
958 vmp->state = VIO_MBLK_HAS_DATA;
959
960 /* setup the raw data msg */
961 pkt->tag.vio_msgtype = VIO_TYPE_DATA;
962 pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
963 pkt->tag.vio_subtype_env = VIO_PKT_DATA;
964 pkt->tag.vio_sid = ldcp->local_sid;
965 nbytes = VIO_PKT_DATA_HDRSIZE + size;
966
967 /* send the msg over ldc */
968 rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
969 if (rv != VGEN_SUCCESS) {
970 (void) atomic_inc_32(&statsp->tx_pri_fail);
971 DWARN(vgenp, ldcp, "Error sending priority frame\n");
972 if (rv == ECONNRESET) {
973 (void) vgen_handle_evt_reset(ldcp, VGEN_OTHER);
974 }
975 goto send_pkt_exit;
976 }
977
978 /* update stats */
979 (void) atomic_inc_64(&statsp->tx_pri_packets);
980 (void) atomic_add_64(&statsp->tx_pri_bytes, size);
981
982 send_pkt_exit:
983 if (nmp != NULL)
984 freemsg(nmp);
985 freemsg(mp);
986 }
987
988 /*
989 * enable/disable a multicast address
990 * note that the cblock of the ldc channel connected to the vsw is used for
991 * synchronization of the mctab.
992 */
993 int
 994 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
995 {
996 vgen_t *vgenp;
997 vnet_mcast_msg_t mcastmsg;
998 vio_msg_tag_t *tagp;
999 vgen_port_t *portp;
1000 vgen_ldc_t *ldcp;
1001 struct ether_addr *addrp;
1002 int rv = DDI_FAILURE;
1003 uint32_t i;
1004
1005 portp = (vgen_port_t *)arg;
1006 vgenp = portp->vgenp;
1007
1008 if (portp->is_vsw_port != B_TRUE) {
1009 return (DDI_SUCCESS);
1010 }
1011
1012 addrp = (struct ether_addr *)mca;
1013 tagp = &mcastmsg.tag;
1014 bzero(&mcastmsg, sizeof (mcastmsg));
1015
1016 ldcp = portp->ldcp;
1017 if (ldcp == NULL) {
1018 return (DDI_FAILURE);
1019 }
1020
1021 mutex_enter(&ldcp->cblock);
1022
1023 if (ldcp->hphase == VH_DONE) {
1024 /*
1025 * If handshake is done, send a msg to vsw to add/remove
1026 * the multicast address. Otherwise, we just update this
1027 * mcast address in our table and the table will be sync'd
1028 * with vsw when handshake completes.
1029 */
1030 tagp->vio_msgtype = VIO_TYPE_CTRL;
1031 tagp->vio_subtype = VIO_SUBTYPE_INFO;
1032 tagp->vio_subtype_env = VNET_MCAST_INFO;
1033 tagp->vio_sid = ldcp->local_sid;
1034 bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1035 mcastmsg.set = add;
1036 mcastmsg.count = 1;
1037 if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1038 B_FALSE) != VGEN_SUCCESS) {
1039 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1040 rv = DDI_FAILURE;
1041 goto vgen_mcast_exit;
1042 }
1043 }
1044
1045 if (add) {
1046
1047 /* expand multicast table if necessary */
1048 if (vgenp->mccount >= vgenp->mcsize) {
1049 struct ether_addr *newtab;
1050 uint32_t newsize;
1051
1052
1053 newsize = vgenp->mcsize * 2;
1054
1055 newtab = kmem_zalloc(newsize *
1056 sizeof (struct ether_addr), KM_NOSLEEP);
1057 if (newtab == NULL)
1058 goto vgen_mcast_exit;
1059 bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1060 sizeof (struct ether_addr));
1061 kmem_free(vgenp->mctab,
1062 vgenp->mcsize * sizeof (struct ether_addr));
1063
1064 vgenp->mctab = newtab;
1065 vgenp->mcsize = newsize;
1066 }
1067
1068 /* add address to the table */
1069 vgenp->mctab[vgenp->mccount++] = *addrp;
1070
1071 } else {
1072
1073 /* delete address from the table */
1074 for (i = 0; i < vgenp->mccount; i++) {
1075 if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1076
1077 /*
1078 * If there's more than one address in this
1079 * table, delete the unwanted one by moving
1080 * the last one in the list over top of it;
1081 * otherwise, just remove it.
1082 */
1083 if (vgenp->mccount > 1) {
1084 vgenp->mctab[i] =
1085 vgenp->mctab[vgenp->mccount-1];
1086 }
1087 vgenp->mccount--;
1088 break;
1089 }
1090 }
1091 }
1092
1093 rv = DDI_SUCCESS;
1094
1095 vgen_mcast_exit:
1096
1097 mutex_exit(&ldcp->cblock);
1098 return (rv);
1099 }
1100
1101 /* set or clear promiscuous mode on the device */
1102 static int
1103 vgen_promisc(void *arg, boolean_t on)
1104 {
1105 _NOTE(ARGUNUSED(arg, on))
1106 return (DDI_SUCCESS);
1107 }
1108
1109 /* set the unicast mac address of the device */
1110 static int
1111 vgen_unicst(void *arg, const uint8_t *mca)
1112 {
1113 _NOTE(ARGUNUSED(arg, mca))
1114 return (DDI_SUCCESS);
1115 }
1116
1117 /* get device statistics */
1118 int
1119 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1120 {
1121 vgen_port_t *portp = (vgen_port_t *)arg;
1122
1123 *val = vgen_port_stat(portp, stat);
1124 return (0);
1125 }
1126
1127 /* vgen internal functions */
1128 /* detach all ports from the device */
1129 static void
1130 vgen_detach_ports(vgen_t *vgenp)
1131 {
1132 vgen_port_t *portp;
1133 vgen_portlist_t *plistp;
1134
1135 plistp = &(vgenp->vgenports);
1136 WRITE_ENTER(&plistp->rwlock);
1137 while ((portp = plistp->headp) != NULL) {
1138 vgen_port_detach(portp);
1139 }
1140 RW_EXIT(&plistp->rwlock);
1141 }
1142
1143 /*
1144 * detach the given port.
1145 */
1146 static void
1147 vgen_port_detach(vgen_port_t *portp)
1148 {
1149 vgen_t *vgenp;
1150 int port_num;
1151
1152 vgenp = portp->vgenp;
1153 port_num = portp->port_num;
1154
1155 DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1156
1157 /*
1158 * If this port is connected to the vswitch, then
1159 * potentially there could be ports that may be using
1160 * this port to transmit packets. To address this do
1161 * the following:
1162 * - First set vgenp->vsw_portp to NULL, so that
1163 	 *   it's not used after that.
1164 * - Then wait for the refcnt to go down to 0.
1165 * - Now we can safely detach this port.
1166 */
1167 if (vgenp->vsw_portp == portp) {
1168 vgenp->vsw_portp = NULL;
1169 while (vgenp->vsw_port_refcnt > 0) {
1170 delay(drv_usectohz(vgen_tx_delay));
1171 }
1172 (void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1173 }
1174
1175 if (portp->vhp != NULL) {
1176 vio_net_resource_unreg(portp->vhp);
1177 portp->vhp = NULL;
1178 }
1179
1180 vgen_vlan_destroy_hash(portp);
1181
1182 /* remove it from port list */
1183 vgen_port_list_remove(portp);
1184
1185 /* detach channels from this port */
1186 vgen_ldc_detach(portp->ldcp);
1187
1188 if (portp->num_ldcs != 0) {
1189 kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1190 portp->num_ldcs = 0;
1191 }
1192
1193 mutex_destroy(&portp->lock);
1194 KMEM_FREE(portp);
1195
1196 DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1197 }
1198
1199 /* add a port to port list */
1200 static void
1201 vgen_port_list_insert(vgen_port_t *portp)
1202 {
1203 vgen_portlist_t *plistp;
1204 vgen_t *vgenp;
1205
1206 vgenp = portp->vgenp;
1207 plistp = &(vgenp->vgenports);
1208
1209 if (plistp->headp == NULL) {
1210 plistp->headp = portp;
1211 } else {
1212 plistp->tailp->nextp = portp;
1213 }
1214 plistp->tailp = portp;
1215 portp->nextp = NULL;
1216 }
1217
1218 /* remove a port from port list */
1219 static void
1220 vgen_port_list_remove(vgen_port_t *portp)
1221 {
1222 vgen_port_t *prevp;
1223 vgen_port_t *nextp;
1224 vgen_portlist_t *plistp;
1225 vgen_t *vgenp;
1226
1227 vgenp = portp->vgenp;
1228
1229 plistp = &(vgenp->vgenports);
1230
1231 if (plistp->headp == NULL)
1232 return;
1233
1234 if (portp == plistp->headp) {
1235 plistp->headp = portp->nextp;
1236 if (portp == plistp->tailp)
1237 plistp->tailp = plistp->headp;
1238 } else {
1239 for (prevp = plistp->headp;
1240 ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1241 prevp = nextp)
1242 ;
1243 if (nextp == portp) {
1244 prevp->nextp = portp->nextp;
1245 }
1246 if (portp == plistp->tailp)
1247 plistp->tailp = prevp;
1248 }
1249 }
1250
1251 /* lookup a port in the list based on port_num */
1252 static vgen_port_t *
1253 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1254 {
1255 vgen_port_t *portp = NULL;
1256
1257 for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1258 if (portp->port_num == port_num) {
1259 break;
1260 }
1261 }
1262
1263 return (portp);
1264 }
1265
1266 static void
1267 vgen_port_init(vgen_port_t *portp)
1268 {
1269 /* Add the port to the specified vlans */
1270 vgen_vlan_add_ids(portp);
1271
1272 /* Bring up the channel */
1273 (void) vgen_ldc_init(portp->ldcp);
1274 }
1275
1276 static void
1277 vgen_port_uninit(vgen_port_t *portp)
1278 {
1279 vgen_ldc_uninit(portp->ldcp);
1280
1281 /* remove the port from vlans it has been assigned to */
1282 vgen_vlan_remove_ids(portp);
1283 }
1284
1285 /*
1286 * Scan the machine description for this instance of vnet
1287 * and read its properties. Called only from vgen_init().
1288 * Returns: 0 on success, 1 on failure.
1289 */
1290 static int
1291 vgen_read_mdprops(vgen_t *vgenp)
1292 {
1293 vnet_t *vnetp = vgenp->vnetp;
1294 md_t *mdp = NULL;
1295 mde_cookie_t rootnode;
1296 mde_cookie_t *listp = NULL;
1297 uint64_t cfgh;
1298 char *name;
1299 int rv = 1;
1300 int num_nodes = 0;
1301 int num_devs = 0;
1302 int listsz = 0;
1303 int i;
1304
1305 if ((mdp = md_get_handle()) == NULL) {
1306 return (rv);
1307 }
1308
1309 num_nodes = md_node_count(mdp);
1310 ASSERT(num_nodes > 0);
1311
1312 listsz = num_nodes * sizeof (mde_cookie_t);
1313 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1314
1315 rootnode = md_root_node(mdp);
1316
1317 /* search for all "virtual_device" nodes */
1318 num_devs = md_scan_dag(mdp, rootnode,
1319 md_find_name(mdp, vdev_propname),
1320 md_find_name(mdp, "fwd"), listp);
1321 if (num_devs <= 0) {
1322 goto vgen_readmd_exit;
1323 }
1324
1325 /*
1326 * Now loop through the list of virtual-devices looking for
1327 * devices with name "network" and for each such device compare
1328 * its instance with what we have from the 'reg' property to
1329 * find the right node in MD and then read all its properties.
1330 */
1331 for (i = 0; i < num_devs; i++) {
1332
1333 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1334 goto vgen_readmd_exit;
1335 }
1336
1337 /* is this a "network" device? */
1338 if (strcmp(name, vnet_propname) != 0)
1339 continue;
1340
1341 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1342 goto vgen_readmd_exit;
1343 }
1344
1345 /* is this the required instance of vnet? */
1346 if (vgenp->regprop != cfgh)
1347 continue;
1348
1349 /*
1350 * Read the 'linkprop' property to know if this vnet
1351 * device should get physical link updates from vswitch.
1352 */
1353 vgen_linkprop_read(vgenp, mdp, listp[i],
1354 &vnetp->pls_update);
1355
1356 /*
1357 * Read the mtu. Note that we set the mtu of vnet device within
1358 * this routine itself, after validating the range.
1359 */
1360 vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1361 if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1362 vnetp->mtu = ETHERMTU;
1363 }
1364 vgenp->max_frame_size = vnetp->mtu +
1365 sizeof (struct ether_header) + VLAN_TAGSZ;
1366
1367 /* read priority ether types */
1368 vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1369
1370 /* read vlan id properties of this vnet instance */
1371 vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1372 &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1373 &vnetp->default_vlan_id);
1374
1375 rv = 0;
1376 break;
1377 }
1378
1379 vgen_readmd_exit:
1380
1381 kmem_free(listp, listsz);
1382 (void) md_fini_handle(mdp);
1383 return (rv);
1384 }
1385
1386 /*
1387 * Read vlan id properties of the given MD node.
1388 * Arguments:
1389 * arg: device argument(vnet device or a port)
1390 * type: type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1391 * mdp: machine description
1392 * node: md node cookie
1393 *
1394 * Returns:
1395 * pvidp: port-vlan-id of the node
1396 * vidspp: list of vlan-ids of the node
1397 * nvidsp: # of vlan-ids in the list
1398 * default_idp: default-vlan-id of the node(if node is vnet device)
1399 */
1400 static void
1401 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1402 uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1403 uint16_t *default_idp)
1404 {
1405 vgen_t *vgenp;
1406 vnet_t *vnetp;
1407 vgen_port_t *portp;
1408 char *pvid_propname;
1409 char *vid_propname;
1410 uint_t nvids;
1411 uint32_t vids_size;
1412 int rv;
1413 int i;
1414 uint64_t *data;
1415 uint64_t val;
1416 int size;
1417 int inst;
1418
1419 if (type == VGEN_LOCAL) {
1420
1421 vgenp = (vgen_t *)arg;
1422 vnetp = vgenp->vnetp;
1423 pvid_propname = vgen_pvid_propname;
1424 vid_propname = vgen_vid_propname;
1425 inst = vnetp->instance;
1426
1427 } else if (type == VGEN_PEER) {
1428
1429 portp = (vgen_port_t *)arg;
1430 vgenp = portp->vgenp;
1431 vnetp = vgenp->vnetp;
1432 pvid_propname = port_pvid_propname;
1433 vid_propname = port_vid_propname;
1434 inst = portp->port_num;
1435
1436 } else {
1437 return;
1438 }
1439
1440 if (type == VGEN_LOCAL && default_idp != NULL) {
1441 rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1442 if (rv != 0) {
1443 DWARN(vgenp, NULL, "prop(%s) not found",
1444 vgen_dvid_propname);
1445
1446 *default_idp = vnet_default_vlan_id;
1447 } else {
1448 *default_idp = val & 0xFFF;
1449 DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1450 inst, *default_idp);
1451 }
1452 }
1453
1454 rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1455 if (rv != 0) {
1456 DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1457 *pvidp = vnet_default_vlan_id;
1458 } else {
1459
1460 *pvidp = val & 0xFFF;
1461 DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1462 pvid_propname, inst, *pvidp);
1463 }
1464
1465 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1466 &size);
1467 if (rv != 0) {
1468 DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1469 size = 0;
1470 } else {
1471 size /= sizeof (uint64_t);
1472 }
1473 nvids = size;
1474
1475 if (nvids != 0) {
1476 DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1477 vids_size = sizeof (uint16_t) * nvids;
1478 *vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1479 for (i = 0; i < nvids; i++) {
1480 (*vidspp)[i] = data[i] & 0xFFFF;
1481 DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1482 }
1483 DBG2(vgenp, NULL, "\n");
1484 }
1485
1486 *nvidsp = nvids;
1487 }
1488
1489 /*
1490 * Create a vlan id hash table for the given port.
1491 */
1492 static void
1493 vgen_vlan_create_hash(vgen_port_t *portp)
1494 {
1495 char hashname[MAXNAMELEN];
1496
1497 (void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1498 portp->port_num);
1499
1500 portp->vlan_nchains = vgen_vlan_nchains;
1501 portp->vlan_hashp = mod_hash_create_idhash(hashname,
1502 portp->vlan_nchains, mod_hash_null_valdtor);
1503 }
1504
1505 /*
1506 * Destroy the vlan id hash table in the given port.
1507 */
1508 static void
1509 vgen_vlan_destroy_hash(vgen_port_t *portp)
1510 {
1511 if (portp->vlan_hashp != NULL) {
1512 mod_hash_destroy_hash(portp->vlan_hashp);
1513 portp->vlan_hashp = NULL;
1514 portp->vlan_nchains = 0;
1515 }
1516 }
1517
1518 /*
1519  * Add a port to the vlans specified in its port properties.
1520 */
1521 static void
1522 vgen_vlan_add_ids(vgen_port_t *portp)
1523 {
1524 int rv;
1525 int i;
1526
1527 rv = mod_hash_insert(portp->vlan_hashp,
1528 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1529 (mod_hash_val_t)B_TRUE);
1530 ASSERT(rv == 0);
1531
1532 for (i = 0; i < portp->nvids; i++) {
1533 rv = mod_hash_insert(portp->vlan_hashp,
1534 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1535 (mod_hash_val_t)B_TRUE);
1536 ASSERT(rv == 0);
1537 }
1538 }
1539
1540 /*
1541 * Remove a port from the vlans it has been assigned to.
1542 */
1543 static void
1544 vgen_vlan_remove_ids(vgen_port_t *portp)
1545 {
1546 int rv;
1547 int i;
1548 mod_hash_val_t vp;
1549
1550 rv = mod_hash_remove(portp->vlan_hashp,
1551 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1552 (mod_hash_val_t *)&vp);
1553 ASSERT(rv == 0);
1554
1555 for (i = 0; i < portp->nvids; i++) {
1556 rv = mod_hash_remove(portp->vlan_hashp,
1557 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1558 (mod_hash_val_t *)&vp);
1559 ASSERT(rv == 0);
1560 }
1561 }
1562
1563 /*
1564 * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1565 * then the vlan-id is available in the tag; otherwise, its vlan id is
1566 * implicitly obtained from the port-vlan-id of the vnet device.
1567 * The vlan id determined is returned in vidp.
1568 * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1569 */
1570 static boolean_t
1571 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1572 {
1573 struct ether_vlan_header *evhp;
1574
1575 /* If it's a tagged frame, get the vlan id from vlan header */
1576 if (ehp->ether_type == ETHERTYPE_VLAN) {
1577
1578 evhp = (struct ether_vlan_header *)ehp;
1579 *vidp = VLAN_ID(ntohs(evhp->ether_tci));
1580 return (B_TRUE);
1581 }
1582
1583 /* Untagged frame, vlan-id is the pvid of vnet device */
1584 *vidp = vnetp->pvid;
1585 return (B_FALSE);
1586 }
1587
1588 /*
1589 * Find the given vlan id in the hash table.
1590 * Return: B_TRUE if the id is found; B_FALSE if not found.
1591 */
1592 static boolean_t
1593 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1594 {
1595 int rv;
1596 mod_hash_val_t vp;
1597
1598 rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1599
1600 if (rv != 0)
1601 return (B_FALSE);
1602
1603 return (B_TRUE);
1604 }
1605
1606 /*
1607 * This function reads "priority-ether-types" property from md. This property
1608 * is used to enable support for priority frames. Applications which need
1609 * guaranteed and timely delivery of certain high priority frames to/from
1610 * a vnet or vsw within ldoms, should configure this property by providing
1611 * the ether type(s) for which the priority facility is needed.
1612 * Normal data frames are delivered over a ldc channel using the descriptor
1613 * ring mechanism which is constrained by factors such as descriptor ring size,
1614 * the rate at which the ring is processed at the peer ldc end point, etc.
1615 * The priority mechanism provides an Out-Of-Band path to send/receive frames
1616 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1617 * descriptor ring path and enables a more reliable and timely delivery of
1618 * frames to the peer.
1619 */
1620 static void
1621 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1622 {
1623 int rv;
1624 uint16_t *types;
1625 uint64_t *data;
1626 int size;
1627 int i;
1628 size_t mblk_sz;
1629
1630 rv = md_get_prop_data(mdp, node, pri_types_propname,
1631 (uint8_t **)&data, &size);
1632 if (rv != 0) {
1633 /*
1634 * Property may not exist if we are running pre-ldoms1.1 f/w.
1635 * Check if 'vgen_pri_eth_type' has been set in that case.
1636 */
1637 if (vgen_pri_eth_type != 0) {
1638 size = sizeof (vgen_pri_eth_type);
1639 data = &vgen_pri_eth_type;
1640 } else {
1641 DBG2(vgenp, NULL,
1642 "prop(%s) not found", pri_types_propname);
1643 size = 0;
1644 }
1645 }
1646
1647 if (size == 0) {
1648 vgenp->pri_num_types = 0;
1649 return;
1650 }
1651
1652 /*
1653 * we have some priority-ether-types defined;
1654 * allocate a table of these types and also
1655 * allocate a pool of mblks to transmit these
1656 * priority packets.
1657 */
1658 size /= sizeof (uint64_t);
1659 vgenp->pri_num_types = size;
1660 vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1661 for (i = 0, types = vgenp->pri_types; i < size; i++) {
1662 types[i] = data[i] & 0xFFFF;
1663 }
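	/* round the buffer size up to the next multiple of 8 bytes */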
1664 mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1665 (void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz, NULL,
1666 &vgenp->pri_tx_vmp);
1667 }
1668
1669 static void
1670 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1671 {
1672 int rv;
1673 uint64_t val;
1674 char *mtu_propname;
1675
1676 mtu_propname = vgen_mtu_propname;
1677
1678 rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1679 if (rv != 0) {
1680 DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1681 *mtu = vnet_ethermtu;
1682 } else {
1683
1684 *mtu = val & 0xFFFF;
1685 DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1686 vgenp->instance, *mtu);
1687 }
1688 }
1689
1690 static void
1691 vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
1692 boolean_t *pls)
1693 {
1694 int rv;
1695 uint64_t val;
1696 char *linkpropname;
1697
1698 linkpropname = vgen_linkprop_propname;
1699
1700 rv = md_get_prop_val(mdp, node, linkpropname, &val);
1701 if (rv != 0) {
1702 DWARN(vgenp, NULL, "prop(%s) not found", linkpropname);
1703 *pls = B_FALSE;
1704 } else {
1705
1706 *pls = (val & 0x1) ? B_TRUE : B_FALSE;
1707 DBG2(vgenp, NULL, "%s(%d): (%d)\n", linkpropname,
1708 vgenp->instance, *pls);
1709 }
1710 }
1711
1712 /* register with MD event generator */
1713 static int
1714 vgen_mdeg_reg(vgen_t *vgenp)
1715 {
1716 mdeg_prop_spec_t *pspecp;
1717 mdeg_node_spec_t *parentp;
1718 uint_t templatesz;
1719 int rv;
1720 mdeg_handle_t dev_hdl = 0;
1721 mdeg_handle_t port_hdl = 0;
1722
1723 templatesz = sizeof (vgen_prop_template);
1724 pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1725 if (pspecp == NULL) {
1726 return (DDI_FAILURE);
1727 }
1728 parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1729 if (parentp == NULL) {
1730 kmem_free(pspecp, templatesz);
1731 return (DDI_FAILURE);
1732 }
1733
1734 bcopy(vgen_prop_template, pspecp, templatesz);
1735
1736 /*
1737 * NOTE: The instance here refers to the value of "reg" property and
1738 * not the dev_info instance (ddi_get_instance()) of vnet.
1739 */
1740 VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
1741
1742 parentp->namep = "virtual-device";
1743 parentp->specp = pspecp;
1744
1745 /* save parentp in vgen_t */
1746 vgenp->mdeg_parentp = parentp;
1747
1748 /*
1749 * Register an interest in 'virtual-device' nodes with a
1750 * 'name' property of 'network'
1751 */
1752 rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
1753 if (rv != MDEG_SUCCESS) {
1754 DERR(vgenp, NULL, "mdeg_register failed\n");
1755 goto mdeg_reg_fail;
1756 }
1757
1758 /* Register an interest in 'port' nodes */
1759 rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
1760 &port_hdl);
1761 if (rv != MDEG_SUCCESS) {
1762 DERR(vgenp, NULL, "mdeg_register failed\n");
1763 goto mdeg_reg_fail;
1764 }
1765
1766 /* save mdeg handle in vgen_t */
1767 vgenp->mdeg_dev_hdl = dev_hdl;
1768 vgenp->mdeg_port_hdl = port_hdl;
1769
1770 return (DDI_SUCCESS);
1771
1772 mdeg_reg_fail:
1773 if (dev_hdl != 0) {
1774 (void) mdeg_unregister(dev_hdl);
1775 }
1776 KMEM_FREE(parentp);
1777 kmem_free(pspecp, templatesz);
1778 vgenp->mdeg_parentp = NULL;
1779 return (DDI_FAILURE);
1780 }
1781
1782 /* unregister with MD event generator */
1783 static void
1784 vgen_mdeg_unreg(vgen_t *vgenp)
1785 {
1786 if (vgenp->mdeg_dev_hdl != 0) {
1787 (void) mdeg_unregister(vgenp->mdeg_dev_hdl);
1788 vgenp->mdeg_dev_hdl = 0;
1789 }
1790 if (vgenp->mdeg_port_hdl != 0) {
1791 (void) mdeg_unregister(vgenp->mdeg_port_hdl);
1792 vgenp->mdeg_port_hdl = 0;
1793 }
1794
1795 if (vgenp->mdeg_parentp != NULL) {
1796 kmem_free(vgenp->mdeg_parentp->specp,
1797 sizeof (vgen_prop_template));
1798 KMEM_FREE(vgenp->mdeg_parentp);
1799 vgenp->mdeg_parentp = NULL;
1800 }
1801 }
1802
1803 /* mdeg callback function for the port node */
1804 static int
1805 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
1806 {
1807 int idx;
1808 int vsw_idx = -1;
1809 uint64_t val;
1810 vgen_t *vgenp;
1811
1812 if ((resp == NULL) || (cb_argp == NULL)) {
1813 return (MDEG_FAILURE);
1814 }
1815
1816 vgenp = (vgen_t *)cb_argp;
1817 DBG1(vgenp, NULL, "enter\n");
1818
1819 mutex_enter(&vgenp->lock);
1820
1821 DBG1(vgenp, NULL, "ports: removed(%x), "
1822 "added(%x), updated(%x)\n", resp->removed.nelem,
1823 resp->added.nelem, resp->match_curr.nelem);
1824
1825 for (idx = 0; idx < resp->removed.nelem; idx++) {
1826 (void) vgen_remove_port(vgenp, resp->removed.mdp,
1827 resp->removed.mdep[idx]);
1828 }
1829
1830 if (vgenp->vsw_portp == NULL) {
1831 /*
1832 * find vsw_port and add it first, because other ports need
1833 * this when adding an fdb entry (see vgen_port_init()).
1834 */
1835 for (idx = 0; idx < resp->added.nelem; idx++) {
1836 if (!(md_get_prop_val(resp->added.mdp,
1837 resp->added.mdep[idx], swport_propname, &val))) {
1838 if (val == 0) {
1839 /*
1840 * This port is connected to the
1841 * vsw on service domain.
1842 */
1843 vsw_idx = idx;
1844 if (vgen_add_port(vgenp,
1845 resp->added.mdp,
1846 resp->added.mdep[idx]) !=
1847 DDI_SUCCESS) {
1848 cmn_err(CE_NOTE, "vnet%d Could "
1849 "not initialize virtual "
1850 "switch port.",
1851 vgenp->instance);
1852 mutex_exit(&vgenp->lock);
1853 return (MDEG_FAILURE);
1854 }
1855 break;
1856 }
1857 }
1858 }
1859 if (vsw_idx == -1) {
1860 DWARN(vgenp, NULL, "can't find vsw_port\n");
1861 mutex_exit(&vgenp->lock);
1862 return (MDEG_FAILURE);
1863 }
1864 }
1865
1866 for (idx = 0; idx < resp->added.nelem; idx++) {
1867 if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1868 continue;
1869
1870 /* If this port can't be added just skip it. */
1871 (void) vgen_add_port(vgenp, resp->added.mdp,
1872 resp->added.mdep[idx]);
1873 }
1874
1875 for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1876 (void) vgen_update_port(vgenp, resp->match_curr.mdp,
1877 resp->match_curr.mdep[idx],
1878 resp->match_prev.mdp,
1879 resp->match_prev.mdep[idx]);
1880 }
1881
1882 mutex_exit(&vgenp->lock);
1883 DBG1(vgenp, NULL, "exit\n");
1884 return (MDEG_SUCCESS);
1885 }
1886
1887 /* mdeg callback function for the vnet node */
1888 static int
1889 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1890 {
1891 vgen_t *vgenp;
1892 vnet_t *vnetp;
1893 md_t *mdp;
1894 mde_cookie_t node;
1895 uint64_t inst;
1896 char *node_name = NULL;
1897
1898 if ((resp == NULL) || (cb_argp == NULL)) {
1899 return (MDEG_FAILURE);
1900 }
1901
1902 vgenp = (vgen_t *)cb_argp;
1903 vnetp = vgenp->vnetp;
1904
1905 DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
1906 " : prev matched %d", resp->added.nelem, resp->removed.nelem,
1907 resp->match_curr.nelem, resp->match_prev.nelem);
1908
1909 mutex_enter(&vgenp->lock);
1910
1911 /*
1912 * We get an initial callback for this node as 'added' after
1913 * registering with mdeg. Note that we would have already gathered
1914 * information about this vnet node by walking MD earlier during attach
1915 * (in vgen_read_mdprops()). So, there is a window where the properties
1916 * of this node might have changed when we get this initial 'added'
1917 * callback. We handle this as if an update occurred and invoke the same
1918 * function which handles updates to the properties of this vnet-node
1919 * if any. A non-zero 'match' value indicates that the MD has been
1920 * updated and that a 'network' node is present which may or may not
1921 * have been updated. It is up to the clients to examine their own
1922 * nodes and determine if they have changed.
1923 */
1924 if (resp->added.nelem != 0) {
1925
1926 if (resp->added.nelem != 1) {
1927 cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
1928 "invalid: %d\n", vnetp->instance,
1929 resp->added.nelem);
1930 goto vgen_mdeg_cb_err;
1931 }
1932
1933 mdp = resp->added.mdp;
1934 node = resp->added.mdep[0];
1935
1936 } else if (resp->match_curr.nelem != 0) {
1937
1938 if (resp->match_curr.nelem != 1) {
1939 cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
1940 "invalid: %d\n", vnetp->instance,
1941 resp->match_curr.nelem);
1942 goto vgen_mdeg_cb_err;
1943 }
1944
1945 mdp = resp->match_curr.mdp;
1946 node = resp->match_curr.mdep[0];
1947
1948 } else {
1949 goto vgen_mdeg_cb_err;
1950 }
1951
1952 /* Validate name and instance */
1953 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1954 DERR(vgenp, NULL, "unable to get node name\n");
1955 goto vgen_mdeg_cb_err;
1956 }
1957
1958 /* is this a virtual-network device? */
1959 if (strcmp(node_name, vnet_propname) != 0) {
1960 DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
1961 goto vgen_mdeg_cb_err;
1962 }
1963
1964 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1965 DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
1966 goto vgen_mdeg_cb_err;
1967 }
1968
1969 /* is this the right instance of vnet? */
1970 if (inst != vgenp->regprop) {
1971 DERR(vgenp, NULL, "Invalid cfg-handle: %lx\n", inst);
1972 goto vgen_mdeg_cb_err;
1973 }
1974
1975 vgen_update_md_prop(vgenp, mdp, node);
1976
1977 mutex_exit(&vgenp->lock);
1978 return (MDEG_SUCCESS);
1979
1980 vgen_mdeg_cb_err:
1981 mutex_exit(&vgenp->lock);
1982 return (MDEG_FAILURE);
1983 }
1984
1985 /*
1986 * Check to see if the relevant properties in the specified node have
1987 * changed, and if so take the appropriate action.
1988 */
1989 static void
1990 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1991 {
1992 uint16_t pvid;
1993 uint16_t *vids;
1994 uint16_t nvids;
1995 vnet_t *vnetp = vgenp->vnetp;
1996 uint32_t mtu;
1997 boolean_t pls_update;
1998 enum { MD_init = 0x1,
1999 MD_vlans = 0x2,
2000 MD_mtu = 0x4,
2001 MD_pls = 0x8 } updated;
2002 int rv;
2003
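/*
 * First compare each property of interest (vlan ids, mtu, linkprop)
 * against the values currently in use and record any differences in
 * 'updated'; the changes are then applied further below.
 */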
2004 updated = MD_init;
2005
2006 /* Read the vlan ids */
2007 vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2008 &nvids, NULL);
2009
2010 /* Determine if there are any vlan id updates */
2011 if ((pvid != vnetp->pvid) || /* pvid changed? */
2012 (nvids != vnetp->nvids) || /* # of vids changed? */
2013 ((nvids != 0) && (vnetp->nvids != 0) && /* vids changed? */
2014 bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2015 updated |= MD_vlans;
2016 }
2017
2018 /* Read mtu */
2019 vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2020 if (mtu != vnetp->mtu) {
2021 if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2022 updated |= MD_mtu;
2023 } else {
2024 cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2025 " as the specified value:%d is invalid\n",
2026 vnetp->instance, mtu);
2027 }
2028 }
2029
2030 /*
2031 * Read the 'linkprop' property.
2032 */
2033 vgen_linkprop_read(vgenp, mdp, mdex, &pls_update);
2034 if (pls_update != vnetp->pls_update) {
2035 updated |= MD_pls;
2036 }
2037
2038 /* Now process the updated props */
2039
2040 if (updated & MD_vlans) {
2041
2042 /* save the new vlan ids */
2043 vnetp->pvid = pvid;
2044 if (vnetp->nvids != 0) {
2045 kmem_free(vnetp->vids,
2046 sizeof (uint16_t) * vnetp->nvids);
2047 vnetp->nvids = 0;
2048 }
2049 if (nvids != 0) {
2050 vnetp->nvids = nvids;
2051 vnetp->vids = vids;
2052 }
2053
2054 /* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2055 vgen_reset_vlan_unaware_ports(vgenp);
2056
2057 } else {
2058
2059 if (nvids != 0) {
2060 kmem_free(vids, sizeof (uint16_t) * nvids);
2061 }
2062 }
2063
2064 if (updated & MD_mtu) {
2065
2066 DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2067 vnetp->mtu, mtu);
2068
2069 rv = vnet_mtu_update(vnetp, mtu);
2070 if (rv == 0) {
2071 vgenp->max_frame_size = mtu +
2072 sizeof (struct ether_header) + VLAN_TAGSZ;
2073 }
2074 }
2075
2076 if (updated & MD_pls) {
2077 /* enable/disable physical link state updates */
2078 vnetp->pls_update = pls_update;
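/*
 * Note that vgenp->lock is dropped across the vsw-port reset below and
 * reacquired before returning to the caller; the reset path presumably
 * takes its own channel locks and may block.
 */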
2079 mutex_exit(&vgenp->lock);
2080
2081 /* reset vsw-port to re-negotiate with the updated prop. */
2082 vgen_reset_vsw_port(vgenp);
2083
2084 mutex_enter(&vgenp->lock);
2085 }
2086 }
2087
2088 /* add a new port to the device */
2089 static int
2090 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2091 {
2092 vgen_port_t *portp;
2093 int rv;
2094
2095 portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2096
2097 rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2098 if (rv != DDI_SUCCESS) {
2099 KMEM_FREE(portp);
2100 return (DDI_FAILURE);
2101 }
2102
2103 rv = vgen_port_attach(portp);
2104 if (rv != DDI_SUCCESS) {
2105 return (DDI_FAILURE);
2106 }
2107
2108 return (DDI_SUCCESS);
2109 }
2110
2111 /* read properties of the port from its md node */
2112 static int
2113 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2114 mde_cookie_t mdex)
2115 {
2116 uint64_t port_num;
2117 uint64_t *ldc_ids;
2118 uint64_t macaddr;
2119 uint64_t val;
2120 int num_ldcs;
2121 int i;
2122 int addrsz;
2123 int num_nodes = 0;
2124 int listsz = 0;
2125 mde_cookie_t *listp = NULL;
2126 uint8_t *addrp;
2127 struct ether_addr ea;
2128
2129 /* read "id" property to get the port number */
2130 if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2131 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2132 return (DDI_FAILURE);
2133 }
2134
2135 /*
2136 * Find the channel endpoint node(s) under this port node.
2137 */
2138 if ((num_nodes = md_node_count(mdp)) <= 0) {
2139 DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2140 num_nodes);
2141 return (DDI_FAILURE);
2142 }
2143
2144 /* allocate space for node list */
2145 listsz = num_nodes * sizeof (mde_cookie_t);
2146 listp = kmem_zalloc(listsz, KM_NOSLEEP);
2147 if (listp == NULL)
2148 return (DDI_FAILURE);
2149
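/*
 * md_scan_dag() walks the MD from this port node along its "fwd" arcs,
 * returning the number of matching channel nodes (named by
 * channel_propname) and filling listp with their cookies.
 */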
2150 num_ldcs = md_scan_dag(mdp, mdex,
2151 md_find_name(mdp, channel_propname),
2152 md_find_name(mdp, "fwd"), listp);
2153
2154 if (num_ldcs <= 0) {
2155 DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2156 kmem_free(listp, listsz);
2157 return (DDI_FAILURE);
2158 }
2159
2160 if (num_ldcs > 1) {
2161 DWARN(vgenp, NULL, "Port %d: Number of channels %d > 1\n",
2162 port_num, num_ldcs);
2163 }
2164
2165 ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2166 if (ldc_ids == NULL) {
2167 kmem_free(listp, listsz);
2168 return (DDI_FAILURE);
2169 }
2170
2171 for (i = 0; i < num_ldcs; i++) {
2172 /* read channel ids */
2173 if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2174 DWARN(vgenp, NULL, "prop(%s) not found\n",
2175 id_propname);
2176 kmem_free(listp, listsz);
2177 kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2178 return (DDI_FAILURE);
2179 }
2180 DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2181 }
2182
2183 kmem_free(listp, listsz);
2184
2185 if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2186 &addrsz)) {
2187 DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2188 kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2189 return (DDI_FAILURE);
2190 }
2191
2192 if (addrsz < ETHERADDRL) {
2193 DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2194 kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2195 return (DDI_FAILURE);
2196 }
2197
2198 macaddr = *((uint64_t *)addrp);
2199
2200 DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2201
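/*
 * The remote-mac-address property is a 64-bit value with the last octet
 * of the ethernet address in its least significant byte; unpack it into
 * the ether_addr structure one byte at a time.
 */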
2202 for (i = ETHERADDRL - 1; i >= 0; i--) {
2203 ea.ether_addr_octet[i] = macaddr & 0xFF;
2204 macaddr >>= 8;
2205 }
2206
2207 if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2208 if (val == 0) {
2209 /* This port is connected to the vswitch */
2210 portp->is_vsw_port = B_TRUE;
2211 } else {
2212 portp->is_vsw_port = B_FALSE;
2213 }
2214 }
2215
2216 /* now update all properties into the port */
2217 portp->vgenp = vgenp;
2218 portp->port_num = port_num;
2219 ether_copy(&ea, &portp->macaddr);
2220 portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2221 bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2222 portp->num_ldcs = num_ldcs;
2223
2224 /* read vlan id properties of this port node */
2225 vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2226 &portp->vids, &portp->nvids, NULL);
2227
2228 kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2229
2230 return (DDI_SUCCESS);
2231 }
2232
2233 /* remove a port from the device */
2234 static int
2235 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2236 {
2237 uint64_t port_num;
2238 vgen_port_t *portp;
2239 vgen_portlist_t *plistp;
2240
2241 /* read "id" property to get the port number */
2242 if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2243 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2244 return (DDI_FAILURE);
2245 }
2246
2247 plistp = &(vgenp->vgenports);
2248
2249 WRITE_ENTER(&plistp->rwlock);
2250 portp = vgen_port_lookup(plistp, (int)port_num);
2251 if (portp == NULL) {
2252 DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2253 RW_EXIT(&plistp->rwlock);
2254 return (DDI_FAILURE);
2255 }
2256
2257 vgen_port_detach_mdeg(portp);
2258 RW_EXIT(&plistp->rwlock);
2259
2260 return (DDI_SUCCESS);
2261 }
2262
2263 /* attach a port to the device based on mdeg data */
2264 static int
2265 vgen_port_attach(vgen_port_t *portp)
2266 {
2267 vgen_portlist_t *plistp;
2268 vgen_t *vgenp;
2269 uint64_t *ldcids;
2270 mac_register_t *macp;
2271 vio_net_res_type_t type;
2272 int rv;
2273
2274 ASSERT(portp != NULL);
2275 vgenp = portp->vgenp;
2276 ldcids = portp->ldc_ids;
2277
2278 DBG2(vgenp, NULL, "port_num(%d), ldcid(%lx)\n",
2279 portp->port_num, ldcids[0]);
2280
2281 mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2282
2283 /*
2284 * attach the channel under the port using its channel id;
2285 * note that we only support one channel per port for now.
2286 */
2287 if (vgen_ldc_attach(portp, ldcids[0]) == DDI_FAILURE) {
2288 vgen_port_detach(portp);
2289 return (DDI_FAILURE);
2290 }
2291
2292 /* create vlan id hash table */
2293 vgen_vlan_create_hash(portp);
2294
2295 if (portp->is_vsw_port == B_TRUE) {
2296 /* This port is connected to the switch port */
2297 (void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2298 type = VIO_NET_RES_LDC_SERVICE;
2299 } else {
2300 (void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2301 type = VIO_NET_RES_LDC_GUEST;
2302 }
2303
2304 if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2305 vgen_port_detach(portp);
2306 return (DDI_FAILURE);
2307 }
2308 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2309 macp->m_driver = portp;
2310 macp->m_dip = vgenp->vnetdip;
2311 macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2312 macp->m_callbacks = &vgen_m_callbacks;
2313 macp->m_min_sdu = 0;
2314 macp->m_max_sdu = ETHERMTU;
2315
2316 mutex_enter(&portp->lock);
2317 rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2318 portp->macaddr, &portp->vhp, &portp->vcb);
2319 mutex_exit(&portp->lock);
2320 mac_free(macp);
2321
2322 if (rv == 0) {
2323 /* link it into the list of ports */
2324 plistp = &(vgenp->vgenports);
2325 WRITE_ENTER(&plistp->rwlock);
2326 vgen_port_list_insert(portp);
2327 RW_EXIT(&plistp->rwlock);
2328
2329 if (portp->is_vsw_port == B_TRUE) {
2330 /* We now have the vswitch port attached */
2331 vgenp->vsw_portp = portp;
2332 (void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
2333 }
2334 } else {
2335 DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2336 portp);
2337 vgen_port_detach(portp);
2338 }
2339
2340 DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2341 return (DDI_SUCCESS);
2342 }
2343
2344 /* detach a port from the device based on mdeg data */
2345 static void
2346 vgen_port_detach_mdeg(vgen_port_t *portp)
2347 {
2348 vgen_t *vgenp = portp->vgenp;
2349
2350 DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2351
2352 mutex_enter(&portp->lock);
2353
2354 /* stop the port if needed */
2355 if (portp->flags & VGEN_STARTED) {
2356 vgen_port_uninit(portp);
2357 portp->flags &= ~(VGEN_STARTED);
2358 }
2359
2360 mutex_exit(&portp->lock);
2361 vgen_port_detach(portp);
2362
2363 DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2364 }
2365
2366 static int
2367 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2368 md_t *prev_mdp, mde_cookie_t prev_mdex)
2369 {
2370 uint64_t cport_num;
2371 uint64_t pport_num;
2372 vgen_portlist_t *plistp;
2373 vgen_port_t *portp;
2374 boolean_t updated_vlans = B_FALSE;
2375 uint16_t pvid;
2376 uint16_t *vids;
2377 uint16_t nvids;
2378
2379 /*
2380 * For now, we get port updates only if vlan ids changed.
2381 * We read the port num and do some sanity checks.
2382 */
2383 if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2384 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2385 return (DDI_FAILURE);
2386 }
2387
2388 if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2389 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2390 return (DDI_FAILURE);
2391 }
2392 if (cport_num != pport_num)
2393 return (DDI_FAILURE);
2394
2395 plistp = &(vgenp->vgenports);
2396
2397 READ_ENTER(&plistp->rwlock);
2398
2399 portp = vgen_port_lookup(plistp, (int)cport_num);
2400 if (portp == NULL) {
2401 DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2402 RW_EXIT(&plistp->rwlock);
2403 return (DDI_FAILURE);
2404 }
2405
2406 /* Read the vlan ids */
2407 vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2408 &nvids, NULL);
2409
2410 /* Determine if there are any vlan id updates */
2411 if ((pvid != portp->pvid) || /* pvid changed? */
2412 (nvids != portp->nvids) || /* # of vids changed? */
2413 ((nvids != 0) && (portp->nvids != 0) && /* vids changed? */
2414 bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2415 updated_vlans = B_TRUE;
2416 }
2417
2418 if (updated_vlans == B_FALSE) {
2419 RW_EXIT(&plistp->rwlock);
2420 return (DDI_FAILURE);
2421 }
2422
2423 /* remove the port from vlans it has been assigned to */
2424 vgen_vlan_remove_ids(portp);
2425
2426 /* save the new vlan ids */
2427 portp->pvid = pvid;
2428 if (portp->nvids != 0) {
2429 kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2430 portp->nvids = 0;
2431 }
2432 if (nvids != 0) {
2433 portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2434 bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2435 portp->nvids = nvids;
2436 kmem_free(vids, sizeof (uint16_t) * nvids);
2437 }
2438
2439 /* add port to the new vlans */
2440 vgen_vlan_add_ids(portp);
2441
2442 /* reset the port if it is vlan unaware (ver < 1.3) */
2443 vgen_vlan_unaware_port_reset(portp);
2444
2445 RW_EXIT(&plistp->rwlock);
2446
2447 return (DDI_SUCCESS);
2448 }
2449
2450 static uint64_t
2451 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2452 {
2453 return (vgen_ldc_stat(portp->ldcp, stat));
2454 }
2455
2456 /* attach the channel corresponding to the given ldc_id to the port */
2457 static int
2458 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
2459 {
2460 vgen_t *vgenp;
2461 vgen_ldc_t *ldcp;
2462 ldc_attr_t attr;
2463 int status;
2464 ldc_status_t istatus;
2465 char kname[MAXNAMELEN];
2466 int instance;
2467 enum {AST_init = 0x0, AST_ldc_alloc = 0x1,
2468 AST_mutex_init = 0x2, AST_ldc_init = 0x4,
2469 AST_ldc_reg_cb = 0x8 } attach_state;
2470
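/*
 * attach_state accumulates an AST_* bit as each resource is acquired,
 * so that the ldc_attach_failed path below can undo exactly the steps
 * that were completed.
 */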
2471 attach_state = AST_init;
2472 vgenp = portp->vgenp;
2473
2474 ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
2475 if (ldcp == NULL) {
2476 goto ldc_attach_failed;
2477 }
2478 ldcp->ldc_id = ldc_id;
2479 ldcp->portp = portp;
2480
2481 attach_state |= AST_ldc_alloc;
2482
2483 mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
2484 mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
2485 mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
2486 mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
2487 mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
2488 mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL);
2489 mutex_init(&ldcp->msg_thr_lock, NULL, MUTEX_DRIVER, NULL);
2490 cv_init(&ldcp->msg_thr_cv, NULL, CV_DRIVER, NULL);
2491
2492 attach_state |= AST_mutex_init;
2493
2494 attr.devclass = LDC_DEV_NT;
2495 attr.instance = vgenp->instance;
2496 attr.mode = LDC_MODE_UNRELIABLE;
2497 attr.mtu = vgen_ldc_mtu;
2498 status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
2499 if (status != 0) {
2500 DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
2501 goto ldc_attach_failed;
2502 }
2503 attach_state |= AST_ldc_init;
2504
2505 status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
2506 if (status != 0) {
2507 DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
2508 status);
2509 goto ldc_attach_failed;
2510 }
2511 /*
2512 * allocate a message for ldc_read()s, big enough to hold ctrl and
2513 * data msgs, including raw data msgs used to recv priority frames.
2514 */
2515 ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
2516 ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
2517 attach_state |= AST_ldc_reg_cb;
2518
2519 (void) ldc_status(ldcp->ldc_handle, &istatus);
2520 ASSERT(istatus == LDC_INIT);
2521 ldcp->ldc_status = istatus;
2522
2523 /* Setup kstats for the channel */
2524 instance = vgenp->instance;
2525 (void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2526 ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2527 if (ldcp->ksp == NULL) {
2528 goto ldc_attach_failed;
2529 }
2530
2531 /* initialize vgen_versions supported */
2532 bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2533 vgen_reset_vnet_proto_ops(ldcp);
2534
2535 /* Link this channel to the port */
2536 portp->ldcp = ldcp;
2537
2538 ldcp->link_state = LINK_STATE_UNKNOWN;
2539 #ifdef VNET_IOC_DEBUG
2540 ldcp->link_down_forced = B_FALSE;
2541 #endif
2542 ldcp->flags |= CHANNEL_ATTACHED;
2543 return (DDI_SUCCESS);
2544
2545 ldc_attach_failed:
2546 if (attach_state & AST_ldc_reg_cb) {
2547 (void) ldc_unreg_callback(ldcp->ldc_handle);
2548 kmem_free(ldcp->ldcmsg, ldcp->msglen);
2549 }
2550
2551 if (attach_state & AST_ldc_init) {
2552 (void) ldc_fini(ldcp->ldc_handle);
2553 }
2554 if (attach_state & AST_mutex_init) {
2555 mutex_destroy(&ldcp->tclock);
2556 mutex_destroy(&ldcp->txlock);
2557 mutex_destroy(&ldcp->cblock);
2558 mutex_destroy(&ldcp->wrlock);
2559 mutex_destroy(&ldcp->rxlock);
2560 mutex_destroy(&ldcp->pollq_lock);
2561 }
2562 if (attach_state & AST_ldc_alloc) {
2563 KMEM_FREE(ldcp);
2564 }
2565 return (DDI_FAILURE);
2566 }
2567
2568 /* detach a channel from the port */
2569 static void
2570 vgen_ldc_detach(vgen_ldc_t *ldcp)
2571 {
2572 vgen_port_t *portp;
2573 vgen_t *vgenp;
2574
2575 ASSERT(ldcp != NULL);
2576
2577 portp = ldcp->portp;
2578 vgenp = portp->vgenp;
2579
2580 if (ldcp->ldc_status != LDC_INIT) {
2581 DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
2582 }
2583
2584 if (ldcp->flags & CHANNEL_ATTACHED) {
2585 ldcp->flags &= ~(CHANNEL_ATTACHED);
2586
2587 (void) ldc_unreg_callback(ldcp->ldc_handle);
2588 (void) ldc_fini(ldcp->ldc_handle);
2589
2590 kmem_free(ldcp->ldcmsg, ldcp->msglen);
2591 vgen_destroy_kstats(ldcp->ksp);
2592 ldcp->ksp = NULL;
2593 mutex_destroy(&ldcp->tclock);
2594 mutex_destroy(&ldcp->txlock);
2595 mutex_destroy(&ldcp->cblock);
2596 mutex_destroy(&ldcp->wrlock);
2597 mutex_destroy(&ldcp->rxlock);
2598 mutex_destroy(&ldcp->pollq_lock);
2599 mutex_destroy(&ldcp->msg_thr_lock);
2600 cv_destroy(&ldcp->msg_thr_cv);
2601
2602 KMEM_FREE(ldcp);
2603 }
2604 }
2605
2606 /* enable transmit/receive on the channel */
2607 static int
2608 vgen_ldc_init(vgen_ldc_t *ldcp)
2609 {
2610 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2611 ldc_status_t istatus;
2612 int rv;
2613 enum { ST_init = 0x0, ST_ldc_open = 0x1,
2614 ST_cb_enable = 0x2} init_state;
2615 int flag = 0;
2616
2617 init_state = ST_init;
2618
2619 DBG1(vgenp, ldcp, "enter\n");
2620 LDC_LOCK(ldcp);
2621
2622 rv = ldc_open(ldcp->ldc_handle);
2623 if (rv != 0) {
2624 DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
2625 goto ldcinit_failed;
2626 }
2627 init_state |= ST_ldc_open;
2628
2629 (void) ldc_status(ldcp->ldc_handle, &istatus);
2630 if (istatus != LDC_OPEN && istatus != LDC_READY) {
2631 DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
2632 goto ldcinit_failed;
2633 }
2634 ldcp->ldc_status = istatus;
2635
2636 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
2637 if (rv != 0) {
2638 DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
2639 goto ldcinit_failed;
2640 }
2641
2642 init_state |= ST_cb_enable;
2643
2644 vgen_ldc_up(ldcp);
2645
2646 (void) ldc_status(ldcp->ldc_handle, &istatus);
2647 if (istatus == LDC_UP) {
2648 DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
2649 }
2650
2651 ldcp->ldc_status = istatus;
2652
2653 ldcp->hphase = VH_PHASE0;
2654 ldcp->hstate = 0;
2655 ldcp->flags |= CHANNEL_STARTED;
2656
2657 vgen_setup_handshake_params(ldcp);
2658
2659 /* if channel is already UP - start handshake */
2660 if (istatus == LDC_UP) {
2661 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2662 if (ldcp->portp != vgenp->vsw_portp) {
2663 /*
2664 * As the channel is up, use this port from now on.
2665 */
2666 (void) atomic_swap_32(
2667 &ldcp->portp->use_vsw_port, B_FALSE);
2668 }
2669
2670 /* Initialize local session id */
2671 ldcp->local_sid = ddi_get_lbolt();
2672
2673 /* clear peer session id */
2674 ldcp->peer_sid = 0;
2675
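/*
 * LDC_LOCK() above acquired all of the channel locks; release every
 * lock except cblock before initiating the handshake, and drop cblock
 * itself once vgen_handshake() returns.
 */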
2676 mutex_exit(&ldcp->tclock);
2677 mutex_exit(&ldcp->txlock);
2678 mutex_exit(&ldcp->wrlock);
2679 mutex_exit(&ldcp->rxlock);
2680 rv = vgen_handshake(vh_nextphase(ldcp));
2681 mutex_exit(&ldcp->cblock);
2682 if (rv != 0) {
2683 flag = (rv == ECONNRESET) ? VGEN_FLAG_EVT_RESET :
2684 VGEN_FLAG_NEED_LDCRESET;
2685 (void) vgen_process_reset(ldcp, flag);
2686 }
2687 } else {
2688 LDC_UNLOCK(ldcp);
2689 }
2690
2691 return (DDI_SUCCESS);
2692
2693 ldcinit_failed:
2694 if (init_state & ST_cb_enable) {
2695 (void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
2696 }
2697 if (init_state & ST_ldc_open) {
2698 (void) ldc_close(ldcp->ldc_handle);
2699 }
2700 LDC_UNLOCK(ldcp);
2701 DBG1(vgenp, ldcp, "exit\n");
2702 return (DDI_FAILURE);
2703 }
2704
2705 /* stop transmit/receive on the channel */
2706 static void
2707 vgen_ldc_uninit(vgen_ldc_t *ldcp)
2708 {
2709 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2710
2711 DBG1(vgenp, ldcp, "enter\n");
2712
2713 LDC_LOCK(ldcp);
2714
2715 if ((ldcp->flags & CHANNEL_STARTED) == 0) {
2716 LDC_UNLOCK(ldcp);
2717 DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
2718 return;
2719 }
2720
2721 LDC_UNLOCK(ldcp);
2722
2723 while (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
2724 delay(drv_usectohz(VGEN_LDC_UNINIT_DELAY));
2725 }
2726
2727 (void) vgen_process_reset(ldcp, VGEN_FLAG_UNINIT);
2728
2729 DBG1(vgenp, ldcp, "exit\n");
2730 }
2731
2732 /*
2733 * Create a descriptor ring, that will be exported to the peer for mapping.
2734 */
2735 static int
2736 vgen_create_dring(vgen_ldc_t *ldcp)
2737 {
2738 vgen_hparams_t *lp = &ldcp->local_hparams;
2739 int rv;
2740
2741 if (lp->dring_mode == VIO_RX_DRING_DATA) {
2742 rv = vgen_create_rx_dring(ldcp);
2743 } else {
2744 rv = vgen_create_tx_dring(ldcp);
2745 }
2746
2747 return (rv);
2748 }
2749
2750 /*
2751 * Destroy the descriptor ring.
2752 */
2753 static void
2754 vgen_destroy_dring(vgen_ldc_t *ldcp)
2755 {
2756 vgen_hparams_t *lp = &ldcp->local_hparams;
2757
2758 if (lp->dring_mode == VIO_RX_DRING_DATA) {
2759 vgen_destroy_rx_dring(ldcp);
2760 } else {
2761 vgen_destroy_tx_dring(ldcp);
2762 }
2763 }
2764
2765 /*
2766 * Map the descriptor ring exported by the peer.
2767 */
2768 static int
2769 vgen_map_dring(vgen_ldc_t *ldcp, void *pkt)
2770 {
2771 int rv;
2772 vgen_hparams_t *lp = &ldcp->local_hparams;
2773
2774 if (lp->dring_mode == VIO_RX_DRING_DATA) {
2775 /*
2776 * In RxDringData mode, the dring that we map in
2777 * becomes our transmit descriptor ring.
2778 */
2779 rv = vgen_map_tx_dring(ldcp, pkt);
2780 } else {
2781
2782 /*
2783 * In TxDring mode, the dring that we map in
2784 * becomes our receive descriptor ring.
2785 */
2786 rv = vgen_map_rx_dring(ldcp, pkt);
2787 }
2788
2789 return (rv);
2790 }
2791
2792 /*
2793 * Unmap the descriptor ring exported by the peer.
2794 */
2795 static void
2796 vgen_unmap_dring(vgen_ldc_t *ldcp)
2797 {
2798 vgen_hparams_t *lp = &ldcp->local_hparams;
2799
2800 if (lp->dring_mode == VIO_RX_DRING_DATA) {
2801 vgen_unmap_tx_dring(ldcp);
2802 } else {
2803 vgen_unmap_rx_dring(ldcp);
2804 }
2805 }
2806
2807 void
2808 vgen_destroy_rxpools(void *arg)
2809 {
2810 vio_mblk_pool_t *poolp = (vio_mblk_pool_t *)arg;
2811 vio_mblk_pool_t *npoolp;
2812
2813 while (poolp != NULL) {
2814 npoolp = poolp->nextp;
2815 while (vio_destroy_mblks(poolp) != 0) {
2816 delay(drv_usectohz(vgen_rxpool_cleanup_delay));
2817 }
2818 poolp = npoolp;
2819 }
2820 }
2821
2822 /* get channel statistics */
2823 static uint64_t
2824 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2825 {
2826 vgen_stats_t *statsp;
2827 uint64_t val;
2828
2829 val = 0;
2830 statsp = &ldcp->stats;
2831 switch (stat) {
2832
2833 case MAC_STAT_MULTIRCV:
2834 val = statsp->multircv;
2835 break;
2836
2837 case MAC_STAT_BRDCSTRCV:
2838 val = statsp->brdcstrcv;
2839 break;
2840
2841 case MAC_STAT_MULTIXMT:
2842 val = statsp->multixmt;
2843 break;
2844
2845 case MAC_STAT_BRDCSTXMT:
2846 val = statsp->brdcstxmt;
2847 break;
2848
2849 case MAC_STAT_NORCVBUF:
2850 val = statsp->norcvbuf;
2851 break;
2852
2853 case MAC_STAT_IERRORS:
2854 val = statsp->ierrors;
2855 break;
2856
2857 case MAC_STAT_NOXMTBUF:
2858 val = statsp->noxmtbuf;
2859 break;
2860
2861 case MAC_STAT_OERRORS:
2862 val = statsp->oerrors;
2863 break;
2864
2865 case MAC_STAT_COLLISIONS:
2866 break;
2867
2868 case MAC_STAT_RBYTES:
2869 val = statsp->rbytes;
2870 break;
2871
2872 case MAC_STAT_IPACKETS:
2873 val = statsp->ipackets;
2874 break;
2875
2876 case MAC_STAT_OBYTES:
2877 val = statsp->obytes;
2878 break;
2879
2880 case MAC_STAT_OPACKETS:
2881 val = statsp->opackets;
2882 break;
2883
2884 /* stats not relevant to ldc, return 0 */
2885 case MAC_STAT_IFSPEED:
2886 case ETHER_STAT_ALIGN_ERRORS:
2887 case ETHER_STAT_FCS_ERRORS:
2888 case ETHER_STAT_FIRST_COLLISIONS:
2889 case ETHER_STAT_MULTI_COLLISIONS:
2890 case ETHER_STAT_DEFER_XMTS:
2891 case ETHER_STAT_TX_LATE_COLLISIONS:
2892 case ETHER_STAT_EX_COLLISIONS:
2893 case ETHER_STAT_MACXMT_ERRORS:
2894 case ETHER_STAT_CARRIER_ERRORS:
2895 case ETHER_STAT_TOOLONG_ERRORS:
2896 case ETHER_STAT_XCVR_ADDR:
2897 case ETHER_STAT_XCVR_ID:
2898 case ETHER_STAT_XCVR_INUSE:
2899 case ETHER_STAT_CAP_1000FDX:
2900 case ETHER_STAT_CAP_1000HDX:
2901 case ETHER_STAT_CAP_100FDX:
2902 case ETHER_STAT_CAP_100HDX:
2903 case ETHER_STAT_CAP_10FDX:
2904 case ETHER_STAT_CAP_10HDX:
2905 case ETHER_STAT_CAP_ASMPAUSE:
2906 case ETHER_STAT_CAP_PAUSE:
2907 case ETHER_STAT_CAP_AUTONEG:
2908 case ETHER_STAT_ADV_CAP_1000FDX:
2909 case ETHER_STAT_ADV_CAP_1000HDX:
2910 case ETHER_STAT_ADV_CAP_100FDX:
2911 case ETHER_STAT_ADV_CAP_100HDX:
2912 case ETHER_STAT_ADV_CAP_10FDX:
2913 case ETHER_STAT_ADV_CAP_10HDX:
2914 case ETHER_STAT_ADV_CAP_ASMPAUSE:
2915 case ETHER_STAT_ADV_CAP_PAUSE:
2916 case ETHER_STAT_ADV_CAP_AUTONEG:
2917 case ETHER_STAT_LP_CAP_1000FDX:
2918 case ETHER_STAT_LP_CAP_1000HDX:
2919 case ETHER_STAT_LP_CAP_100FDX:
2920 case ETHER_STAT_LP_CAP_100HDX:
2921 case ETHER_STAT_LP_CAP_10FDX:
2922 case ETHER_STAT_LP_CAP_10HDX:
2923 case ETHER_STAT_LP_CAP_ASMPAUSE:
2924 case ETHER_STAT_LP_CAP_PAUSE:
2925 case ETHER_STAT_LP_CAP_AUTONEG:
2926 case ETHER_STAT_LINK_ASMPAUSE:
2927 case ETHER_STAT_LINK_PAUSE:
2928 case ETHER_STAT_LINK_AUTONEG:
2929 case ETHER_STAT_LINK_DUPLEX:
2930 default:
2931 val = 0;
2932 break;
2933
2934 }
2935 return (val);
2936 }
2937
2938 /*
2939 * LDC channel is UP, start handshake process with peer.
2940 */
2941 static void
2942 vgen_handle_evt_up(vgen_ldc_t *ldcp)
2943 {
2944 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2945
2946 DBG1(vgenp, ldcp, "enter\n");
2947
2948 ASSERT(MUTEX_HELD(&ldcp->cblock));
2949
2950 if (ldcp->portp != vgenp->vsw_portp) {
2951 /*
2952 * As the channel is up, use this port from now on.
2953 */
2954 (void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
2955 }
2956
2957 /* Initialize local session id */
2958 ldcp->local_sid = ddi_get_lbolt();
2959
2960 /* clear peer session id */
2961 ldcp->peer_sid = 0;
2962
2963 /* Initiate Handshake process with peer ldc endpoint */
2964 (void) vgen_handshake(vh_nextphase(ldcp));
2965
2966 DBG1(vgenp, ldcp, "exit\n");
2967 }
2968
2969 /*
2970 * LDC channel is Reset, terminate connection with peer and try to
2971 * bring the channel up again.
2972 */
2973 int
2974 vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
2975 {
2976 if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2977 ASSERT(MUTEX_HELD(&ldcp->cblock));
2978 }
2979
2980 /* Set the flag to indicate reset is in progress */
2981 if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
2982 /* another thread is already in the process of resetting */
2983 return (EBUSY);
2984 }
2985
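/*
 * When invoked from the LDC callback or the msg worker thread, cblock
 * is held on entry (asserted above); it is released across
 * vgen_process_reset() and reacquired before returning.
 */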
2986 if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2987 mutex_exit(&ldcp->cblock);
2988 }
2989
2990 (void) vgen_process_reset(ldcp, VGEN_FLAG_EVT_RESET);
2991
2992 if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2993 mutex_enter(&ldcp->cblock);
2994 }
2995
2996 return (0);
2997 }
2998
2999 /* Interrupt handler for the channel */
3000 static uint_t
3001 vgen_ldc_cb(uint64_t event, caddr_t arg)
3002 {
3003 _NOTE(ARGUNUSED(event))
3004 vgen_ldc_t *ldcp;
3005 vgen_t *vgenp;
3006 ldc_status_t istatus;
3007 vgen_stats_t *statsp;
3008 uint_t ret = LDC_SUCCESS;
3009
3010 ldcp = (vgen_ldc_t *)arg;
3011 vgenp = LDC_TO_VGEN(ldcp);
3012 statsp = &ldcp->stats;
3013
3014 DBG1(vgenp, ldcp, "enter\n");
3015
3016 mutex_enter(&ldcp->cblock);
3017 statsp->callbacks++;
3018 if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == 0)) {
3019 DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3020 ldcp->ldc_status);
3021 mutex_exit(&ldcp->cblock);
3022 return (LDC_SUCCESS);
3023 }
3024
3025 /*
3026 * NOTE: not using switch() as event could be triggered by
3027 * a state change and a read request. Also the ordering of the
3028 * check for the event types is deliberate.
3029 */
3030 if (event & LDC_EVT_UP) {
3031 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3032 DWARN(vgenp, ldcp, "ldc_status err\n");
3033 /* status couldn't be determined */
3034 ret = LDC_FAILURE;
3035 goto ldc_cb_ret;
3036 }
3037 ldcp->ldc_status = istatus;
3038 if (ldcp->ldc_status != LDC_UP) {
3039 DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3040 " but ldc status is not UP(0x%x)\n",
3041 ldcp->ldc_status);
3042 /* spurious interrupt, return success */
3043 goto ldc_cb_ret;
3044 }
3045 DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3046 event, ldcp->ldc_status);
3047
3048 vgen_handle_evt_up(ldcp);
3049
3050 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3051 }
3052
3053 /* Handle RESET/DOWN before READ event */
3054 if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3055 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3056 DWARN(vgenp, ldcp, "ldc_status error\n");
3057 /* status couldn't be determined */
3058 ret = LDC_FAILURE;
3059 goto ldc_cb_ret;
3060 }
3061 ldcp->ldc_status = istatus;
3062 DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3063 event, ldcp->ldc_status);
3064
3065 (void) vgen_handle_evt_reset(ldcp, VGEN_LDC_CB);
3066
3067 /*
3068 * As the channel is down/reset, ignore READ event
3069 * but print a debug warning message.
3070 */
3071 if (event & LDC_EVT_READ) {
3072 DWARN(vgenp, ldcp,
3073 "LDC_EVT_READ set along with RESET/DOWN\n");
3074 event &= ~LDC_EVT_READ;
3075 }
3076 }
3077
3078 if (event & LDC_EVT_READ) {
3079 DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3080 event, ldcp->ldc_status);
3081
3082 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3083
3084 if (ldcp->msg_thread != NULL) {
3085 /*
3086 * If the receive thread is enabled, then
3087 * wakeup the receive thread to process the
3088 * LDC messages.
3089 */
3090 mutex_exit(&ldcp->cblock);
3091 mutex_enter(&ldcp->msg_thr_lock);
3092 if (!(ldcp->msg_thr_flags & VGEN_WTHR_DATARCVD)) {
3093 ldcp->msg_thr_flags |= VGEN_WTHR_DATARCVD;
3094 cv_signal(&ldcp->msg_thr_cv);
3095 }
3096 mutex_exit(&ldcp->msg_thr_lock);
3097 mutex_enter(&ldcp->cblock);
3098 } else {
3099 (void) vgen_handle_evt_read(ldcp, VGEN_LDC_CB);
3100 }
3101 }
3102
3103 ldc_cb_ret:
3104 mutex_exit(&ldcp->cblock);
3105 DBG1(vgenp, ldcp, "exit\n");
3106 return (ret);
3107 }
3108
3109 int
3110 vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller)
3111 {
3112 int rv;
3113 uint64_t *ldcmsg;
3114 size_t msglen;
3115 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3116 vio_msg_tag_t *tagp;
3117 ldc_status_t istatus;
3118 boolean_t has_data;
3119
3120 DBG1(vgenp, ldcp, "enter\n");
3121
3122 if (caller == VGEN_LDC_CB) {
3123 ASSERT(MUTEX_HELD(&ldcp->cblock));
3124 } else if (caller == VGEN_MSG_THR) {
3125 mutex_enter(&ldcp->cblock);
3126 } else {
3127 return (EINVAL);
3128 }
3129
3130 ldcmsg = ldcp->ldcmsg;
3131
3132 vgen_evtread:
3133 do {
3134 msglen = ldcp->msglen;
3135 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
3136
3137 if (rv != 0) {
3138 DWARN(vgenp, ldcp, "ldc_read() failed "
3139 "rv(%d) len(%d)\n", rv, msglen);
3140 if (rv == ECONNRESET)
3141 goto vgen_evtread_error;
3142 break;
3143 }
3144 if (msglen == 0) {
3145 DBG2(vgenp, ldcp, "ldc_read NODATA");
3146 break;
3147 }
3148 DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
3149
3150 tagp = (vio_msg_tag_t *)ldcmsg;
3151
3152 if (ldcp->peer_sid) {
3153 /*
3154 * check sid only after we have received peer's sid
3155 * in the version negotiate msg.
3156 */
3157 #ifdef DEBUG
3158 if (vgen_inject_error(ldcp, VGEN_ERR_HSID)) {
3159 /* simulate bad sid condition */
3160 tagp->vio_sid = 0;
3161 vgen_inject_err_flag &= ~(VGEN_ERR_HSID);
3162 }
3163 #endif
3164 rv = vgen_check_sid(ldcp, tagp);
3165 if (rv != VGEN_SUCCESS) {
3166 /*
3167 * If sid mismatch is detected,
3168 * reset the channel.
3169 */
3170 DWARN(vgenp, ldcp, "vgen_check_sid() failed\n");
3171 goto vgen_evtread_error;
3172 }
3173 }
3174
3175 switch (tagp->vio_msgtype) {
3176 case VIO_TYPE_CTRL:
3177 rv = vgen_handle_ctrlmsg(ldcp, tagp);
3178 if (rv != 0) {
3179 DWARN(vgenp, ldcp, "vgen_handle_ctrlmsg()"
3180 " failed rv(%d)\n", rv);
3181 }
3182 break;
3183
3184 case VIO_TYPE_DATA:
3185 rv = vgen_handle_datamsg(ldcp, tagp, msglen);
3186 if (rv != 0) {
3187 DWARN(vgenp, ldcp, "vgen_handle_datamsg()"
3188 " failed rv(%d)\n", rv);
3189 }
3190 break;
3191
3192 case VIO_TYPE_ERR:
3193 vgen_handle_errmsg(ldcp, tagp);
3194 break;
3195
3196 default:
3197 DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
3198 tagp->vio_msgtype);
3199 break;
3200 }
3201
3202 /*
3203 * If an error is encountered, stop processing and
3204 * handle the error.
3205 */
3206 if (rv != 0) {
3207 goto vgen_evtread_error;
3208 }
3209
3210 } while (msglen);
3211
3212 /* check once more before exiting */
3213 rv = ldc_chkq(ldcp->ldc_handle, &has_data);
3214 if ((rv == 0) && (has_data == B_TRUE)) {
3215 DTRACE_PROBE1(vgen_chkq, vgen_ldc_t *, ldcp);
3216 goto vgen_evtread;
3217 }
3218
3219 vgen_evtread_error:
3220 if (rv != 0) {
3221 /*
3222 * We handle the error and then return the error value. If we
3223 * are running in the context of the msg worker, the error
3224 * tells the worker thread to exit, as the channel would have
3225 * been reset.
3226 */
3227 if (rv == ECONNRESET) {
3228 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3229 DWARN(vgenp, ldcp, "ldc_status err\n");
3230 } else {
3231 ldcp->ldc_status = istatus;
3232 }
3233 (void) vgen_handle_evt_reset(ldcp, caller);
3234 } else {
3235 DWARN(vgenp, ldcp, "Calling vgen_ldc_reset()...\n");
3236 (void) vgen_ldc_reset(ldcp, caller);
3237 }
3238 }
3239
3240 if (caller == VGEN_MSG_THR) {
3241 mutex_exit(&ldcp->cblock);
3242 }
3243
3244 DBG1(vgenp, ldcp, "exit\n");
3245 return (rv);
3246 }
3247
3248 /* vgen handshake functions */
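/*
 * A rough sketch of the handshake sequence implemented by the routines
 * below: starting from VH_PHASE0, the endpoint exchanges version
 * negotiate, attribute, dring register and RDX control messages with
 * its peer, advancing through the phases via vh_nextphase() until
 * VH_DONE is reached.
 */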
3249
3250 /* change the hphase for the channel to the next phase */
3251 static vgen_ldc_t *
3252 vh_nextphase(vgen_ldc_t *ldcp)
3253 {
3254 if (ldcp->hphase == VH_PHASE4) {
3255 ldcp->hphase = VH_DONE;
3256 } else {
3257 ldcp->hphase++;
3258 }
3259 return (ldcp);
3260 }
3261
3262 /* send version negotiate message to the peer over ldc */
3263 static int
3264 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
3265 {
3266 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3267 vio_ver_msg_t vermsg;
3268 vio_msg_tag_t *tagp = &vermsg.tag;
3269 int rv;
3270
3271 bzero(&vermsg, sizeof (vermsg));
3272
3273 tagp->vio_msgtype = VIO_TYPE_CTRL;
3274 tagp->vio_subtype = VIO_SUBTYPE_INFO;
3275 tagp->vio_subtype_env = VIO_VER_INFO;
3276 tagp->vio_sid = ldcp->local_sid;
3277
3278 /* get version msg payload from ldcp->local */
3279 vermsg.ver_major = ldcp->local_hparams.ver_major;
3280 vermsg.ver_minor = ldcp->local_hparams.ver_minor;
3281 vermsg.dev_class = ldcp->local_hparams.dev_class;
3282
3283 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
3284 if (rv != VGEN_SUCCESS) {
3285 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3286 return (rv);
3287 }
3288
3289 ldcp->hstate |= VER_INFO_SENT;
3290 DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
3291 vermsg.ver_major, vermsg.ver_minor);
3292
3293 return (VGEN_SUCCESS);
3294 }
3295
3296 /* send attr info message to the peer over ldc */
3297 static int
3298 vgen_send_attr_info(vgen_ldc_t *ldcp)
3299 {
3300 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3301 vnet_attr_msg_t attrmsg;
3302 vio_msg_tag_t *tagp = &attrmsg.tag;
3303 int rv;
3304
3305 bzero(&attrmsg, sizeof (attrmsg));
3306
3307 tagp->vio_msgtype = VIO_TYPE_CTRL;
3308 tagp->vio_subtype = VIO_SUBTYPE_INFO;
3309 tagp->vio_subtype_env = VIO_ATTR_INFO;
3310 tagp->vio_sid = ldcp->local_sid;
3311
3312 /* get attr msg payload from ldcp->local */
3313 attrmsg.mtu = ldcp->local_hparams.mtu;
3314 attrmsg.addr = ldcp->local_hparams.addr;
3315 attrmsg.addr_type = ldcp->local_hparams.addr_type;
3316 attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
3317 attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
3318 attrmsg.physlink_update = ldcp->local_hparams.physlink_update;
3319 attrmsg.options = ldcp->local_hparams.dring_mode;
3320
3321 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
3322 if (rv != VGEN_SUCCESS) {
3323 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3324 return (rv);
3325 }
3326
3327 ldcp->hstate |= ATTR_INFO_SENT;
3328 DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
3329
3330 return (VGEN_SUCCESS);
3331 }
3332
3333 /*
3334 * Send descriptor ring register message to the peer over ldc.
3335 * Invoked in RxDringData mode.
3336 */
3337 static int
3338 vgen_send_rx_dring_reg(vgen_ldc_t *ldcp)
3339 {
3340 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3341 vio_dring_reg_msg_t *msg;
3342 vio_dring_reg_ext_msg_t *emsg;
3343 int rv;
3344 uint8_t *buf;
3345 uint_t msgsize;
3346
3347 msgsize = VNET_DRING_REG_EXT_MSG_SIZE(ldcp->rx_data_ncookies);
3348 msg = kmem_zalloc(msgsize, KM_SLEEP);
3349
3350 /* Initialize the common part of dring reg msg */
3351 vgen_init_dring_reg_msg(ldcp, msg, VIO_RX_DRING_DATA);
3352
3353 /* skip over dring cookies at the tail of common section */
3354 buf = (uint8_t *)msg->cookie;
3355 ASSERT(msg->ncookies == 1);
3356 buf += (msg->ncookies * sizeof (ldc_mem_cookie_t));
3357
3358 /* Now setup the extended part, specific to RxDringData mode */
3359 emsg = (vio_dring_reg_ext_msg_t *)buf;
3360
3361 /* copy data_ncookies in the msg */
3362 emsg->data_ncookies = ldcp->rx_data_ncookies;
3363
3364 /* copy data area size in the msg */
3365 emsg->data_area_size = ldcp->rx_data_sz;
3366
3367 /* copy data area cookies in the msg */
3368 bcopy(ldcp->rx_data_cookie, (ldc_mem_cookie_t *)emsg->data_cookie,
3369 sizeof (ldc_mem_cookie_t) * ldcp->rx_data_ncookies);
3370
3371 rv = vgen_sendmsg(ldcp, (caddr_t)msg, msgsize, B_FALSE);
3372 if (rv != VGEN_SUCCESS) {
3373 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3374 kmem_free(msg, msgsize);
3375 return (rv);
3376 }
3377
3378 ldcp->hstate |= DRING_INFO_SENT;
3379 DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3380
3381 kmem_free(msg, msgsize);
3382 return (VGEN_SUCCESS);
3383 }
3384
3385 /*
3386 * Send descriptor ring register message to the peer over ldc.
3387 * Invoked in TxDring mode.
3388 */
3389 static int
3390 vgen_send_tx_dring_reg(vgen_ldc_t *ldcp)
3391 {
3392 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3393 vio_dring_reg_msg_t msg;
3394 int rv;
3395
3396 bzero(&msg, sizeof (msg));
3397
3398 /*
3399 * Initialize only the common part of dring reg msg in TxDring mode.
3400 */
3401 vgen_init_dring_reg_msg(ldcp, &msg, VIO_TX_DRING);
3402
3403 rv = vgen_sendmsg(ldcp, (caddr_t)&msg, sizeof (msg), B_FALSE);
3404 if (rv != VGEN_SUCCESS) {
3405 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3406 return (rv);
3407 }
3408
3409 ldcp->hstate |= DRING_INFO_SENT;
3410 DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3411
3412 return (VGEN_SUCCESS);
3413 }
3414
3415 static int
3416 vgen_send_rdx_info(vgen_ldc_t *ldcp)
3417 {
3418 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3419 vio_rdx_msg_t rdxmsg;
3420 vio_msg_tag_t *tagp = &rdxmsg.tag;
3421 int rv;
3422
3423 bzero(&rdxmsg, sizeof (rdxmsg));
3424
3425 tagp->vio_msgtype = VIO_TYPE_CTRL;
3426 tagp->vio_subtype = VIO_SUBTYPE_INFO;
3427 tagp->vio_subtype_env = VIO_RDX;
3428 tagp->vio_sid = ldcp->local_sid;
3429
3430 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
3431 if (rv != VGEN_SUCCESS) {
3432 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3433 return (rv);
3434 }
3435
3436 ldcp->hstate |= RDX_INFO_SENT;
3437 DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
3438
3439 return (VGEN_SUCCESS);
3440 }
3441
3442 /* send multicast addr info message to vsw */
3443 static int
3444 vgen_send_mcast_info(vgen_ldc_t *ldcp)
3445 {
3446 vnet_mcast_msg_t mcastmsg;
3447 vnet_mcast_msg_t *msgp;
3448 vio_msg_tag_t *tagp;
3449 vgen_t *vgenp;
3450 struct ether_addr *mca;
3451 int rv;
3452 int i;
3453 uint32_t size;
3454 uint32_t mccount;
3455 uint32_t n;
3456
3457 msgp = &mcastmsg;
3458 tagp = &msgp->tag;
3459 vgenp = LDC_TO_VGEN(ldcp);
3460
3461 mccount = vgenp->mccount;
3462 i = 0;
3463
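/*
 * A single VNET_MCAST_INFO message carries at most VNET_NUM_MCAST
 * addresses, so the multicast table is sent to vsw in chunks.
 */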
3464 do {
3465 tagp->vio_msgtype = VIO_TYPE_CTRL;
3466 tagp->vio_subtype = VIO_SUBTYPE_INFO;
3467 tagp->vio_subtype_env = VNET_MCAST_INFO;
3468 tagp->vio_sid = ldcp->local_sid;
3469
3470 n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
3471 size = n * sizeof (struct ether_addr);
3472
3473 mca = &(vgenp->mctab[i]);
3474 bcopy(mca, (msgp->mca), size);
3475 msgp->set = B_TRUE;
3476 msgp->count = n;
3477
3478 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
3479 B_FALSE);
3480 if (rv != VGEN_SUCCESS) {
3481 DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
3482 return (rv);
3483 }
3484
3485 mccount -= n;
3486 i += n;
3487
3488 } while (mccount);
3489
3490 return (VGEN_SUCCESS);
3491 }
3492
3493 /*
3494 * vgen_dds_rx -- post DDS messages to vnet.
3495 */
3496 static int
3497 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3498 {
3499 vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp;
3500 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3501
3502 if (dmsg->dds_class != DDS_VNET_NIU) {
3503 DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
3504 return (EBADMSG);
3505 }
3506 vnet_dds_rx(vgenp->vnetp, dmsg);
3507 return (0);
3508 }
3509
3510 /*
3511 * vgen_dds_tx -- an interface called by vnet to send DDS messages.
3512 */
3513 int
3514 vgen_dds_tx(void *arg, void *msg)
3515 {
3516 vgen_t *vgenp = arg;
3517 vio_dds_msg_t *dmsg = msg;
3518 vgen_portlist_t *plistp = &vgenp->vgenports;
3519 vgen_ldc_t *ldcp;
3520 int rv = EIO;
3521
3522 READ_ENTER(&plistp->rwlock);
3523 ldcp = vgenp->vsw_portp->ldcp;
3524 if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
3525 goto vgen_dsend_exit;
3526 }
3527
3528 dmsg->tag.vio_sid = ldcp->local_sid;
3529 rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
3530 if (rv != VGEN_SUCCESS) {
3531 rv = EIO;
3532 } else {
3533 rv = 0;
3534 }
3535
3536 vgen_dsend_exit:
3537 RW_EXIT(&plistp->rwlock);
3538 return (rv);
3539
3540 }
3541
3542 /* Initiate Phase 2 of handshake */
3543 static int
3544 vgen_handshake_phase2(vgen_ldc_t *ldcp)
3545 {
3546 int rv;
3547
3548 #ifdef DEBUG
3549 if (vgen_inject_error(ldcp, VGEN_ERR_HSTATE)) {
3550 /* simulate out of state condition */
3551 vgen_inject_err_flag &= ~(VGEN_ERR_HSTATE);
3552 rv = vgen_send_rdx_info(ldcp);
3553 return (rv);
3554 }
3555 if (vgen_inject_error(ldcp, VGEN_ERR_HTIMEOUT)) {
3556 /* simulate timeout condition */
3557 vgen_inject_err_flag &= ~(VGEN_ERR_HTIMEOUT);
3558 return (VGEN_SUCCESS);
3559 }
3560 #endif
3561 rv = vgen_send_attr_info(ldcp);
3562 if (rv != VGEN_SUCCESS) {
3563 return (rv);
3564 }
3565
3566 return (VGEN_SUCCESS);
3567 }
3568
3569 static int
3570 vgen_handshake_phase3(vgen_ldc_t *ldcp)
3571 {
3572 int rv;
3573 vgen_hparams_t *lp = &ldcp->local_hparams;
3574 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3575 vgen_stats_t *statsp = &ldcp->stats;
3576
3577 /* dring mode has been negotiated in attr phase; save in stats */
3578 statsp->dring_mode = lp->dring_mode;
3579
3580 if (lp->dring_mode == VIO_RX_DRING_DATA) { /* RxDringData mode */
3581 ldcp->rx_dringdata = vgen_handle_dringdata_shm;
3582 ldcp->tx_dringdata = vgen_dringsend_shm;
3583 if (!VGEN_PRI_ETH_DEFINED(vgenp)) {
3584 /*
3585 * If priority frames are not in use, we don't need a
3586 * separate wrapper function for 'tx', so we set it to
3587 * 'tx_dringdata'. If priority frames are configured,
3588 * we leave the 'tx' pointer as is (initialized in
3589 * vgen_set_vnet_proto_ops()).
3590 */
3591 ldcp->tx = ldcp->tx_dringdata;
3592 }
3593 } else { /* TxDring mode */
3594 ldcp->msg_thread = thread_create(NULL,
3595 2 * DEFAULTSTKSZ, vgen_ldc_msg_worker, ldcp, 0,
3596 &p0, TS_RUN, maxclsyspri);
3597 }
3598
3599 rv = vgen_create_dring(ldcp);
3600 if (rv != VGEN_SUCCESS) {
3601 return (rv);
3602 }
3603
3604 /* update local dring_info params */
3605 if (lp->dring_mode == VIO_RX_DRING_DATA) {
3606 bcopy(&(ldcp->rx_dring_cookie),
3607 &(ldcp->local_hparams.dring_cookie),
3608 sizeof (ldc_mem_cookie_t));
3609 ldcp->local_hparams.dring_ncookies = ldcp->rx_dring_ncookies;
3610 ldcp->local_hparams.num_desc = ldcp->num_rxds;
3611 ldcp->local_hparams.desc_size =
3612 sizeof (vnet_rx_dringdata_desc_t);
3613 rv = vgen_send_rx_dring_reg(ldcp);
3614 } else {
3615 bcopy(&(ldcp->tx_dring_cookie),
3616 &(ldcp->local_hparams.dring_cookie),
3617 sizeof (ldc_mem_cookie_t));
3618 ldcp->local_hparams.dring_ncookies = ldcp->tx_dring_ncookies;
3619 ldcp->local_hparams.num_desc = ldcp->num_txds;
3620 ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3621 rv = vgen_send_tx_dring_reg(ldcp);
3622 }
3623
3624 if (rv != VGEN_SUCCESS) {
3625 return (rv);
3626 }
3627
3628 return (VGEN_SUCCESS);
3629 }
3630
3631 /*
3632 * Set vnet-protocol-version dependent functions based on version.
3633 */
3634 static void
3635 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
3636 {
3637 vgen_hparams_t *lp = &ldcp->local_hparams;
3638 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3639
3640 /*
3641 * Setup the appropriate dring data processing routine and any
3642 * associated thread based on the version.
3643 *
3644 * In versions < 1.6, we only support TxDring mode. In this mode, the
3645 * msg worker thread processes all types of VIO msgs (ctrl and data).
3646 *
3647 * In versions >= 1.6, we also support RxDringData mode. In this mode,
3648 * all msgs including dring data messages are handled directly by the
3649 * callback (intr) thread. The dring data msgs (msgtype: VIO_TYPE_DATA,
3650 * subtype: VIO_SUBTYPE_INFO, subtype_env: VIO_DRING_DATA) can also be
3651 * disabled while the polling thread is active, in which case the
3652 * polling thread processes the rcv descriptor ring.
3653 *
3654 * However, for versions >= 1.6, we can force to only use TxDring mode.
3655 * This could happen if RxDringData mode has been disabled (see
3656 * below) on this guest or on the peer guest. This info is determined
3657 * as part of attr exchange phase of handshake. Hence, we setup these
3658 * pointers for v1.6 after attr msg phase completes during handshake.
3659 */
3660 if (VGEN_VER_GTEQ(ldcp, 1, 6)) { /* Ver >= 1.6 */
3661 /*
3662 * Set data dring mode for vgen_send_attr_info().
3663 */
3664 if (vgen_mapin_avail(ldcp) == B_TRUE) {
3665 lp->dring_mode = (VIO_RX_DRING_DATA | VIO_TX_DRING);
3666 } else {
3667 lp->dring_mode = VIO_TX_DRING;
3668 }
3669 } else { /* Ver <= 1.5 */
3670 lp->dring_mode = VIO_TX_DRING;
3671 }
3672
3673 if (VGEN_VER_GTEQ(ldcp, 1, 5)) {
3674 vgen_port_t *portp = ldcp->portp;
3675 vnet_t *vnetp = vgenp->vnetp;
3676 /*
3677 * If the version negotiated with vswitch is >= 1.5 (link
3678 * status update support), set the required bits in our
3679 * attributes if this vnet device has been configured to get
3680 * physical link state updates.
3681 */
3682 if (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) {
3683 lp->physlink_update = PHYSLINK_UPDATE_STATE;
3684 } else {
3685 lp->physlink_update = PHYSLINK_UPDATE_NONE;
3686 }
3687 }
3688
3689 if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
3690 /*
3691 * If the version negotiated with peer is >= 1.4(Jumbo Frame
3692 * Support), set the mtu in our attributes to max_frame_size.
3693 */
3694 lp->mtu = vgenp->max_frame_size;
3695 } else if (VGEN_VER_EQ(ldcp, 1, 3)) {
3696 /*
3697 * If the version negotiated with peer is == 1.3 (Vlan Tag
3698 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
3699 */
3700 lp->mtu = ETHERMAX + VLAN_TAGSZ;
3701 } else {
3702 vgen_port_t *portp = ldcp->portp;
3703 vnet_t *vnetp = vgenp->vnetp;
3704 /*
3705 * Pre-1.3 peers expect max frame size of ETHERMAX.
3706 * We can negotiate that size with those peers provided the
3707 * following conditions are true:
3708 * - Only pvid is defined for our peer and there are no vids.
3709 * - pvids are equal.
3710 * If the above conditions are true, then we can send/recv only
3711 * untagged frames of max size ETHERMAX.
3712 */
3713 if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
3714 lp->mtu = ETHERMAX;
3715 }
3716 }
3717
3718 if (VGEN_VER_GTEQ(ldcp, 1, 2)) { /* Versions >= 1.2 */
3719 /*
3720 * Starting v1.2 we support priority frames; so set the
3721 * dring processing routines and xfer modes based on the
3722 * version. Note that the dring routines could be changed after
3723 * attribute handshake phase for versions >= 1.6 (See
3724 * vgen_handshake_phase3())
3725 */
3726 ldcp->tx_dringdata = vgen_dringsend;
3727 ldcp->rx_dringdata = vgen_handle_dringdata;
3728
3729 if (VGEN_PRI_ETH_DEFINED(vgenp)) {
3730 /*
3731 * Enable priority routines and pkt mode only if
3732 * at least one pri-eth-type is specified in MD.
3733 */
3734 ldcp->tx = vgen_ldcsend;
3735 ldcp->rx_pktdata = vgen_handle_pkt_data;
3736
3737 /* set xfer mode for vgen_send_attr_info() */
3738 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
3739 } else {
3740 /* No priority eth types defined in MD */
3741 ldcp->tx = ldcp->tx_dringdata;
3742 ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3743
3744 /* Set xfer mode for vgen_send_attr_info() */
3745 lp->xfer_mode = VIO_DRING_MODE_V1_2;
3746 }
3747 } else { /* Versions prior to 1.2 */
3748 vgen_reset_vnet_proto_ops(ldcp);
3749 }
3750 }
3751
3752 /*
3753 * Reset vnet-protocol-version dependent functions to pre-v1.2.
3754 */
3755 static void
3756 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
3757 {
3758 vgen_hparams_t *lp = &ldcp->local_hparams;
3759
3760 ldcp->tx = ldcp->tx_dringdata = vgen_dringsend;
3761 ldcp->rx_dringdata = vgen_handle_dringdata;
3762 ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3763
3764 /* set xfer mode for vgen_send_attr_info() */
3765 lp->xfer_mode = VIO_DRING_MODE_V1_0;
3766 }
3767
3768 static void
3769 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
3770 {
3771 vgen_ldc_t *ldcp = portp->ldcp;
3772 vgen_t *vgenp = portp->vgenp;
3773 vnet_t *vnetp = vgenp->vnetp;
3774 boolean_t need_reset = B_FALSE;
3775
3776 mutex_enter(&ldcp->cblock);
3777
3778 /*
3779 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
3780 * the connection. See comments in vgen_set_vnet_proto_ops().
3781 */
3782 if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
3783 (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
3784 need_reset = B_TRUE;
3785 }
3786 mutex_exit(&ldcp->cblock);
3787
3788 if (need_reset == B_TRUE) {
3789 (void) vgen_ldc_reset(ldcp, VGEN_OTHER);
3790 }
3791 }
3792
3793 static void
3794 vgen_port_reset(vgen_port_t *portp)
3795 {
3796 (void) vgen_ldc_reset(portp->ldcp, VGEN_OTHER);
3797 }
3798
3799 static void
3800 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
3801 {
3802 vgen_port_t *portp;
3803 vgen_portlist_t *plistp;
3804
3805 plistp = &(vgenp->vgenports);
3806 READ_ENTER(&plistp->rwlock);
3807
3808 for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
3809
3810 vgen_vlan_unaware_port_reset(portp);
3811
3812 }
3813
3814 RW_EXIT(&plistp->rwlock);
3815 }
3816
3817 static void
3818 vgen_reset_vsw_port(vgen_t *vgenp)
3819 {
3820 vgen_port_t *portp;
3821
3822 if ((portp = vgenp->vsw_portp) != NULL) {
3823 vgen_port_reset(portp);
3824 }
3825 }
3826
3827 static void
3828 vgen_setup_handshake_params(vgen_ldc_t *ldcp)
3829 {
3830 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3831
3832 /*
3833 * clear local handshake params and initialize.
3834 */
3835 bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3836
3837 /* set version to the highest version supported */
3838 ldcp->local_hparams.ver_major =
3839 ldcp->vgen_versions[0].ver_major;
3840 ldcp->local_hparams.ver_minor =
3841 ldcp->vgen_versions[0].ver_minor;
3842 ldcp->local_hparams.dev_class = VDEV_NETWORK;
3843
3844 /* set attr_info params */
3845 ldcp->local_hparams.mtu = vgenp->max_frame_size;
3846 ldcp->local_hparams.addr =
3847 vnet_macaddr_strtoul(vgenp->macaddr);
3848 ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3849 ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
3850 ldcp->local_hparams.ack_freq = 0; /* don't need acks */
3851 ldcp->local_hparams.physlink_update = PHYSLINK_UPDATE_NONE;
3852
3853 /* reset protocol version specific function pointers */
3854 vgen_reset_vnet_proto_ops(ldcp);
3855 ldcp->local_hparams.dring_ident = 0;
3856 ldcp->local_hparams.dring_ready = B_FALSE;
3857
3858 /* clear peer_hparams */
3859 bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3860 ldcp->peer_hparams.dring_ready = B_FALSE;
3861 }
3862
3863 /*
3864 * Process Channel Reset. We tear down the resources (timers, threads,
3865 * descriptor rings etc) associated with the channel and reinitialize the
3866 * channel based on the flags.
3867 *
3868 * Arguments:
3869 * ldcp: The channel being processed.
3870 *
3871 * flags:
3872 * VGEN_FLAG_EVT_RESET:
3873 * An ECONNRESET error occurred while doing ldc operations such as
3874 * ldc_read() or ldc_write(); the channel is already reset and it
3875 * needs to be handled.
3876 * VGEN_FLAG_NEED_LDCRESET:
3877 * Some other errors occurred and the error handling code needs to
3878 * explicitly reset the channel and restart handshake with the
3879 * peer. The error could be either in ldc operations or other
3880 * parts of the code such as timeouts or mdeg events etc.
3881 * VGEN_FLAG_UNINIT:
3882 * The channel is being torn down; no need to bring up the channel
3883 * after resetting.
3884 */
3885 static int
3886 vgen_process_reset(vgen_ldc_t *ldcp, int flags)
3887 {
3888 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3889 vgen_port_t *portp = ldcp->portp;
3890 vgen_hparams_t *lp = &ldcp->local_hparams;
3891 boolean_t is_vsw_port = B_FALSE;
3892 boolean_t link_update = B_FALSE;
3893 ldc_status_t istatus;
3894 int rv;
3895 uint_t retries = 0;
3896 timeout_id_t htid = 0;
3897 timeout_id_t wd_tid = 0;
3898
3899 if (portp == vgenp->vsw_portp) { /* vswitch port ? */
3900 is_vsw_port = B_TRUE;
3901 }
3902
3903 /*
3904 * Report that the channel is being reset; this ensures that any HybridIO
3905 * configuration is torn down before we reset the channel, in case it is
3906 * not already reset (flags == VGEN_FLAG_NEED_LDCRESET).
3907 */
3908 if (is_vsw_port == B_TRUE) {
3909 vio_net_report_err_t rep_err = portp->vcb.vio_net_report_err;
3910 rep_err(portp->vhp, VIO_NET_RES_DOWN);
3911 }
3912
3913 again:
3914 mutex_enter(&ldcp->cblock);
3915
3916 /* Clear hstate and hphase */
3917 ldcp->hstate = 0;
3918 ldcp->hphase = VH_PHASE0;
3919 if (flags == VGEN_FLAG_NEED_LDCRESET || flags == VGEN_FLAG_UNINIT) {
3920 DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
3921 (void) ldc_down(ldcp->ldc_handle);
3922 (void) ldc_status(ldcp->ldc_handle, &istatus);
3923 DWARN(vgenp, ldcp, "Reset Done, ldc_status(%d)\n", istatus);
3924 ldcp->ldc_status = istatus;
3925
3926 if (flags == VGEN_FLAG_UNINIT) {
3927 /* disable further callbacks */
3928 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3929 if (rv != 0) {
3930 DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3931 }
3932 }
3933
3934 } else {
3935 /* flags == VGEN_FLAG_EVT_RESET */
3936 DWARN(vgenp, ldcp, "ldc status(%d)\n", ldcp->ldc_status);
3937 }
3938
3939 /*
3940 * As the connection is now reset, mark the channel
3941 * link_state as 'down' and notify the stack if needed.
3942 */
3943 if (ldcp->link_state != LINK_STATE_DOWN) {
3944 ldcp->link_state = LINK_STATE_DOWN;
3945
3946 if (is_vsw_port == B_TRUE) { /* vswitch port ? */
3947 /*
3948 * As the channel link is down, mark physical link also
3949 * as down. After the channel comes back up and
3950 * handshake completes, we will get an update on the
3951 * physlink state from vswitch (if this device has been
3952 * configured to get phys link updates).
3953 */
3954 vgenp->phys_link_state = LINK_STATE_DOWN;
3955 link_update = B_TRUE;
3956
3957 }
3958 }
3959
3960 if (ldcp->htid != 0) {
3961 htid = ldcp->htid;
3962 ldcp->htid = 0;
3963 }
3964
3965 if (ldcp->wd_tid != 0) {
3966 wd_tid = ldcp->wd_tid;
3967 ldcp->wd_tid = 0;
3968 }
3969
3970 mutex_exit(&ldcp->cblock);
3971
3972 /* Update link state to the stack */
3973 if (link_update == B_TRUE) {
3974 vgen_link_update(vgenp, ldcp->link_state);
3975 }
3976
3977 /*
3978 * As the channel is being reset, redirect traffic to the peer through
3979 * vswitch, until the channel becomes ready to be used again.
3980 */
3981 if (is_vsw_port == B_FALSE && vgenp->vsw_portp != NULL) {
3982 (void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
3983 }
3984
3985 /* Cancel handshake watchdog timeout */
3986 if (htid) {
3987 (void) untimeout(htid);
3988 }
3989
3990 /* Cancel transmit watchdog timeout */
3991 if (wd_tid) {
3992 (void) untimeout(wd_tid);
3993 }
3994
3995 /* Stop the msg worker thread */
3996 if (lp->dring_mode == VIO_TX_DRING && curthread != ldcp->msg_thread) {
3997 vgen_stop_msg_thread(ldcp);
3998 }
3999
4000 /* Grab all locks while we tear down tx/rx resources */
4001 LDC_LOCK(ldcp);
4002
4003 /* Destroy the local dring which is exported to the peer */
4004 vgen_destroy_dring(ldcp);
4005
4006 /* Unmap the remote dring which is imported from the peer */
4007 vgen_unmap_dring(ldcp);
4008
4009 /*
4010 * Bring up the channel and restart handshake
4011 * only if the channel is not being torn down.
4012 */
4013 if (flags != VGEN_FLAG_UNINIT) {
4014
4015 /* Setup handshake parameters to restart a new handshake */
4016 vgen_setup_handshake_params(ldcp);
4017
4018 /* Bring the channel up */
4019 vgen_ldc_up(ldcp);
4020
4021 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4022 DWARN(vgenp, ldcp, "ldc_status err\n");
4023 } else {
4024 ldcp->ldc_status = istatus;
4025 }
4026
4027 /* If the channel is UP, start handshake */
4028 if (ldcp->ldc_status == LDC_UP) {
4029
4030 if (is_vsw_port == B_FALSE) {
4031 /*
4032 * Channel is up; use this port from now on.
4033 */
4034 (void) atomic_swap_32(&portp->use_vsw_port,
4035 B_FALSE);
4036 }
4037
4038 /* Initialize local session id */
4039 ldcp->local_sid = ddi_get_lbolt();
4040
4041 /* clear peer session id */
4042 ldcp->peer_sid = 0;
4043
4044 /*
4045 * Initiate Handshake process with peer ldc endpoint by
4046 * sending version info vio message. If that fails we
4047 * go back to the top of this function to process the
4048 * error again. Note that we can be in this loop for
4049 * 'vgen_ldc_max_resets' times, after which the channel
4050 * is not brought up.
4051 */
4052 mutex_exit(&ldcp->tclock);
4053 mutex_exit(&ldcp->txlock);
4054 mutex_exit(&ldcp->wrlock);
4055 mutex_exit(&ldcp->rxlock);
4056 rv = vgen_handshake(vh_nextphase(ldcp));
4057 mutex_exit(&ldcp->cblock);
4058 if (rv != 0) {
4059 if (rv == ECONNRESET) {
4060 flags = VGEN_FLAG_EVT_RESET;
4061 } else {
4062 flags = VGEN_FLAG_NEED_LDCRESET;
4063 }
4064
4065 /*
4066 * We still hold 'reset_in_progress'; so we can
4067 * just loop back to the top to restart error
4068 * processing.
4069 */
4070 goto again;
4071 }
4072 } else {
4073 LDC_UNLOCK(ldcp);
4074 }
4075
4076 } else { /* flags == VGEN_FLAG_UNINIT */
4077
4078 /* Close the channel - retry on EAGAIN */
4079 while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
4080 if (++retries > vgen_ldccl_retries) {
4081 break;
4082 }
4083 drv_usecwait(VGEN_LDC_CLOSE_DELAY);
4084 }
4085 if (rv != 0) {
4086 cmn_err(CE_NOTE,
4087 "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
4088 vgenp->instance, rv, ldcp->ldc_id);
4089 }
4090
4091 ldcp->ldc_reset_count = 0;
4092 ldcp->ldc_status = LDC_INIT;
4093 ldcp->flags &= ~(CHANNEL_STARTED);
4094
4095 LDC_UNLOCK(ldcp);
4096 }
4097
4098 /* Done processing channel reset; clear the atomic flag */
4099 ldcp->reset_in_progress = 0;
4100 return (0);
4101 }
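/*
 * Rough sketch of the reset flow implemented above (a summary, not a
 * normative specification):
 *
 *   1. Report VIO_NET_RES_DOWN (vswitch port only) so that any HybridIO
 *      configuration is torn down.
 *   2. Reset the channel with ldc_down() unless it was already reset
 *      (VGEN_FLAG_EVT_RESET), mark the channel link down and cancel the
 *      handshake and transmit watchdog timeouts.
 *   3. Stop the msg worker thread (TxDring mode only), destroy the dring
 *      we exported and unmap the dring imported from the peer.
 *   4. If not uninitializing, set up fresh handshake params, bring the
 *      channel back up and restart the handshake; on further errors loop
 *      back to step 2. Otherwise close the channel with ldc_close(),
 *      retrying on EAGAIN.
 */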
4102
4103 /*
4104 * Initiate handshake with the peer by sending various messages
4105 * based on the handshake-phase that the channel is currently in.
4106 */
4107 static int
4108 vgen_handshake(vgen_ldc_t *ldcp)
4109 {
4110 uint32_t hphase = ldcp->hphase;
4111 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4112 int rv = 0;
4113 timeout_id_t htid;
4114
4115 switch (hphase) {
4116
4117 case VH_PHASE1:
4118
4119 /*
4120 * Start a timer for the entire handshake process. This timer is
4121 * turned off when all phases of the handshake complete successfully
4122 * and hphase goes to VH_DONE (below), or when the channel is reset
4123 * due to errors, or when vgen_ldc_uninit() is invoked (vgen_stop).
4124 */
4125 ASSERT(ldcp->htid == 0);
4126 ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4127 drv_usectohz(vgen_hwd_interval * MICROSEC));
4128
4129 /* Phase 1 involves negotiating the version */
4130 rv = vgen_send_version_negotiate(ldcp);
4131 break;
4132
4133 case VH_PHASE2:
4134 rv = vgen_handshake_phase2(ldcp);
4135 break;
4136
4137 case VH_PHASE3:
4138 rv = vgen_handshake_phase3(ldcp);
4139 break;
4140
4141 case VH_PHASE4:
4142 rv = vgen_send_rdx_info(ldcp);
4143 break;
4144
4145 case VH_DONE:
4146
4147 ldcp->ldc_reset_count = 0;
4148
4149 DBG1(vgenp, ldcp, "Handshake Done\n");
4150
4151 /*
4152 * The channel is up and handshake is done successfully. Now we
4153 * can mark the channel link_state as 'up'. We also notify the
4154 * stack if the channel is connected to vswitch.
4155 */
4156 ldcp->link_state = LINK_STATE_UP;
4157
4158 if (ldcp->portp == vgenp->vsw_portp) {
4159 /*
4160 * If this channel(port) is connected to vsw,
4161 * need to sync multicast table with vsw.
4162 */
4163 rv = vgen_send_mcast_info(ldcp);
4164 if (rv != VGEN_SUCCESS)
4165 break;
4166
4167 if (vgenp->pls_negotiated == B_FALSE) {
4168 /*
4169 * We haven't negotiated with vswitch to get
4170 * physical link state updates. We can update
4171 * the stack at this point as the
4172 * channel to vswitch is up and the handshake
4173 * is done successfully.
4174 *
4175 * If we have negotiated to get physical link
4176 * state updates, then we won't notify
4177 * the stack here; we do that as soon as
4178 * vswitch sends us the initial phys link state
4179 * (see vgen_handle_physlink_info()).
4180 */
4181 mutex_exit(&ldcp->cblock);
4182 vgen_link_update(vgenp, ldcp->link_state);
4183 mutex_enter(&ldcp->cblock);
4184 }
4185 }
4186
4187 if (ldcp->htid != 0) {
4188 htid = ldcp->htid;
4189 ldcp->htid = 0;
4190
4191 mutex_exit(&ldcp->cblock);
4192 (void) untimeout(htid);
4193 mutex_enter(&ldcp->cblock);
4194 }
4195
4196 /*
4197 * Check if mac layer should be notified to restart
4198 * transmissions. This can happen if the channel got
4199 * reset while tx_blocked was set.
4200 */
4201 mutex_enter(&ldcp->tclock);
4202 if (ldcp->tx_blocked) {
4203 vio_net_tx_update_t vtx_update =
4204 ldcp->portp->vcb.vio_net_tx_update;
4205
4206 ldcp->tx_blocked = B_FALSE;
4207 vtx_update(ldcp->portp->vhp);
4208 }
4209 mutex_exit(&ldcp->tclock);
4210
4211 /* start transmit watchdog timer */
4212 ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
4213 drv_usectohz(vgen_txwd_interval * 1000));
4214
4215 break;
4216
4217 default:
4218 break;
4219 }
4220
4221 return (rv);
4222 }
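/*
 * The handshake driven above proceeds roughly through these phases; each
 * phase is advanced via vh_nextphase() once vgen_handshake_done() reports
 * that the current phase has completed:
 *
 *   VH_PHASE1: version negotiation     (VIO_VER_INFO)
 *   VH_PHASE2: attribute exchange      (VIO_ATTR_INFO)
 *   VH_PHASE3: dring registration      (VIO_DRING_REG)
 *   VH_PHASE4: RDX exchange            (VIO_RDX)
 *   VH_DONE:   channel ready for data; link marked up and the transmit
 *              watchdog timer started.
 */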
4223
4224 /*
4225 * Check if the current handshake phase has completed successfully and
4226 * return the status.
4227 */
4228 static int
4229 vgen_handshake_done(vgen_ldc_t *ldcp)
4230 {
4231 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4232 uint32_t hphase = ldcp->hphase;
4233 int status = 0;
4234
4235 switch (hphase) {
4236
4237 case VH_PHASE1:
4238 /*
4239 * Phase1 is done, if version negotiation
4240 * completed successfully.
4241 */
4242 status = ((ldcp->hstate & VER_NEGOTIATED) ==
4243 VER_NEGOTIATED);
4244 break;
4245
4246 case VH_PHASE2:
4247 /*
4248 * Phase 2 is done, if attr info
4249 * has been exchanged successfully.
4250 */
4251 status = ((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
4252 ATTR_INFO_EXCHANGED);
4253 break;
4254
4255 case VH_PHASE3:
4256 /*
4257 * Phase 3 is done, if dring registration
4258 * has been exchanged successfully.
4259 */
4260 status = ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
4261 DRING_INFO_EXCHANGED);
4262 break;
4263
4264 case VH_PHASE4:
4265 /* Phase 4 is done, if rdx msg has been exchanged */
4266 status = ((ldcp->hstate & RDX_EXCHANGED) ==
4267 RDX_EXCHANGED);
4268 break;
4269
4270 default:
4271 break;
4272 }
4273
4274 if (status == 0) {
4275 return (VGEN_FAILURE);
4276 }
4277 DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
4278 return (VGEN_SUCCESS);
4279 }
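/*
 * Note: the VER_NEGOTIATED and *_EXCHANGED masks checked above are assumed
 * to be defined (in vnet_gen.h) as the OR of the corresponding ACK_SENT and
 * ACK_RCVD hstate bits; e.g. phase 1 is considered complete only after we
 * have both sent and received a version ACK.
 */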
4280
4281 /*
4282 * Link State Update Notes:
4283 * The link state of the channel connected to vswitch is reported as the link
4284 * state of the vnet device, by default. If the channel is down or reset, then
4285 * the link state is marked 'down'. If the channel is 'up' *and* handshake
4286 * between the vnet and vswitch is successful, then the link state is marked
4287 * 'up'. If physical network link state is desired, then the vnet device must
4288 * be configured to get physical link updates and the 'linkprop' property
4289 * in the virtual-device MD node indicates this. As part of attribute exchange
4290 * the vnet device negotiates with the vswitch to obtain physical link state
4291 * updates. If it successfully negotiates, vswitch sends an initial physlink
4292 * msg once the handshake is done and further whenever the physical link state
4293 * changes. Currently we don't have mac layer interfaces to report two distinct
4294 * link states - virtual and physical. Thus, if the vnet has been configured to
4295 * get physical link updates, then the link status will be reported as 'up'
4296 * only when both the virtual and physical links are up.
4297 */
4298 static void
4299 vgen_link_update(vgen_t *vgenp, link_state_t link_state)
4300 {
4301 vnet_link_update(vgenp->vnetp, link_state);
4302 }
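/*
 * Informal summary of the link state reported to the stack, based on the
 * comments above:
 *
 *   pls_negotiated  channel (virtual) link   physical link   reported
 *   --------------  ----------------------   -------------   --------
 *   B_FALSE         down/reset               (any)           down
 *   B_FALSE         up, handshake done       (any)           up
 *   B_TRUE          down/reset               (any)           down
 *   B_TRUE          up, handshake done       down            down
 *   B_TRUE          up, handshake done       up              up
 */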
4303
4304 /*
4305 * Handle a version info msg from the peer or an ACK/NACK from the peer
4306 * to a version info msg that we sent.
4307 */
4308 static int
4309 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4310 {
4311 vgen_t *vgenp;
4312 vio_ver_msg_t *vermsg = (vio_ver_msg_t *)tagp;
4313 int ack = 0;
4314 int failed = 0;
4315 int idx;
4316 vgen_ver_t *versions = ldcp->vgen_versions;
4317 int rv = 0;
4318
4319 vgenp = LDC_TO_VGEN(ldcp);
4320 DBG1(vgenp, ldcp, "enter\n");
4321 switch (tagp->vio_subtype) {
4322 case VIO_SUBTYPE_INFO:
4323
4324 /* Cache sid of peer if this is the first time */
4325 if (ldcp->peer_sid == 0) {
4326 DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
4327 tagp->vio_sid);
4328 ldcp->peer_sid = tagp->vio_sid;
4329 }
4330
4331 if (ldcp->hphase != VH_PHASE1) {
4332 /*
4333 * If we are not already in VH_PHASE1, reset to
4334 * pre-handshake state, and initiate handshake
4335 * to the peer too.
4336 */
4337 return (EINVAL);
4338 }
4339
4340 ldcp->hstate |= VER_INFO_RCVD;
4341
4342 /* save peer's requested values */
4343 ldcp->peer_hparams.ver_major = vermsg->ver_major;
4344 ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
4345 ldcp->peer_hparams.dev_class = vermsg->dev_class;
4346
4347 if ((vermsg->dev_class != VDEV_NETWORK) &&
4348 (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
4349 /* unsupported dev_class, send NACK */
4350
4351 DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4352
4353 tagp->vio_subtype = VIO_SUBTYPE_NACK;
4354 tagp->vio_sid = ldcp->local_sid;
4355 /* send reply msg back to peer */
4356 rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4357 sizeof (*vermsg), B_FALSE);
4358 if (rv != VGEN_SUCCESS) {
4359 return (rv);
4360 }
4361 return (VGEN_FAILURE);
4362 }
4363
4364 DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
4365 vermsg->ver_major, vermsg->ver_minor);
4366
4367 idx = 0;
4368
4369 for (;;) {
4370
4371 if (vermsg->ver_major > versions[idx].ver_major) {
4372
4373 /* nack with next lower version */
4374 tagp->vio_subtype = VIO_SUBTYPE_NACK;
4375 vermsg->ver_major = versions[idx].ver_major;
4376 vermsg->ver_minor = versions[idx].ver_minor;
4377 break;
4378 }
4379
4380 if (vermsg->ver_major == versions[idx].ver_major) {
4381
4382 /* major version match - ACK version */
4383 tagp->vio_subtype = VIO_SUBTYPE_ACK;
4384 ack = 1;
4385
4386 /*
4387 * lower minor version to the one this endpt
4388 * supports, if necessary
4389 */
4390 if (vermsg->ver_minor >
4391 versions[idx].ver_minor) {
4392 vermsg->ver_minor =
4393 versions[idx].ver_minor;
4394 ldcp->peer_hparams.ver_minor =
4395 versions[idx].ver_minor;
4396 }
4397 break;
4398 }
4399
4400 idx++;
4401
4402 if (idx == VGEN_NUM_VER) {
4403
4404 /* no version match - send NACK */
4405 tagp->vio_subtype = VIO_SUBTYPE_NACK;
4406 vermsg->ver_major = 0;
4407 vermsg->ver_minor = 0;
4408 failed = 1;
4409 break;
4410 }
4411
4412 }
4413
4414 tagp->vio_sid = ldcp->local_sid;
4415
4416 /* send reply msg back to peer */
4417 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
4418 B_FALSE);
4419 if (rv != VGEN_SUCCESS) {
4420 return (rv);
4421 }
4422
4423 if (ack) {
4424 ldcp->hstate |= VER_ACK_SENT;
4425 DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
4426 vermsg->ver_major, vermsg->ver_minor);
4427 }
4428 if (failed) {
4429 DWARN(vgenp, ldcp, "Negotiation Failed\n");
4430 return (VGEN_FAILURE);
4431 }
4432 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4433
4434 /* VER_ACK_SENT and VER_ACK_RCVD */
4435
4436 /* local and peer versions match? */
4437 ASSERT((ldcp->local_hparams.ver_major ==
4438 ldcp->peer_hparams.ver_major) &&
4439 (ldcp->local_hparams.ver_minor ==
4440 ldcp->peer_hparams.ver_minor));
4441
4442 vgen_set_vnet_proto_ops(ldcp);
4443
4444 /* move to the next phase */
4445 rv = vgen_handshake(vh_nextphase(ldcp));
4446 if (rv != 0) {
4447 return (rv);
4448 }
4449 }
4450
4451 break;
4452
4453 case VIO_SUBTYPE_ACK:
4454
4455 if (ldcp->hphase != VH_PHASE1) {
4456 /* This should not happen. */
4457 DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
4458 return (VGEN_FAILURE);
4459 }
4460
4461 /* SUCCESS - we have agreed on a version */
4462 ldcp->local_hparams.ver_major = vermsg->ver_major;
4463 ldcp->local_hparams.ver_minor = vermsg->ver_minor;
4464 ldcp->hstate |= VER_ACK_RCVD;
4465
4466 DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
4467 vermsg->ver_major, vermsg->ver_minor);
4468
4469 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4470
4471 /* VER_ACK_SENT and VER_ACK_RCVD */
4472
4473 /* local and peer versions match? */
4474 ASSERT((ldcp->local_hparams.ver_major ==
4475 ldcp->peer_hparams.ver_major) &&
4476 (ldcp->local_hparams.ver_minor ==
4477 ldcp->peer_hparams.ver_minor));
4478
4479 vgen_set_vnet_proto_ops(ldcp);
4480
4481 /* move to the next phase */
4482 rv = vgen_handshake(vh_nextphase(ldcp));
4483 if (rv != 0) {
4484 return (rv);
4485 }
4486 }
4487 break;
4488
4489 case VIO_SUBTYPE_NACK:
4490
4491 if (ldcp->hphase != VH_PHASE1) {
4492 /* This should not happen. */
4493 DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
4494 "Phase(%u)\n", ldcp->hphase);
4495 return (VGEN_FAILURE);
4496 }
4497
4498 DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
4499 vermsg->ver_major, vermsg->ver_minor);
4500
4501 /* check if version in NACK is zero */
4502 if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
4503 /*
4504 * Version Negotiation has failed.
4505 */
4506 DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4507 return (VGEN_FAILURE);
4508 }
4509
4510 idx = 0;
4511
4512 for (;;) {
4513
4514 if (vermsg->ver_major > versions[idx].ver_major) {
4515 /* select next lower version */
4516
4517 ldcp->local_hparams.ver_major =
4518 versions[idx].ver_major;
4519 ldcp->local_hparams.ver_minor =
4520 versions[idx].ver_minor;
4521 break;
4522 }
4523
4524 if (vermsg->ver_major == versions[idx].ver_major) {
4525 /* major version match */
4526
4527 ldcp->local_hparams.ver_major =
4528 versions[idx].ver_major;
4529
4530 ldcp->local_hparams.ver_minor =
4531 versions[idx].ver_minor;
4532 break;
4533 }
4534
4535 idx++;
4536
4537 if (idx == VGEN_NUM_VER) {
4538 /*
4539 * no version match.
4540 * Version Negotiation has failed.
4541 */
4542 DWARN(vgenp, ldcp,
4543 "Version Negotiation Failed\n");
4544 return (VGEN_FAILURE);
4545 }
4546
4547 }
4548
4549 rv = vgen_send_version_negotiate(ldcp);
4550 if (rv != VGEN_SUCCESS) {
4551 return (rv);
4552 }
4553
4554 break;
4555 }
4556
4557 DBG1(vgenp, ldcp, "exit\n");
4558 return (VGEN_SUCCESS);
4559 }
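/*
 * A worked example of the negotiation above (the version numbers are
 * illustrative; the actual list lives in ldcp->vgen_versions, ordered
 * highest first):
 *
 *   - Peer sends VER_INFO 1.7 and our highest entry is 1.6: the major
 *     numbers match, so we lower the minor to 6 and ACK 1.6.
 *   - Peer sends VER_INFO 2.0 and our highest entry is 1.6: the peer's
 *     major is greater than ours, so we NACK with 1.6 and expect the peer
 *     to retry with a version we support.
 *   - A NACK carrying major/minor 0.0 (sent or received) means no common
 *     version exists and version negotiation has failed.
 */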
4560
4561 static int
4562 vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4563 {
4564 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4565 vgen_hparams_t *lp = &ldcp->local_hparams;
4566 vgen_hparams_t *rp = &ldcp->peer_hparams;
4567 uint32_t mtu;
4568 uint8_t dring_mode;
4569
4570 ldcp->hstate |= ATTR_INFO_RCVD;
4571
4572 /* save peer's values */
4573 rp->mtu = msg->mtu;
4574 rp->addr = msg->addr;
4575 rp->addr_type = msg->addr_type;
4576 rp->xfer_mode = msg->xfer_mode;
4577 rp->ack_freq = msg->ack_freq;
4578 rp->dring_mode = msg->options;
4579
4580 /*
4581 * Process address type, ack frequency and transfer mode attributes.
4582 */
4583 if ((msg->addr_type != ADDR_TYPE_MAC) ||
4584 (msg->ack_freq > 64) ||
4585 (msg->xfer_mode != lp->xfer_mode)) {
4586 return (VGEN_FAILURE);
4587 }
4588
4589 /*
4590 * Process dring mode attribute.
4591 */
4592 if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
4593 /*
4594 * Versions >= 1.6:
4595 * Though we are operating in v1.6 mode, it is possible that
4596 * RxDringData mode has been disabled either on this guest or
4597 * on the peer guest. If so, we revert to pre v1.6 behavior of
4598 * TxDring mode. But this must be agreed upon in both
4599 * directions of attr exchange. We first determine the mode
4600 * that can be negotiated.
4601 */
4602 if ((msg->options & VIO_RX_DRING_DATA) != 0 &&
4603 vgen_mapin_avail(ldcp) == B_TRUE) {
4604 /*
4605 * We are capable of handling RxDringData AND the peer
4606 * is also capable of it; we enable RxDringData mode on
4607 * this channel.
4608 */
4609 dring_mode = VIO_RX_DRING_DATA;
4610 } else if ((msg->options & VIO_TX_DRING) != 0) {
4611 /*
4612 * If the peer is capable of TxDring mode, we
4613 * negotiate TxDring mode on this channel.
4614 */
4615 dring_mode = VIO_TX_DRING;
4616 } else {
4617 /*
4618 * We support only VIO_TX_DRING and VIO_RX_DRING_DATA
4619 * modes. We don't support VIO_RX_DRING mode.
4620 */
4621 return (VGEN_FAILURE);
4622 }
4623
4624 /*
4625 * If we have received an ack for the attr info that we sent,
4626 * then check if the dring mode matches what the peer had ack'd
4627 * (saved in local hparams). If they don't match, we fail the
4628 * handshake.
4629 */
4630 if (ldcp->hstate & ATTR_ACK_RCVD) {
4631 if (msg->options != lp->dring_mode) {
4632 /* send NACK */
4633 return (VGEN_FAILURE);
4634 }
4635 } else {
4636 /*
4637 * Save the negotiated dring mode in our attr
4638 * parameters, so it gets sent in the attr info from us
4639 * to the peer.
4640 */
4641 lp->dring_mode = dring_mode;
4642 }
4643
4644 /* save the negotiated dring mode in the msg to be replied */
4645 msg->options = dring_mode;
4646 }
4647
4648 /*
4649 * Process MTU attribute.
4650 */
4651 if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4652 /*
4653 * Versions >= 1.4:
4654 * Validate that the peer's mtu is at least ETHERMAX. Then, the mtu
4655 * is negotiated down to the minimum of our mtu and peer's mtu.
4656 */
4657 if (msg->mtu < ETHERMAX) {
4658 return (VGEN_FAILURE);
4659 }
4660
4661 mtu = MIN(msg->mtu, vgenp->max_frame_size);
4662
4663 /*
4664 * If we have received an ack for the attr info
4665 * that we sent, then check if the mtu computed
4666 * above matches the mtu that the peer had ack'd
4667 * (saved in local hparams). If they don't
4668 * match, we fail the handshake.
4669 */
4670 if (ldcp->hstate & ATTR_ACK_RCVD) {
4671 if (mtu != lp->mtu) {
4672 /* send NACK */
4673 return (VGEN_FAILURE);
4674 }
4675 } else {
4676 /*
4677 * Save the mtu computed above in our
4678 * attr parameters, so it gets sent in
4679 * the attr info from us to the peer.
4680 */
4681 lp->mtu = mtu;
4682 }
4683
4684 /* save the MIN mtu in the msg to be replied */
4685 msg->mtu = mtu;
4686
4687 } else {
4688 /* versions < 1.4, mtu must match */
4689 if (msg->mtu != lp->mtu) {
4690 return (VGEN_FAILURE);
4691 }
4692 }
4693
4694 return (VGEN_SUCCESS);
4695 }
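/*
 * Example of the attribute negotiation above (the numbers are illustrative
 * only):
 *
 *   - Dring mode (>= v1.6): the peer offers (VIO_RX_DRING_DATA|VIO_TX_DRING)
 *     and we have enough mapin space -> reply with VIO_RX_DRING_DATA; the
 *     peer offers only VIO_TX_DRING -> reply with VIO_TX_DRING.
 *   - MTU (>= v1.4): the peer advertises 16000 and our max_frame_size is
 *     1600 -> the mtu echoed in the ACK is MIN(16000, 1600) = 1600.
 *   - MTU (< v1.4): the peer's mtu must match ours exactly, otherwise the
 *     handshake fails.
 */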
4696
4697 static int
4698 vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4699 {
4700 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4701 vgen_hparams_t *lp = &ldcp->local_hparams;
4702
4703 /*
4704 * Process dring mode attribute.
4705 */
4706 if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
4707 /*
4708 * Versions >= 1.6:
4709 * The ack msg sent by the peer contains the negotiated dring
4710 * mode between our capability (that we had sent in our attr
4711 * info) and the peer's capability.
4712 */
4713 if (ldcp->hstate & ATTR_ACK_SENT) {
4714 /*
4715 * If we have sent an ack for the attr info msg from
4716 * the peer, check if the dring mode that was
4717 * negotiated then (saved in local hparams) matches the
4718 * mode that the peer has ack'd. If they don't match,
4719 * we fail the handshake.
4720 */
4721 if (lp->dring_mode != msg->options) {
4722 return (VGEN_FAILURE);
4723 }
4724 } else {
4725 if ((msg->options & lp->dring_mode) == 0) {
4726 /*
4727 * Peer ack'd with a mode that we don't
4728 * support; we fail the handshake.
4729 */
4730 return (VGEN_FAILURE);
4731 }
4732 if ((msg->options & (VIO_TX_DRING|VIO_RX_DRING_DATA))
4733 == (VIO_TX_DRING|VIO_RX_DRING_DATA)) {
4734 /*
4735 * Peer must ack with only one negotiated mode.
4736 * Otherwise fail handshake.
4737 */
4738 return (VGEN_FAILURE);
4739 }
4740
4741 /*
4742 * Save the negotiated mode, so we can validate it when
4743 * we receive attr info from the peer.
4744 */
4745 lp->dring_mode = msg->options;
4746 }
4747 }
4748
4749 /*
4750 * Process Physical Link Update attribute.
4751 */
4752 if (VGEN_VER_GTEQ(ldcp, 1, 5) &&
4753 ldcp->portp == vgenp->vsw_portp) {
4754 /*
4755 * Versions >= 1.5:
4756 * If the vnet device has been configured to get
4757 * physical link state updates, check the corresponding
4758 * bits in the ack msg, if the peer is vswitch.
4759 */
4760 if (((lp->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
4761 PHYSLINK_UPDATE_STATE) &&
4762 ((msg->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
4763 PHYSLINK_UPDATE_STATE_ACK)) {
4764 vgenp->pls_negotiated = B_TRUE;
4765 } else {
4766 vgenp->pls_negotiated = B_FALSE;
4767 }
4768 }
4769
4770 /*
4771 * Process MTU attribute.
4772 */
4773 if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4774 /*
4775 * Versions >= 1.4:
4776 * The ack msg sent by the peer contains the minimum of
4777 * our mtu (that we had sent in our attr info) and the
4778 * peer's mtu.
4779 *
4780 * If we have sent an ack for the attr info msg from
4781 * the peer, check if the mtu that was computed then
4782 * (saved in local hparams) matches the mtu that the
4783 * peer has ack'd. If they don't match, we fail the
4784 * handshake.
4785 */
4786 if (ldcp->hstate & ATTR_ACK_SENT) {
4787 if (lp->mtu != msg->mtu) {
4788 return (VGEN_FAILURE);
4789 }
4790 } else {
4791 /*
4792 * If the mtu ack'd by the peer is > our mtu
4793 * fail handshake. Otherwise, save the mtu, so
4794 * we can validate it when we receive attr info
4795 * from our peer.
4796 */
4797 if (msg->mtu > lp->mtu) {
4798 return (VGEN_FAILURE);
4799 }
4800 if (msg->mtu <= lp->mtu) {
4801 lp->mtu = msg->mtu;
4802 }
4803 }
4804 }
4805
4806 return (VGEN_SUCCESS);
4807 }
4808
4809
4810 /*
4811 * Handle an attribute info msg from the peer or an ACK/NACK from the peer
4812 * to an attr info msg that we sent.
4813 */
4814 static int
4815 vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4816 {
4817 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4818 vnet_attr_msg_t *msg = (vnet_attr_msg_t *)tagp;
4819 int rv = 0;
4820
4821 DBG1(vgenp, ldcp, "enter\n");
4822 if (ldcp->hphase != VH_PHASE2) {
4823 DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
4824 " Invalid Phase(%u)\n",
4825 tagp->vio_subtype, ldcp->hphase);
4826 return (VGEN_FAILURE);
4827 }
4828 switch (tagp->vio_subtype) {
4829 case VIO_SUBTYPE_INFO:
4830
4831 rv = vgen_handle_attr_info(ldcp, msg);
4832 if (rv == VGEN_SUCCESS) {
4833 tagp->vio_subtype = VIO_SUBTYPE_ACK;
4834 } else {
4835 tagp->vio_subtype = VIO_SUBTYPE_NACK;
4836 }
4837 tagp->vio_sid = ldcp->local_sid;
4838
4839 /* send reply msg back to peer */
4840 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
4841 B_FALSE);
4842 if (rv != VGEN_SUCCESS) {
4843 return (rv);
4844 }
4845
4846 if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
4847 DWARN(vgenp, ldcp, "ATTR_NACK_SENT");
4848 break;
4849 }
4850
4851 ldcp->hstate |= ATTR_ACK_SENT;
4852 DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
4853 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4854 rv = vgen_handshake(vh_nextphase(ldcp));
4855 if (rv != 0) {
4856 return (rv);
4857 }
4858 }
4859
4860 break;
4861
4862 case VIO_SUBTYPE_ACK:
4863
4864 rv = vgen_handle_attr_ack(ldcp, msg);
4865 if (rv == VGEN_FAILURE) {
4866 break;
4867 }
4868
4869 ldcp->hstate |= ATTR_ACK_RCVD;
4870 DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
4871
4872 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4873 rv = vgen_handshake(vh_nextphase(ldcp));
4874 if (rv != 0) {
4875 return (rv);
4876 }
4877 }
4878 break;
4879
4880 case VIO_SUBTYPE_NACK:
4881
4882 DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
4883 return (VGEN_FAILURE);
4884 }
4885 DBG1(vgenp, ldcp, "exit\n");
4886 return (VGEN_SUCCESS);
4887 }
4888
4889 static int
4890 vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4891 {
4892 int rv = 0;
4893 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4894 vgen_hparams_t *lp = &ldcp->local_hparams;
4895
4896 DBG2(vgenp, ldcp, "DRING_INFO_RCVD");
4897 ldcp->hstate |= DRING_INFO_RCVD;
4898
4899 if (VGEN_VER_GTEQ(ldcp, 1, 6) &&
4900 (lp->dring_mode != ((vio_dring_reg_msg_t *)tagp)->options)) {
4901 /*
4902 * The earlier version of Solaris vnet driver doesn't set the
4903 * option (VIO_TX_DRING in its case) correctly in its dring reg
4904 * message. We work around that here by doing the check only
4905 * for versions >= v1.6.
4906 */
4907 DWARN(vgenp, ldcp,
4908 "Rcvd dring reg option (%d), negotiated mode (%d)\n",
4909 ((vio_dring_reg_msg_t *)tagp)->options, lp->dring_mode);
4910 return (VGEN_FAILURE);
4911 }
4912
4913 /*
4914 * Map dring exported by the peer.
4915 */
4916 rv = vgen_map_dring(ldcp, (void *)tagp);
4917 if (rv != VGEN_SUCCESS) {
4918 return (rv);
4919 }
4920
4921 /*
4922 * Map data buffers exported by the peer if we are in RxDringData mode.
4923 */
4924 if (lp->dring_mode == VIO_RX_DRING_DATA) {
4925 rv = vgen_map_data(ldcp, (void *)tagp);
4926 if (rv != VGEN_SUCCESS) {
4927 vgen_unmap_dring(ldcp);
4928 return (rv);
4929 }
4930 }
4931
4932 if (ldcp->peer_hparams.dring_ready == B_FALSE) {
4933 ldcp->peer_hparams.dring_ready = B_TRUE;
4934 }
4935
4936 return (VGEN_SUCCESS);
4937 }
4938
4939 static int
4940 vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4941 {
4942 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4943 vgen_hparams_t *lp = &ldcp->local_hparams;
4944
4945 DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
4946 ldcp->hstate |= DRING_ACK_RCVD;
4947
4948 if (lp->dring_ready) {
4949 return (VGEN_SUCCESS);
4950 }
4951
4952 /* save dring_ident acked by peer */
4953 lp->dring_ident = ((vio_dring_reg_msg_t *)tagp)->dring_ident;
4954
4955 /* local dring is now ready */
4956 lp->dring_ready = B_TRUE;
4957
4958 return (VGEN_SUCCESS);
4959 }
4960
4961 /*
4962 * Handle a descriptor ring register msg from the peer or an ACK/NACK from
4963 * the peer to a dring register msg that we sent.
4964 */
4965 static int
4966 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4967 {
4968 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4969 int rv = 0;
4970 int msgsize;
4971 vgen_hparams_t *lp = &ldcp->local_hparams;
4972
4973 DBG1(vgenp, ldcp, "enter\n");
4974 if (ldcp->hphase < VH_PHASE2) {
4975 /* dring_info can be rcvd in any of the phases after Phase1 */
4976 DWARN(vgenp, ldcp,
4977 "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
4978 tagp->vio_subtype, ldcp->hphase);
4979 return (VGEN_FAILURE);
4980 }
4981
4982 switch (tagp->vio_subtype) {
4983 case VIO_SUBTYPE_INFO:
4984
4985 rv = vgen_handle_dring_reg_info(ldcp, tagp);
4986 if (rv == VGEN_SUCCESS) {
4987 tagp->vio_subtype = VIO_SUBTYPE_ACK;
4988 } else {
4989 tagp->vio_subtype = VIO_SUBTYPE_NACK;
4990 }
4991
4992 tagp->vio_sid = ldcp->local_sid;
4993
4994 if (lp->dring_mode == VIO_RX_DRING_DATA) {
4995 msgsize =
4996 VNET_DRING_REG_EXT_MSG_SIZE(ldcp->tx_data_ncookies);
4997 } else {
4998 msgsize = sizeof (vio_dring_reg_msg_t);
4999 }
5000
5001 /* send reply msg back to peer */
5002 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, msgsize,
5003 B_FALSE);
5004 if (rv != VGEN_SUCCESS) {
5005 return (rv);
5006 }
5007
5008 if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
5009 DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5010 return (VGEN_FAILURE);
5011 }
5012
5013 ldcp->hstate |= DRING_ACK_SENT;
5014 DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5015
5016 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5017 rv = vgen_handshake(vh_nextphase(ldcp));
5018 if (rv != 0) {
5019 return (rv);
5020 }
5021 }
5022 break;
5023
5024 case VIO_SUBTYPE_ACK:
5025
5026 rv = vgen_handle_dring_reg_ack(ldcp, tagp);
5027 if (rv == VGEN_FAILURE) {
5028 return (rv);
5029 }
5030
5031 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5032 rv = vgen_handshake(vh_nextphase(ldcp));
5033 if (rv != 0) {
5034 return (rv);
5035 }
5036 }
5037
5038 break;
5039
5040 case VIO_SUBTYPE_NACK:
5041
5042 DWARN(vgenp, ldcp, "DRING_NACK_RCVD");
5043 return (VGEN_FAILURE);
5044 }
5045 DBG1(vgenp, ldcp, "exit\n");
5046 return (VGEN_SUCCESS);
5047 }
5048
5049 /*
5050 * Handle a rdx info msg from the peer or an ACK/NACK
5051 * from the peer to a rdx info msg that we sent.
5052 */
5053 static int
5054 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5055 {
5056 int rv = 0;
5057 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5058
5059 DBG1(vgenp, ldcp, "enter\n");
5060 if (ldcp->hphase != VH_PHASE4) {
5061 DWARN(vgenp, ldcp,
5062 "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5063 tagp->vio_subtype, ldcp->hphase);
5064 return (VGEN_FAILURE);
5065 }
5066 switch (tagp->vio_subtype) {
5067 case VIO_SUBTYPE_INFO:
5068
5069 DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5070 ldcp->hstate |= RDX_INFO_RCVD;
5071
5072 tagp->vio_subtype = VIO_SUBTYPE_ACK;
5073 tagp->vio_sid = ldcp->local_sid;
5074 /* send reply msg back to peer */
5075 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5076 B_FALSE);
5077 if (rv != VGEN_SUCCESS) {
5078 return (rv);
5079 }
5080
5081 ldcp->hstate |= RDX_ACK_SENT;
5082 DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5083
5084 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5085 rv = vgen_handshake(vh_nextphase(ldcp));
5086 if (rv != 0) {
5087 return (rv);
5088 }
5089 }
5090
5091 break;
5092
5093 case VIO_SUBTYPE_ACK:
5094
5095 ldcp->hstate |= RDX_ACK_RCVD;
5096
5097 DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5098
5099 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5100 rv = vgen_handshake(vh_nextphase(ldcp));
5101 if (rv != 0) {
5102 return (rv);
5103 }
5104 }
5105 break;
5106
5107 case VIO_SUBTYPE_NACK:
5108
5109 DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5110 return (VGEN_FAILURE);
5111 }
5112 DBG1(vgenp, ldcp, "exit\n");
5113 return (VGEN_SUCCESS);
5114 }
5115
5116 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5117 static int
5118 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5119 {
5120 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5121 vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
5122 struct ether_addr *addrp;
5123 int count;
5124 int i;
5125
5126 DBG1(vgenp, ldcp, "enter\n");
5127 switch (tagp->vio_subtype) {
5128
5129 case VIO_SUBTYPE_INFO:
5130
5131 /* vnet shouldn't recv set mcast msg, only vsw handles it */
5132 DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5133 break;
5134
5135 case VIO_SUBTYPE_ACK:
5136
5137 /* success adding/removing multicast addr */
5138 DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5139 break;
5140
5141 case VIO_SUBTYPE_NACK:
5142
5143 DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5144 if (!(msgp->set)) {
5145 /* multicast remove request failed */
5146 break;
5147 }
5148
5149 /* multicast add request failed */
5150 for (count = 0; count < msgp->count; count++) {
5151 addrp = &(msgp->mca[count]);
5152
5153 /* delete address from the table */
5154 for (i = 0; i < vgenp->mccount; i++) {
5155 if (ether_cmp(addrp,
5156 &(vgenp->mctab[i])) == 0) {
5157 if (vgenp->mccount > 1) {
5158 int t = vgenp->mccount - 1;
5159 vgenp->mctab[i] =
5160 vgenp->mctab[t];
5161 }
5162 vgenp->mccount--;
5163 break;
5164 }
5165 }
5166 }
5167 break;
5168
5169 }
5170 DBG1(vgenp, ldcp, "exit\n");
5171
5172 return (VGEN_SUCCESS);
5173 }
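/*
 * Note: when an add request is NACKed above, each failed address is
 * removed from mctab[] by overwriting its slot with the last entry and
 * decrementing mccount; the table stays dense but its order is not
 * preserved.
 */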
5174
5175 /*
5176 * Physical link information message from the peer. Only vswitch should send
5177 * us this message, and only if the vnet device has been configured to get
5178 * physical link state updates. Note that we must have already negotiated this
5179 * with the vswitch during the attribute exchange phase of the handshake.
5180 */
5181 static int
5182 vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5183 {
5184 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5185 vnet_physlink_msg_t *msgp = (vnet_physlink_msg_t *)tagp;
5186 link_state_t link_state;
5187 int rv;
5188
5189 if (ldcp->portp != vgenp->vsw_portp) {
5190 /*
5191 * Drop the message without processing it, as we should
5192 * receive physlink_info messages only from vswitch.
5193 */
5194 return (VGEN_SUCCESS);
5195 }
5196
5197 if (vgenp->pls_negotiated == B_FALSE) {
5198 /*
5199 * Drop the message without processing it, as we should receive
5200 * physlink_info messages only if physlink updates are enabled for
5201 * the device and negotiated with vswitch.
5202 */
5203 return (VGEN_SUCCESS);
5204 }
5205
5206 switch (tagp->vio_subtype) {
5207
5208 case VIO_SUBTYPE_INFO:
5209
5210 if ((msgp->physlink_info & VNET_PHYSLINK_STATE_MASK) ==
5211 VNET_PHYSLINK_STATE_UP) {
5212 link_state = LINK_STATE_UP;
5213 } else {
5214 link_state = LINK_STATE_DOWN;
5215 }
5216
5217 if (vgenp->phys_link_state != link_state) {
5218 vgenp->phys_link_state = link_state;
5219 mutex_exit(&ldcp->cblock);
5220
5221 /* Now update the stack */
5222 vgen_link_update(vgenp, link_state);
5223
5224 mutex_enter(&ldcp->cblock);
5225 }
5226
5227 tagp->vio_subtype = VIO_SUBTYPE_ACK;
5228 tagp->vio_sid = ldcp->local_sid;
5229
5230 /* send reply msg back to peer */
5231 rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5232 sizeof (vnet_physlink_msg_t), B_FALSE);
5233 if (rv != VGEN_SUCCESS) {
5234 return (rv);
5235 }
5236 break;
5237
5238 case VIO_SUBTYPE_ACK:
5239
5240 /* vnet shouldn't recv physlink acks */
5241 DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n");
5242 break;
5243
5244 case VIO_SUBTYPE_NACK:
5245
5246 /* vnet shouldn't recv physlink nacks */
5247 DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n");
5248 break;
5249
5250 }
5251 DBG1(vgenp, ldcp, "exit\n");
5252
5253 return (VGEN_SUCCESS);
5254 }
5255
5256 /* handler for control messages received from the peer ldc end-point */
5257 static int
5258 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5259 {
5260 int rv = 0;
5261 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5262
5263 DBG1(vgenp, ldcp, "enter\n");
5264 switch (tagp->vio_subtype_env) {
5265
5266 case VIO_VER_INFO:
5267 rv = vgen_handle_version_negotiate(ldcp, tagp);
5268 break;
5269
5270 case VIO_ATTR_INFO:
5271 rv = vgen_handle_attr_msg(ldcp, tagp);
5272 break;
5273
5274 case VIO_DRING_REG:
5275 rv = vgen_handle_dring_reg(ldcp, tagp);
5276 break;
5277
5278 case VIO_RDX:
5279 rv = vgen_handle_rdx_info(ldcp, tagp);
5280 break;
5281
5282 case VNET_MCAST_INFO:
5283 rv = vgen_handle_mcast_info(ldcp, tagp);
5284 break;
5285
5286 case VIO_DDS_INFO:
5287 /*
5288 * If we are in the process of resetting the vswitch channel,
5289 * drop the dds message. A new handshake will be initiated
5290 * when the channel comes back up after the reset and dds
5291 * negotiation can then continue.
5292 */
5293 if (ldcp->reset_in_progress == 1) {
5294 break;
5295 }
5296 rv = vgen_dds_rx(ldcp, tagp);
5297 break;
5298
5299 case VNET_PHYSLINK_INFO:
5300 rv = vgen_handle_physlink_info(ldcp, tagp);
5301 break;
5302 }
5303
5304 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5305 return (rv);
5306 }
5307
5308 /* handler for error messages received from the peer ldc end-point */
5309 static void
5310 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5311 {
5312 _NOTE(ARGUNUSED(ldcp, tagp))
5313 }
5314
5315 /*
5316 * This function handles raw pkt data messages received over the channel.
5317 * Currently, only priority-eth-type frames are received through this mechanism.
5318 * In this case, the frame (data) is contained within the message itself and
5319 * is copied into an mblk before being sent up the stack.
5320 */
5321 void
5322 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
5323 {
5324 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg1;
5325 vio_raw_data_msg_t *pkt = (vio_raw_data_msg_t *)arg2;
5326 uint32_t size;
5327 mblk_t *mp;
5328 vio_mblk_t *vmp;
5329 vio_net_rx_cb_t vrx_cb = NULL;
5330 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5331 vgen_stats_t *statsp = &ldcp->stats;
5332 vgen_hparams_t *lp = &ldcp->local_hparams;
5333 uint_t dring_mode = lp->dring_mode;
5334
5335 ASSERT(MUTEX_HELD(&ldcp->cblock));
5336
5337 mutex_exit(&ldcp->cblock);
5338
5339 size = msglen - VIO_PKT_DATA_HDRSIZE;
5340 if (size < ETHERMIN || size > lp->mtu) {
5341 (void) atomic_inc_32(&statsp->rx_pri_fail);
5342 mutex_enter(&ldcp->cblock);
5343 return;
5344 }
5345
5346 vmp = vio_multipool_allocb(&ldcp->vmp, size);
5347 if (vmp == NULL) {
5348 mp = allocb(size, BPRI_MED);
5349 if (mp == NULL) {
5350 (void) atomic_inc_32(&statsp->rx_pri_fail);
5351 DWARN(vgenp, ldcp, "allocb failure, "
5352 "unable to process priority frame\n");
5353 mutex_enter(&ldcp->cblock);
5354 return;
5355 }
5356 } else {
5357 mp = vmp->mp;
5358 }
5359
5360 /* copy the frame from the payload of raw data msg into the mblk */
5361 bcopy(pkt->data, mp->b_rptr, size);
5362 mp->b_wptr = mp->b_rptr + size;
5363
5364 if (vmp != NULL) {
5365 vmp->state = VIO_MBLK_HAS_DATA;
5366 }
5367
5368 /* update stats */
5369 (void) atomic_inc_64(&statsp->rx_pri_packets);
5370 (void) atomic_add_64(&statsp->rx_pri_bytes, size);
5371
5372 /*
5373 * If polling is currently enabled, add the packet to the priority
5374 * packets list and return. It will be picked up by the polling thread.
5375 */
5376 if (dring_mode == VIO_RX_DRING_DATA) {
5377 mutex_enter(&ldcp->rxlock);
5378 } else {
5379 mutex_enter(&ldcp->pollq_lock);
5380 }
5381
5382 if (ldcp->polling_on == B_TRUE) {
5383 if (ldcp->rx_pri_tail != NULL) {
5384 ldcp->rx_pri_tail->b_next = mp;
5385 } else {
5386 ldcp->rx_pri_head = ldcp->rx_pri_tail = mp;
5387 }
5388 } else {
5389 vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
5390 }
5391
5392 if (dring_mode == VIO_RX_DRING_DATA) {
5393 mutex_exit(&ldcp->rxlock);
5394 } else {
5395 mutex_exit(&ldcp->pollq_lock);
5396 }
5397
5398 if (vrx_cb != NULL) {
5399 vrx_cb(ldcp->portp->vhp, mp);
5400 }
5401
5402 mutex_enter(&ldcp->cblock);
5403 }
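/*
 * Locking note for the function above: it is entered and exited with
 * cblock held, but drops cblock while copying the frame and delivering it
 * to the stack. The polling_on flag and the rx priority-packet list are
 * protected by rxlock in RxDringData mode and by pollq_lock otherwise,
 * which serializes this path with the rx poll path (vgen_rx_poll()).
 */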
5404
5405 /*
5406 * dummy pkt data handler function for vnet protocol version 1.0
5407 */
5408 static void
5409 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
5410 {
5411 _NOTE(ARGUNUSED(arg1, arg2, msglen))
5412 }
5413
5414 /* handler for data messages received from the peer ldc end-point */
5415 static int
5416 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5417 {
5418 int rv = 0;
5419 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5420 vgen_hparams_t *lp = &ldcp->local_hparams;
5421
5422 DBG1(vgenp, ldcp, "enter\n");
5423
5424 if (ldcp->hphase != VH_DONE) {
5425 return (0);
5426 }
5427
5428 /*
5429 * We check the data msg seqnum. This is needed only in TxDring mode.
5430 */
5431 if (lp->dring_mode == VIO_TX_DRING &&
5432 tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5433 rv = vgen_check_datamsg_seq(ldcp, tagp);
5434 if (rv != 0) {
5435 return (rv);
5436 }
5437 }
5438
5439 switch (tagp->vio_subtype_env) {
5440 case VIO_DRING_DATA:
5441 rv = ldcp->rx_dringdata((void *)ldcp, (void *)tagp);
5442 break;
5443
5444 case VIO_PKT_DATA:
5445 ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5446 break;
5447 default:
5448 break;
5449 }
5450
5451 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5452 return (rv);
5453 }
5454
5455
5456 static int
5457 vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
5458 {
5459 int rv;
5460
5461 if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5462 ASSERT(MUTEX_HELD(&ldcp->cblock));
5463 }
5464
5465 /* Set the flag to indicate reset is in progress */
5466 if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
5467 /* another thread is already in the process of resetting */
5468 return (EBUSY);
5469 }
5470
5471 if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5472 mutex_exit(&ldcp->cblock);
5473 }
5474
5475 rv = vgen_process_reset(ldcp, VGEN_FLAG_NEED_LDCRESET);
5476
5477 if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5478 mutex_enter(&ldcp->cblock);
5479 }
5480
5481 return (rv);
5482 }
5483
5484 static void
5485 vgen_ldc_up(vgen_ldc_t *ldcp)
5486 {
5487 int rv;
5488 uint32_t retries = 0;
5489 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5490
5491 ASSERT(MUTEX_HELD(&ldcp->cblock));
5492
5493 /*
5494 * If the channel has been reset max # of times, without successfully
5495 * completing handshake, stop and do not bring the channel up.
5496 */
5497 if (ldcp->ldc_reset_count == vgen_ldc_max_resets) {
5498 cmn_err(CE_WARN, "!vnet%d: exceeded number of permitted"
5499 " handshake attempts (%d) on channel %ld",
5500 vgenp->instance, vgen_ldc_max_resets, ldcp->ldc_id);
5501 return;
5502 }
5503 ldcp->ldc_reset_count++;
5504
5505 do {
5506 rv = ldc_up(ldcp->ldc_handle);
5507 if (rv == EWOULDBLOCK) {
5508 drv_usecwait(VGEN_LDC_UP_DELAY);
5509 }
5510 if (retries++ >= vgen_ldcup_retries)
5511 break;
5512 } while (rv == EWOULDBLOCK);
5513
5514 if (rv != 0) {
5515 DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
5516 }
5517 }
5518
5519 int
5520 vgen_enable_intr(void *arg)
5521 {
5522 uint32_t end_ix;
5523 vio_dring_msg_t msg;
5524 vgen_port_t *portp = (vgen_port_t *)arg;
5525 vgen_ldc_t *ldcp = portp->ldcp;
5526 vgen_hparams_t *lp = &ldcp->local_hparams;
5527
5528 if (lp->dring_mode == VIO_RX_DRING_DATA) {
5529 mutex_enter(&ldcp->rxlock);
5530
5531 ldcp->polling_on = B_FALSE;
5532 /*
5533 * We send a stopped message to peer (sender) as we are turning
5534 * off polled mode. This effectively restarts data interrupts
5535 * by allowing the peer to send further dring data msgs to us.
5536 */
5537 end_ix = ldcp->next_rxi;
5538 DECR_RXI(end_ix, ldcp);
5539 msg.dring_ident = ldcp->peer_hparams.dring_ident;
5540 (void) vgen_send_dringack_shm(ldcp, (vio_msg_tag_t *)&msg,
5541 VNET_START_IDX_UNSPEC, end_ix, VIO_DP_STOPPED);
5542
5543 mutex_exit(&ldcp->rxlock);
5544 } else {
5545 mutex_enter(&ldcp->pollq_lock);
5546 ldcp->polling_on = B_FALSE;
5547 mutex_exit(&ldcp->pollq_lock);
5548 }
5549
5550 return (0);
5551 }
5552
5553 int
5554 vgen_disable_intr(void *arg)
5555 {
5556 vgen_port_t *portp = (vgen_port_t *)arg;
5557 vgen_ldc_t *ldcp = portp->ldcp;
5558 vgen_hparams_t *lp = &ldcp->local_hparams;
5559
5560 if (lp->dring_mode == VIO_RX_DRING_DATA) {
5561 mutex_enter(&ldcp->rxlock);
5562 ldcp->polling_on = B_TRUE;
5563 mutex_exit(&ldcp->rxlock);
5564 } else {
5565 mutex_enter(&ldcp->pollq_lock);
5566 ldcp->polling_on = B_TRUE;
5567 mutex_exit(&ldcp->pollq_lock);
5568 }
5569
5570 return (0);
5571 }
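/*
 * Note on naming: vgen_enable_intr() and vgen_disable_intr() are named from
 * the mac layer's point of view. Enabling interrupts means leaving polled
 * mode (polling_on = B_FALSE), and in RxDringData mode also sending a
 * STOPPED dring ack so the peer resumes sending dring data messages;
 * disabling interrupts means entering polled mode (polling_on = B_TRUE),
 * after which inbound packets are picked up via vgen_rx_poll().
 */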
5572
5573 mblk_t *
5574 vgen_rx_poll(void *arg, int bytes_to_pickup)
5575 {
5576 vgen_port_t *portp = (vgen_port_t *)arg;
5577 vgen_ldc_t *ldcp = portp->ldcp;
5578 vgen_hparams_t *lp = &ldcp->local_hparams;
5579 mblk_t *mp = NULL;
5580
5581 if (lp->dring_mode == VIO_RX_DRING_DATA) {
5582 mp = vgen_poll_rcv_shm(ldcp, bytes_to_pickup);
5583 } else {
5584 mp = vgen_poll_rcv(ldcp, bytes_to_pickup);
5585 }
5586
5587 return (mp);
5588 }
5589
5590 /* transmit watchdog timeout handler */
5591 static void
5592 vgen_tx_watchdog(void *arg)
5593 {
5594 vgen_ldc_t *ldcp;
5595 vgen_t *vgenp;
5596 int rv;
5597 boolean_t tx_blocked;
5598 clock_t tx_blocked_lbolt;
5599
5600 ldcp = (vgen_ldc_t *)arg;
5601 vgenp = LDC_TO_VGEN(ldcp);
5602
5603 tx_blocked = ldcp->tx_blocked;
5604 tx_blocked_lbolt = ldcp->tx_blocked_lbolt;
5605
5606 if (vgen_txwd_timeout &&
5607 (tx_blocked == B_TRUE) &&
5608 ((ddi_get_lbolt() - tx_blocked_lbolt) >
5609 drv_usectohz(vgen_txwd_timeout * 1000))) {
5610 /*
5611 * Something is wrong; the peer is not picking up the packets
5612 * in the transmit dring. We now go ahead and reset the channel
5613 * to break out of this condition.
5614 */
5615 DWARN(vgenp, ldcp, "transmit timeout lbolt(%lx), "
5616 "tx_blocked_lbolt(%lx)\n",
5617 ddi_get_lbolt(), tx_blocked_lbolt);
5618
5619 #ifdef DEBUG
5620 if (vgen_inject_error(ldcp, VGEN_ERR_TXTIMEOUT)) {
5621 /* tx timeout triggered for debugging */
5622 vgen_inject_err_flag &= ~(VGEN_ERR_TXTIMEOUT);
5623 }
5624 #endif
5625
5626 /*
5627 * Clear tid before invoking vgen_ldc_reset(). Otherwise,
5628 * it will result in a deadlock when vgen_process_reset() tries
5629 * to untimeout() on seeing a non-zero tid, but it is being
5630 * invoked by the timer itself in this case.
5631 */
5632 mutex_enter(&ldcp->cblock);
5633 if (ldcp->wd_tid == 0) {
5634 /* Cancelled by vgen_process_reset() */
5635 mutex_exit(&ldcp->cblock);
5636 return;
5637 }
5638 ldcp->wd_tid = 0;
5639 mutex_exit(&ldcp->cblock);
5640
5641 /*
5642 * Now reset the channel.
5643 */
5644 rv = vgen_ldc_reset(ldcp, VGEN_OTHER);
5645 if (rv == 0) {
5646 /*
5647 * We have successfully reset the channel. If we are
5648 * in tx flow controlled state, clear it now and enable
5649 * transmit in the upper layer.
5650 */
5651 if (ldcp->tx_blocked) {
5652 vio_net_tx_update_t vtx_update =
5653 ldcp->portp->vcb.vio_net_tx_update;
5654
5655 ldcp->tx_blocked = B_FALSE;
5656 vtx_update(ldcp->portp->vhp);
5657 }
5658 }
5659
5660 /*
5661 * Channel has been reset by us or some other thread is already
5662 * in the process of resetting. In either case, we return
5663 * without restarting the timer. When handshake completes and
5664 * the channel is ready for data transmit/receive we start a
5665 * new watchdog timer.
5666 */
5667 return;
5668 }
5669
5670 restart_timer:
5671 /* Restart the timer */
5672 mutex_enter(&ldcp->cblock);
5673 if (ldcp->wd_tid == 0) {
5674 /* Cancelled by vgen_process_reset() */
5675 mutex_exit(&ldcp->cblock);
5676 return;
5677 }
5678 ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
5679 drv_usectohz(vgen_txwd_interval * 1000));
5680 mutex_exit(&ldcp->cblock);
5681 }
5682
5683 /* Handshake watchdog timeout handler */
5684 static void
5685 vgen_hwatchdog(void *arg)
5686 {
5687 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5688 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5689
5690 DWARN(vgenp, ldcp, "handshake timeout phase(%x) state(%x)\n",
5691 ldcp->hphase, ldcp->hstate);
5692
5693 mutex_enter(&ldcp->cblock);
5694 if (ldcp->htid == 0) {
5695 /* Cancelled by vgen_process_reset() */
5696 mutex_exit(&ldcp->cblock);
5697 return;
5698 }
5699 ldcp->htid = 0;
5700 mutex_exit(&ldcp->cblock);
5701
5702 /*
5703 * Something is wrong; handshake with the peer seems to be hung. We now
5704 * go ahead and reset the channel to break out of this condition.
5705 */
5706 (void) vgen_ldc_reset(ldcp, VGEN_OTHER);
5707 }
5708
5709 /* Check if the session id in the received message is valid */
5710 static int
5711 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5712 {
5713 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5714
5715 if (tagp->vio_sid != ldcp->peer_sid) {
5716 DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
5717 ldcp->peer_sid, tagp->vio_sid);
5718 return (VGEN_FAILURE);
5719 }
5720
5721 return (VGEN_SUCCESS);
5722 }
5723
5724 /*
5725 * Initialize the common part of dring registration
5726 * message; used in both TxDring and RxDringData modes.
5727 */
5728 static void
5729 vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
5730 uint8_t option)
5731 {
5732 vio_msg_tag_t *tagp;
5733
5734 tagp = &msg->tag;
5735 tagp->vio_msgtype = VIO_TYPE_CTRL;
5736 tagp->vio_subtype = VIO_SUBTYPE_INFO;
5737 tagp->vio_subtype_env = VIO_DRING_REG;
5738 tagp->vio_sid = ldcp->local_sid;
5739
5740 /* get dring info msg payload from ldcp->local */
5741 bcopy(&(ldcp->local_hparams.dring_cookie), &(msg->cookie[0]),
5742 sizeof (ldc_mem_cookie_t));
5743 msg->ncookies = ldcp->local_hparams.dring_ncookies;
5744 msg->num_descriptors = ldcp->local_hparams.num_desc;
5745 msg->descriptor_size = ldcp->local_hparams.desc_size;
5746
5747 msg->options = option;
5748
5749 /*
5750 * dring_ident is set to 0. After mapping the dring, peer sets this
5751 * value and sends it in the ack, which is saved in
5752 * vgen_handle_dring_reg_ack().
5753 */
5754 msg->dring_ident = 0;
5755 }
5756
5757 static int
5758 vgen_mapin_avail(vgen_ldc_t *ldcp)
5759 {
5760 int rv;
5761 ldc_info_t info;
5762 uint64_t mapin_sz_req;
5763 uint64_t dblk_sz;
5764 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5765
5766 rv = ldc_info(ldcp->ldc_handle, &info);
5767 if (rv != 0) {
5768 return (B_FALSE);
5769 }
5770
5771 dblk_sz = RXDRING_DBLK_SZ(vgenp->max_frame_size);
5772 mapin_sz_req = (VGEN_RXDRING_NRBUFS * dblk_sz);
5773
5774 if (info.direct_map_size_max >= mapin_sz_req) {
5775 return (B_TRUE);
5776 }
5777
5778 return (B_FALSE);
5779 }
5780
5781 #if DEBUG
5782
5783 /*
5784 * Print debug messages - set the debug level to 0xf to enable all msgs
5785 */
5786 void
5787 vgen_debug_printf(const char *fname, vgen_t *vgenp,
5788 vgen_ldc_t *ldcp, const char *fmt, ...)
5789 {
5790 char buf[256];
5791 char *bufp = buf;
5792 va_list ap;
5793
5794 if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
5795 (void) snprintf(bufp, buf + sizeof (buf) - bufp, "vnet%d:",
5796 ((vnet_t *)(vgenp->vnetp))->instance);
5797 bufp += strlen(bufp);
5798 }
5799 if (ldcp != NULL) {
5800 (void) snprintf(bufp, buf + sizeof (buf) - bufp, "ldc(%ld):", ldcp->ldc_id);
5801 bufp += strlen(bufp);
5802 }
5803 (void) snprintf(bufp, buf + sizeof (buf) - bufp, "%s: ", fname);
5804 bufp += strlen(bufp);
5805
5806 va_start(ap, fmt);
5807 (void) vsnprintf(bufp, buf + sizeof (buf) - bufp, fmt, ap);
5808 va_end(ap);
5809
5810 if ((ldcp == NULL) || (vgendbg_ldcid == -1) ||
5811 (vgendbg_ldcid == ldcp->ldc_id)) {
5812 cmn_err(CE_CONT, "%s\n", buf);
5813 }
5814 }
5815 #endif
5816
5817 #ifdef VNET_IOC_DEBUG
5818
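/*
 * Debug ioctl handler: services VNET_FORCE_LINK_UP/VNET_FORCE_LINK_DOWN
 * requests by forcing the link state of the port's channel, then ACKs
 * the message on success or NAKs it with EINVAL on any failure.
 */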
5819 static void
5820 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
5821 {
5822 struct iocblk *iocp;
5823 vgen_port_t *portp;
5824 enum ioc_reply {
5825 IOC_INVAL = -1, /* bad, NAK with EINVAL */
5826 IOC_ACK /* OK, just send ACK */
5827 } status;
5828 int rv;
5829
5830 iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
5831 iocp->ioc_error = 0;
5832 portp = (vgen_port_t *)arg;
5833
5834 if (portp == NULL) {
5835 status = IOC_INVAL;
5836 goto vgen_ioc_exit;
5837 }
5838
5839 mutex_enter(&portp->lock);
5840
5841 switch (iocp->ioc_cmd) {
5842
5843 case VNET_FORCE_LINK_DOWN:
5844 case VNET_FORCE_LINK_UP:
5845 rv = vgen_force_link_state(portp, iocp->ioc_cmd);
5846 status = (rv == 0) ? IOC_ACK : IOC_INVAL;
5847 break;
5848
5849 default:
5850 status = IOC_INVAL;
5851 break;
5852
5853 }
5854
5855 mutex_exit(&portp->lock);
5856
5857 vgen_ioc_exit:
5858
5859 switch (status) {
5860 default:
5861 case IOC_INVAL:
5862 /* Error, reply with a NAK and EINVAL error */
5863 miocnak(q, mp, 0, EINVAL);
5864 break;
5865 case IOC_ACK:
5866 /* OK, reply with an ACK */
5867 miocack(q, mp, 0, 0);
5868 break;
5869 }
5870 }
5871
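/*
 * Force the channel's link state on behalf of the debug ioctls above:
 * VNET_FORCE_LINK_DOWN brings the LDC down and marks the link as forced
 * down; VNET_FORCE_LINK_UP clears that flag, brings the LDC back up and
 * restarts the handshake if the channel is already LDC_UP.
 */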
5872 static int
5873 vgen_force_link_state(vgen_port_t *portp, int cmd)
5874 {
5875 ldc_status_t istatus;
5876 int rv;
5877 vgen_ldc_t *ldcp = portp->ldcp;
5878 vgen_t *vgenp = portp->vgenp;
5879
5880 mutex_enter(&ldcp->cblock);
5881
5882 switch (cmd) {
5883
5884 case VNET_FORCE_LINK_DOWN:
5885 (void) ldc_down(ldcp->ldc_handle);
5886 ldcp->link_down_forced = B_TRUE;
5887 break;
5888
5889 case VNET_FORCE_LINK_UP:
5890 vgen_ldc_up(ldcp);
5891 ldcp->link_down_forced = B_FALSE;
5892
5893 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
5894 DWARN(vgenp, ldcp, "ldc_status err\n");
5895 } else {
5896 ldcp->ldc_status = istatus;
5897 }
5898
5899 /* if channel is already UP - restart handshake */
5900 if (ldcp->ldc_status == LDC_UP) {
5901 vgen_handle_evt_up(ldcp);
5902 }
5903 break;
5904
5905 }
5906
5907 mutex_exit(&ldcp->cblock);
5908
5909 return (0);
5910 }
5911
5912 #else
5913
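/*
 * Non-debug build: ioctls are not supported; NAK every request (EINVAL
 * if there is no port context, ENOTSUP otherwise).
 */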
5914 static void
5915 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
5916 {
5917 vgen_port_t *portp;
5918
5919 portp = (vgen_port_t *)arg;
5920
5921 if (portp == NULL) {
5922 miocnak(q, mp, 0, EINVAL);
5923 return;
5924 }
5925
5926 miocnak(q, mp, 0, ENOTSUP);
5927 }
5928
5929 #endif
5930