1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <sys/types.h>
27 #include <sys/errno.h>
28 #include <sys/sysmacros.h>
29 #include <sys/param.h>
30 #include <sys/machsystm.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/devops.h>
36 #include <sys/ksynch.h>
37 #include <sys/stat.h>
38 #include <sys/modctl.h>
39 #include <sys/debug.h>
40 #include <sys/ethernet.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/note.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_ether.h>
47 #include <sys/ldc.h>
48 #include <sys/mach_descrip.h>
49 #include <sys/mdeg.h>
50 #include <net/if.h>
51 #include <sys/vnet.h>
52 #include <sys/vio_mailbox.h>
53 #include <sys/vio_common.h>
54 #include <sys/vnet_common.h>
55 #include <sys/vnet_mailbox.h>
56 #include <sys/vio_util.h>
57 #include <sys/vnet_gen.h>
58 #include <sys/atomic.h>
59 #include <sys/callb.h>
60 #include <sys/sdt.h>
61 #include <sys/intr.h>
62 #include <sys/pattr.h>
63 #include <sys/vlan.h>
64
65 /*
66 * Implementation of the mac provider functionality for vnet using the
67 * generic (default) transport layer of sun4v Logical Domain Channels (LDCs).
68 */
69
70 /* Entry Points */
71 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
72 const uint8_t *macaddr, void **vgenhdl);
73 int vgen_init_mdeg(void *arg);
74 void vgen_uninit(void *arg);
75 int vgen_dds_tx(void *arg, void *dmsg);
76 int vgen_enable_intr(void *arg);
77 int vgen_disable_intr(void *arg);
78 mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);
79 static int vgen_start(void *arg);
80 static void vgen_stop(void *arg);
81 static mblk_t *vgen_tx(void *arg, mblk_t *mp);
82 static int vgen_multicst(void *arg, boolean_t add,
83 const uint8_t *mca);
84 static int vgen_promisc(void *arg, boolean_t on);
85 static int vgen_unicst(void *arg, const uint8_t *mca);
86 static int vgen_stat(void *arg, uint_t stat, uint64_t *val);
87 static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp);
88 #ifdef VNET_IOC_DEBUG
89 static int vgen_force_link_state(vgen_port_t *portp, int link_state);
90 #endif
91
92 /* Port/LDC Configuration */
93 static int vgen_read_mdprops(vgen_t *vgenp);
94 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
95 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp,
96 mde_cookie_t node);
97 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
98 uint32_t *mtu);
99 static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
100 boolean_t *pls);
101 static void vgen_detach_ports(vgen_t *vgenp);
102 static void vgen_port_detach(vgen_port_t *portp);
103 static void vgen_port_list_insert(vgen_port_t *portp);
104 static void vgen_port_list_remove(vgen_port_t *portp);
105 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp,
106 int port_num);
107 static int vgen_mdeg_reg(vgen_t *vgenp);
108 static void vgen_mdeg_unreg(vgen_t *vgenp);
109 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp);
110 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
111 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
112 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
113 mde_cookie_t mdex);
114 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex);
115 static int vgen_port_attach(vgen_port_t *portp);
116 static void vgen_port_detach_mdeg(vgen_port_t *portp);
117 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp,
118 mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex);
119 static uint64_t vgen_port_stat(vgen_port_t *portp, uint_t stat);
120 static void vgen_port_reset(vgen_port_t *portp);
121 static void vgen_reset_vsw_port(vgen_t *vgenp);
122 static int vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
123 static void vgen_ldc_up(vgen_ldc_t *ldcp);
124 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id);
125 static void vgen_ldc_detach(vgen_ldc_t *ldcp);
126 static void vgen_port_init(vgen_port_t *portp);
127 static void vgen_port_uninit(vgen_port_t *portp);
128 static int vgen_ldc_init(vgen_ldc_t *ldcp);
129 static void vgen_ldc_uninit(vgen_ldc_t *ldcp);
130 static uint64_t vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat);
131
132 /* I/O Processing */
133 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp);
134 static int vgen_ldcsend(void *arg, mblk_t *mp);
135 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp);
136 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg);
137 static void vgen_tx_watchdog(void *arg);
138
139 /* Dring Configuration */
140 static int vgen_create_dring(vgen_ldc_t *ldcp);
141 static void vgen_destroy_dring(vgen_ldc_t *ldcp);
142 static int vgen_map_dring(vgen_ldc_t *ldcp, void *pkt);
143 static void vgen_unmap_dring(vgen_ldc_t *ldcp);
144 static int vgen_mapin_avail(vgen_ldc_t *ldcp);
145
146 /* VIO Message Processing */
147 static int vgen_handshake(vgen_ldc_t *ldcp);
148 static int vgen_handshake_done(vgen_ldc_t *ldcp);
149 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp);
150 static int vgen_handshake_phase2(vgen_ldc_t *ldcp);
151 static int vgen_handshake_phase3(vgen_ldc_t *ldcp);
152 static void vgen_setup_handshake_params(vgen_ldc_t *ldcp);
153 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp);
154 static int vgen_send_attr_info(vgen_ldc_t *ldcp);
155 static int vgen_send_rx_dring_reg(vgen_ldc_t *ldcp);
156 static int vgen_send_tx_dring_reg(vgen_ldc_t *ldcp);
157 static void vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
158 uint8_t option);
159 static int vgen_send_rdx_info(vgen_ldc_t *ldcp);
160 static int vgen_send_dringdata(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
161 static int vgen_send_mcast_info(vgen_ldc_t *ldcp);
162 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp,
163 vio_msg_tag_t *tagp);
164 static int vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
165 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
166 static int vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg);
167 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
168 static int vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
169 static int vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
170 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
171 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
172 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
173 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen);
174 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
175 uint32_t msglen);
176 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
177 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
178 static void vgen_handle_evt_up(vgen_ldc_t *ldcp);
179 static int vgen_process_reset(vgen_ldc_t *ldcp, int flags);
180 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
181 static void vgen_hwatchdog(void *arg);
182 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp);
183 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp);
184 static void vgen_link_update(vgen_t *vgenp, link_state_t link_state);
185
186 /* VLANs */
187 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp,
188 mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
189 uint16_t *nvidsp, uint16_t *default_idp);
190 static void vgen_vlan_create_hash(vgen_port_t *portp);
191 static void vgen_vlan_destroy_hash(vgen_port_t *portp);
192 static void vgen_vlan_add_ids(vgen_port_t *portp);
193 static void vgen_vlan_remove_ids(vgen_port_t *portp);
194 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
195 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp,
196 uint16_t *vidp);
197 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp,
198 boolean_t is_tagged, uint16_t vid);
199 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp);
200 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp);
201
202 /* Exported functions */
203 int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller);
204 int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
205 void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
206 void vgen_destroy_rxpools(void *arg);
207
208 /* Externs */
209 extern void vnet_dds_rx(void *arg, void *dmsg);
210 extern void vnet_dds_cleanup_hio(vnet_t *vnetp);
211 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
212 extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
213 extern int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen,
214 boolean_t caller_holds_lock);
215 extern void vgen_stop_msg_thread(vgen_ldc_t *ldcp);
216 extern int vgen_create_tx_dring(vgen_ldc_t *ldcp);
217 extern void vgen_destroy_tx_dring(vgen_ldc_t *ldcp);
218 extern int vgen_map_rx_dring(vgen_ldc_t *ldcp, void *pkt);
219 extern void vgen_unmap_rx_dring(vgen_ldc_t *ldcp);
220 extern int vgen_create_rx_dring(vgen_ldc_t *ldcp);
221 extern void vgen_destroy_rx_dring(vgen_ldc_t *ldcp);
222 extern int vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt);
223 extern void vgen_unmap_tx_dring(vgen_ldc_t *ldcp);
224 extern int vgen_map_data(vgen_ldc_t *ldcp, void *pkt);
225 extern int vgen_handle_dringdata_shm(void *arg1, void *arg2);
226 extern int vgen_handle_dringdata(void *arg1, void *arg2);
227 extern int vgen_dringsend_shm(void *arg, mblk_t *mp);
228 extern int vgen_dringsend(void *arg, mblk_t *mp);
229 extern void vgen_ldc_msg_worker(void *arg);
230 extern int vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
231 uint32_t start, int32_t end, uint8_t pstate);
232 extern mblk_t *vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup);
233 extern mblk_t *vgen_poll_rcv(vgen_ldc_t *ldcp, int bytes_to_pickup);
234 extern int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
235
236 #define VGEN_PRI_ETH_DEFINED(vgenp) ((vgenp)->pri_num_types != 0)
237
238 #define LDC_LOCK(ldcp) \
239 mutex_enter(&((ldcp)->cblock));\
240 mutex_enter(&((ldcp)->rxlock));\
241 mutex_enter(&((ldcp)->wrlock));\
242 mutex_enter(&((ldcp)->txlock));\
243 mutex_enter(&((ldcp)->tclock));
244 #define LDC_UNLOCK(ldcp) \
245 mutex_exit(&((ldcp)->tclock));\
246 mutex_exit(&((ldcp)->txlock));\
247 mutex_exit(&((ldcp)->wrlock));\
248 mutex_exit(&((ldcp)->rxlock));\
249 mutex_exit(&((ldcp)->cblock));
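
/*
 * Illustrative sketch (not part of the driver): LDC_LOCK() acquires all of
 * the per-channel locks in a fixed order (cblock -> rxlock -> wrlock ->
 * txlock -> tclock) and LDC_UNLOCK() releases them in the reverse order, so
 * a caller that needs to quiesce an entire channel can do so without risking
 * lock-order deadlocks. The function name below is hypothetical:
 *
 *	static void
 *	vgen_example_quiesce(vgen_ldc_t *ldcp)
 *	{
 *		LDC_LOCK(ldcp);
 *		(examine or update channel state under all locks)
 *		LDC_UNLOCK(ldcp);
 *	}
 */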
250
251 #define VGEN_VER_EQ(ldcp, major, minor) \
252 ((ldcp)->local_hparams.ver_major == (major) && \
253 (ldcp)->local_hparams.ver_minor == (minor))
254
255 #define VGEN_VER_LT(ldcp, major, minor) \
256 (((ldcp)->local_hparams.ver_major < (major)) || \
257 ((ldcp)->local_hparams.ver_major == (major) && \
258 (ldcp)->local_hparams.ver_minor < (minor)))
259
260 #define VGEN_VER_GTEQ(ldcp, major, minor) \
261 (((ldcp)->local_hparams.ver_major > (major)) || \
262 ((ldcp)->local_hparams.ver_major == (major) && \
263 (ldcp)->local_hparams.ver_minor >= (minor)))
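
/*
 * Illustrative sketch (not part of the driver): the macros above compare the
 * locally negotiated handshake version against a given major/minor pair and
 * are used to gate version-dependent behavior, for example:
 *
 *	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
 *		(peer negotiated 1.6 or later; RxDringData mode is possible)
 *	} else if (VGEN_VER_LT(ldcp, 1, 3)) {
 *		(peer is pre-1.3 and therefore VLAN-unaware)
 *	}
 */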
264
265 /*
266 * Property names
267 */
268 static char macaddr_propname[] = "mac-address";
269 static char rmacaddr_propname[] = "remote-mac-address";
270 static char channel_propname[] = "channel-endpoint";
271 static char reg_propname[] = "reg";
272 static char port_propname[] = "port";
273 static char swport_propname[] = "switch-port";
274 static char id_propname[] = "id";
275 static char vdev_propname[] = "virtual-device";
276 static char vnet_propname[] = "network";
277 static char pri_types_propname[] = "priority-ether-types";
278 static char vgen_pvid_propname[] = "port-vlan-id";
279 static char vgen_vid_propname[] = "vlan-id";
280 static char vgen_dvid_propname[] = "default-vlan-id";
281 static char port_pvid_propname[] = "remote-port-vlan-id";
282 static char port_vid_propname[] = "remote-vlan-id";
283 static char vgen_mtu_propname[] = "mtu";
284 static char vgen_linkprop_propname[] = "linkprop";
285
286 /*
287 * VIO Protocol Version Info:
288 *
289 * The version specified below represents the version of protocol currently
290 * supported in the driver. It means the driver can negotiate with peers with
291 * versions <= this version. Here is a summary of the feature(s) that are
292 * supported at each version of the protocol:
293 *
294 * 1.0 Basic VIO protocol.
295 * 1.1 vDisk protocol update (no virtual network update).
296 * 1.2 Support for priority frames (priority-ether-types).
297 * 1.3 VLAN and HybridIO support.
298 * 1.4 Jumbo Frame support.
299 * 1.5 Link State Notification support with optional support
300 * for Physical Link information.
301 * 1.6 Support for RxDringData mode.
302 */
303 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 6} };
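
/*
 * Worked example (for clarity only): if the local driver supports up to
 * version 1.6 (above) and the peer proposes 1.4 during the version exchange,
 * the handshake settles on 1.4 and only the features defined at or below 1.4
 * (e.g. jumbo frames, VLANs) are enabled on that channel.
 */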
304
305 /* Tunables */
306 uint32_t vgen_hwd_interval = 5; /* handshake watchdog freq in sec */
307 uint32_t vgen_ldcwr_retries = 10; /* max # of ldc_write() retries */
308 uint32_t vgen_ldcup_retries = 5; /* max # of ldc_up() retries */
309 uint32_t vgen_ldccl_retries = 5; /* max # of ldc_close() retries */
310 uint32_t vgen_tx_delay = 0x30; /* delay when tx descr not available */
311 uint32_t vgen_ldc_mtu = VGEN_LDC_MTU; /* ldc mtu */
312 uint32_t vgen_txwd_interval = VGEN_TXWD_INTERVAL; /* watchdog freq in msec */
313 uint32_t vgen_txwd_timeout = VGEN_TXWD_TIMEOUT; /* tx timeout in msec */
314
315 /*
316 * Max # of channel resets allowed during handshake.
317 */
318 uint32_t vgen_ldc_max_resets = 5;
319
320 /*
321 * See comments in vsw.c for details on the dring modes supported.
322 * In RxDringData mode, # of buffers is determined by multiplying the # of
323 * descriptors with the factor below. Note that the factor must be > 1; i.e.,
324 * the # of buffers must always be > # of descriptors. This is needed because,
325 * while the shared memory buffers are sent up the stack on the receiver, the
326 * sender needs additional buffers that can be used for further transmits.
327 * See vgen_create_rx_dring() for details.
328 */
329 uint32_t vgen_nrbufs_factor = 2;
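
/*
 * Worked example (for clarity only): with the factor above set to 2, a
 * receive dring of N descriptors is backed by 2 * N buffers; e.g. a ring of
 * 512 descriptors would get 1024 buffers, leaving buffers available for new
 * transmits while others are still held by the receiving stack.
 */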
330
331 /*
332 * Retry delay used while destroying rx mblk pools. Used in both Dring modes.
333 */
334 int vgen_rxpool_cleanup_delay = 100000; /* 100ms */
335
336 /*
337 * Delay when rx descr not ready; used in TxDring mode only.
338 */
339 uint32_t vgen_recv_delay = 1;
340
341 /*
342 * Retry when rx descr not ready; used in TxDring mode only.
343 */
344 uint32_t vgen_recv_retries = 10;
345
346 /*
347 * Max # of packets accumulated prior to sending them up. It is best
348 * to keep this at 60% of the number of receive buffers. Used in TxDring mode
349 * by the msg worker thread. Used in RxDringData mode while in interrupt mode
350 * (not used in polled mode).
351 */
352 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6);
353
354 /*
355 * Internal tunables for receive buffer pools, that is, the size and number of
356 * mblks for each pool. At least 3 sizes must be specified if these are used.
357 * The sizes must be specified in increasing order. Non-zero value of the first
358 * size will be used as a hint to use these values instead of the algorithm
359 * that determines the sizes based on MTU. Used in TxDring mode only.
360 */
361 uint32_t vgen_rbufsz1 = 0;
362 uint32_t vgen_rbufsz2 = 0;
363 uint32_t vgen_rbufsz3 = 0;
364 uint32_t vgen_rbufsz4 = 0;
365
366 uint32_t vgen_nrbufs1 = VGEN_NRBUFS;
367 uint32_t vgen_nrbufs2 = VGEN_NRBUFS;
368 uint32_t vgen_nrbufs3 = VGEN_NRBUFS;
369 uint32_t vgen_nrbufs4 = VGEN_NRBUFS;
370
371 /*
372 * In the absence of "priority-ether-types" property in MD, the following
373 * internal tunable can be set to specify a single priority ethertype.
374 */
375 uint64_t vgen_pri_eth_type = 0;
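
/*
 * Illustrative sketch (not part of the driver): on firmware without the MD
 * property, this tunable could be set from /etc/system for the vnet module
 * to mark a single ether type (ARP in this hypothetical example) as a
 * priority type:
 *
 *	set vnet:vgen_pri_eth_type = 0x806
 */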
376
377 /*
378 * Number of transmit priority buffers that are preallocated per device.
379 * This number is chosen to be a small value to throttle transmission
380 * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
381 */
382 uint32_t vgen_pri_tx_nmblks = 64;
383
384 uint32_t vgen_vlan_nchains = 4; /* # of chains in vlan id hash table */
385
386 /*
387 * Matching criteria passed to the MDEG to register interest
388 * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified
389 * by their 'name' and 'cfg-handle' properties.
390 */
391 static md_prop_match_t vdev_prop_match[] = {
392 { MDET_PROP_STR, "name" },
393 { MDET_PROP_VAL, "cfg-handle" },
394 { MDET_LIST_END, NULL }
395 };
396
397 static mdeg_node_match_t vdev_match = { "virtual-device",
398 vdev_prop_match };
399
400 /* MD update matching structure */
401 static md_prop_match_t vport_prop_match[] = {
402 { MDET_PROP_VAL, "id" },
403 { MDET_LIST_END, NULL }
404 };
405
406 static mdeg_node_match_t vport_match = { "virtual-device-port",
407 vport_prop_match };
408
409 /* Template for matching a particular vnet instance */
410 static mdeg_prop_spec_t vgen_prop_template[] = {
411 { MDET_PROP_STR, "name", "network" },
412 { MDET_PROP_VAL, "cfg-handle", NULL },
413 { MDET_LIST_END, NULL, NULL }
414 };
415
416 #define VGEN_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val)
417
418 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp);
419
420 #ifdef VNET_IOC_DEBUG
421 #define VGEN_M_CALLBACK_FLAGS (MC_IOCTL)
422 #else
423 #define VGEN_M_CALLBACK_FLAGS (0)
424 #endif
425
426 static mac_callbacks_t vgen_m_callbacks = {
427 VGEN_M_CALLBACK_FLAGS,
428 vgen_stat,
429 vgen_start,
430 vgen_stop,
431 vgen_promisc,
432 vgen_multicst,
433 vgen_unicst,
434 vgen_tx,
435 NULL,
436 vgen_ioctl,
437 NULL,
438 NULL
439 };
440
441 /* Externs */
442 extern pri_t maxclsyspri;
443 extern proc_t p0;
444 extern uint32_t vnet_ethermtu;
445 extern uint16_t vnet_default_vlan_id;
446 extern uint32_t vnet_num_descriptors;
447
448 #ifdef DEBUG
449
450 #define DEBUG_PRINTF vgen_debug_printf
451
452 extern int vnet_dbglevel;
453
454 void vgen_debug_printf(const char *fname, vgen_t *vgenp,
455 vgen_ldc_t *ldcp, const char *fmt, ...);
456
457 /* -1 for all LDCs info, or ldc_id for a specific LDC info */
458 int vgendbg_ldcid = -1;
459
460 /* Flags to simulate error conditions for debugging */
461 int vgen_inject_err_flag = 0;
462
463
464 boolean_t
465 vgen_inject_error(vgen_ldc_t *ldcp, int error)
466 {
467 if ((vgendbg_ldcid == ldcp->ldc_id) &&
468 (vgen_inject_err_flag & error)) {
469 return (B_TRUE);
470 }
471 return (B_FALSE);
472 }
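
/*
 * Illustrative sketch (not part of the driver): code paths that want to
 * simulate a failure can test an error bit before taking the error path;
 * the flag name below is hypothetical:
 *
 *	if (vgen_inject_error(ldcp, VGEN_ERR_EXAMPLE)) {
 *		(behave as if the real error had occurred)
 *	}
 *
 * Setting vgendbg_ldcid to a channel id and vgen_inject_err_flag to the
 * desired error bits (e.g. via mdb -kw) limits the simulation to that
 * channel.
 */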
473
474 #endif
475
476 /*
477 * vgen_init() is called by an instance of vnet driver to initialize the
478 * corresponding generic transport layer. This layer uses Logical Domain
479 * Channels (LDCs) to communicate with the virtual switch in the service domain
480 * and also with peer vnets in other guest domains in the system.
481 *
482 * Arguments:
483 * vnetp: an opaque pointer to the vnet instance
484 * regprop: value of the "reg" property (cfg-handle) of the vnet device
485 * vnetdip: dip of the vnet device
486 * macaddr: mac address of the vnet device
487 *
488 * Returns:
489 * Success: DDI_SUCCESS (a handle to the vgen instance is returned via vgenhdl)
490 * Failure: DDI_FAILURE
491 */
492 int
493 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
494 const uint8_t *macaddr, void **vgenhdl)
495 {
496 vgen_t *vgenp;
497 int instance;
498 int rv;
499 char qname[TASKQ_NAMELEN];
500
501 if ((vnetp == NULL) || (vnetdip == NULL))
502 return (DDI_FAILURE);
503
504 instance = ddi_get_instance(vnetdip);
505
506 DBG1(NULL, NULL, "vnet(%d): enter\n", instance);
507
508 vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP);
509
510 vgenp->vnetp = vnetp;
511 vgenp->instance = instance;
512 vgenp->regprop = regprop;
513 vgenp->vnetdip = vnetdip;
514 bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL);
515 vgenp->phys_link_state = LINK_STATE_UNKNOWN;
516
517 /* allocate multicast table */
518 vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE *
519 sizeof (struct ether_addr), KM_SLEEP);
520 vgenp->mccount = 0;
521 vgenp->mcsize = VGEN_INIT_MCTAB_SIZE;
522
523 mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL);
524 rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL);
525
526 (void) snprintf(qname, TASKQ_NAMELEN, "rxpool_taskq%d",
527 instance);
528 if ((vgenp->rxp_taskq = ddi_taskq_create(vnetdip, qname, 1,
529 TASKQ_DEFAULTPRI, 0)) == NULL) {
530 cmn_err(CE_WARN, "!vnet%d: Unable to create rx pool task queue",
531 instance);
532 goto vgen_init_fail;
533 }
534
535 rv = vgen_read_mdprops(vgenp);
536 if (rv != 0) {
537 goto vgen_init_fail;
538 }
539 *vgenhdl = (void *)vgenp;
540
541 DBG1(NULL, NULL, "vnet(%d): exit\n", instance);
542 return (DDI_SUCCESS);
543
544 vgen_init_fail:
545 rw_destroy(&vgenp->vgenports.rwlock);
546 mutex_destroy(&vgenp->lock);
547 kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE *
548 sizeof (struct ether_addr));
549 if (VGEN_PRI_ETH_DEFINED(vgenp)) {
550 kmem_free(vgenp->pri_types,
551 sizeof (uint16_t) * vgenp->pri_num_types);
552 (void) vio_destroy_mblks(vgenp->pri_tx_vmp);
553 }
554 if (vgenp->rxp_taskq != NULL) {
555 ddi_taskq_destroy(vgenp->rxp_taskq);
556 vgenp->rxp_taskq = NULL;
557 }
558 KMEM_FREE(vgenp);
559 return (DDI_FAILURE);
560 }
561
562 int
563 vgen_init_mdeg(void *arg)
564 {
565 vgen_t *vgenp = (vgen_t *)arg;
566
567 /* register with MD event generator */
568 return (vgen_mdeg_reg(vgenp));
569 }
570
571 /*
572 * Called by vnet to undo the initializations done by vgen_init().
573 * The handle provided by generic transport during vgen_init() is the argument.
574 */
575 void
576 vgen_uninit(void *arg)
577 {
578 vgen_t *vgenp = (vgen_t *)arg;
579
580 if (vgenp == NULL) {
581 return;
582 }
583
584 DBG1(vgenp, NULL, "enter\n");
585
586 /* Unregister with MD event generator */
587 vgen_mdeg_unreg(vgenp);
588
589 mutex_enter(&vgenp->lock);
590
591 /*
592 * Detach all ports from the device; note that the device should have
593 * been unplumbed by this time (See vnet_unattach() for the sequence)
594 * and thus vgen_stop() has already been invoked on all the ports.
595 */
596 vgen_detach_ports(vgenp);
597
598 /*
599 * We now destroy the taskq used to clean up rx mblk pools that
600 * couldn't be destroyed when the ports/channels were detached.
601 * We implicitly wait for those tasks to complete in
602 * ddi_taskq_destroy().
603 */
604 if (vgenp->rxp_taskq != NULL) {
605 ddi_taskq_destroy(vgenp->rxp_taskq);
606 vgenp->rxp_taskq = NULL;
607 }
608
609 /* Free multicast table */
610 kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr));
611
612 /* Free pri_types table */
613 if (VGEN_PRI_ETH_DEFINED(vgenp)) {
614 kmem_free(vgenp->pri_types,
615 sizeof (uint16_t) * vgenp->pri_num_types);
616 (void) vio_destroy_mblks(vgenp->pri_tx_vmp);
617 }
618
619 mutex_exit(&vgenp->lock);
620 rw_destroy(&vgenp->vgenports.rwlock);
621 mutex_destroy(&vgenp->lock);
622
623 DBG1(vgenp, NULL, "exit\n");
624 KMEM_FREE(vgenp);
625 }
626
627 /* enable transmit/receive for the device */
628 int
629 vgen_start(void *arg)
630 {
631 vgen_port_t *portp = (vgen_port_t *)arg;
632 vgen_t *vgenp = portp->vgenp;
633
634 DBG1(vgenp, NULL, "enter\n");
635 mutex_enter(&portp->lock);
636 vgen_port_init(portp);
637 portp->flags |= VGEN_STARTED;
638 mutex_exit(&portp->lock);
639 DBG1(vgenp, NULL, "exit\n");
640
641 return (DDI_SUCCESS);
642 }
643
644 /* stop transmit/receive */
645 void
646 vgen_stop(void *arg)
647 {
648 vgen_port_t *portp = (vgen_port_t *)arg;
649 vgen_t *vgenp = portp->vgenp;
650
651 DBG1(vgenp, NULL, "enter\n");
652
653 mutex_enter(&portp->lock);
654 if (portp->flags & VGEN_STARTED) {
655 vgen_port_uninit(portp);
656 portp->flags &= ~(VGEN_STARTED);
657 }
658 mutex_exit(&portp->lock);
659 DBG1(vgenp, NULL, "exit\n");
660
661 }
662
663 /* vgen transmit function */
664 static mblk_t *
665 vgen_tx(void *arg, mblk_t *mp)
666 {
667 vgen_port_t *portp;
668 int status;
669
670 portp = (vgen_port_t *)arg;
671 status = vgen_portsend(portp, mp);
672 if (status != VGEN_SUCCESS) {
673 /* failure */
674 return (mp);
675 }
676 /* success */
677 return (NULL);
678 }
679
680 /*
681 * This function provides any necessary tagging/untagging of the frames
682 * that are being transmitted over the port. It first verifies the vlan
683 * membership of the destination(port) and drops the packet if the
684 * destination doesn't belong to the given vlan.
685 *
686 * Arguments:
687 * portp: port over which the frames should be transmitted
688 * mp: frame to be transmitted
689 * is_tagged:
690 * B_TRUE: indicates frame header contains the vlan tag already.
691 * B_FALSE: indicates frame is untagged.
692 * vid: vlan in which the frame should be transmitted.
693 *
694 * Returns:
695 * Success: frame (mblk_t *) after doing the necessary tag/untag.
696 * Failure: NULL
697 */
698 static mblk_t *
699 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged,
700 uint16_t vid)
701 {
702 vgen_t *vgenp;
703 boolean_t dst_tagged;
704 int rv;
705
706 vgenp = portp->vgenp;
707
708 /*
709 * If the packet is going to a vnet:
710 * Check if the destination vnet is in the same vlan.
711 * Check the frame header if tag or untag is needed.
712 *
713 * We do not check the above conditions if the packet is going to vsw:
714 * vsw must be present implicitly in all the vlans that a vnet device
715 * is configured into; even if vsw itself is not assigned to those
716 * vlans as an interface. For instance, the packet might be destined
717 * to another vnet (indirectly through vsw) or to an external host
718 * which is in the same vlan as this vnet, and vsw itself may not be
719 * present in that vlan. Similarly, packets going to vsw must always
720 * be tagged (unless in the default-vlan) if not already tagged,
721 * as we do not know the final destination. This is needed because
722 * vsw must always invoke its switching function only after tagging
723 * the packet; otherwise, after the switching function determines the
724 * destination we cannot figure out if the destination belongs to
725 * the same vlan that the frame originated from and if it needs tag/
726 * untag. Note that vsw will tag the packet itself when it receives
727 * it over the channel from a client if needed. However, that is
728 * needed only in the case of vlan unaware clients such as obp or
729 * earlier versions of vnet.
730 *
731 */
732 if (portp != vgenp->vsw_portp) {
733 /*
734 * Packet going to a vnet. Check if the destination vnet is in
735 * the same vlan. Then check the frame header if tag/untag is
736 * needed.
737 */
738 rv = vgen_vlan_lookup(portp->vlan_hashp, vid);
739 if (rv == B_FALSE) {
740 /* drop the packet */
741 freemsg(mp);
742 return (NULL);
743 }
744
745 /* is the destination tagged or untagged in this vlan? */
746 (vid == portp->pvid) ? (dst_tagged = B_FALSE) :
747 (dst_tagged = B_TRUE);
748
749 if (is_tagged == dst_tagged) {
750 /* no tagging/untagging needed */
751 return (mp);
752 }
753
754 if (is_tagged == B_TRUE) {
755 /* frame is tagged; destination needs untagged */
756 mp = vnet_vlan_remove_tag(mp);
757 return (mp);
758 }
759
760 /* (is_tagged == B_FALSE): fallthru to tag tx packet: */
761 }
762
763 /*
764 * Packet going to a vnet needs tagging.
765 * OR
766 * If the packet is going to vsw, then it must be tagged in all cases:
767 * unknown unicast, broadcast/multicast or to vsw interface.
768 */
769
770 if (is_tagged == B_FALSE) {
771 mp = vnet_vlan_insert_tag(mp, vid);
772 }
773
774 return (mp);
775 }
776
777 /* transmit packets over the given port */
778 static int
779 vgen_portsend(vgen_port_t *portp, mblk_t *mp)
780 {
781 vgen_ldc_t *ldcp;
782 int status;
783 int rv = VGEN_SUCCESS;
784 vgen_t *vgenp = portp->vgenp;
785 vnet_t *vnetp = vgenp->vnetp;
786 boolean_t is_tagged;
787 boolean_t dec_refcnt = B_FALSE;
788 uint16_t vlan_id;
789 struct ether_header *ehp;
790
791 if (portp == NULL) {
792 return (VGEN_FAILURE);
793 }
794
795 if (portp->use_vsw_port) {
796 (void) atomic_inc_32(&vgenp->vsw_port_refcnt);
797 portp = portp->vgenp->vsw_portp;
798 ASSERT(portp != NULL);
799 dec_refcnt = B_TRUE;
800 }
801
802 /*
803 * Determine the vlan id that the frame belongs to.
804 */
805 ehp = (struct ether_header *)mp->b_rptr;
806 is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id);
807
808 if (vlan_id == vnetp->default_vlan_id) {
809
810 /* Frames in default vlan must be untagged */
811 ASSERT(is_tagged == B_FALSE);
812
813 /*
814 * If the destination is a vnet-port verify it belongs to the
815 * default vlan; otherwise drop the packet. We do not need
816 * this check for vsw-port, as it should implicitly belong to
817 * this vlan; see comments in vgen_vlan_frame_fixtag().
818 */
819 if (portp != vgenp->vsw_portp &&
820 portp->pvid != vnetp->default_vlan_id) {
821 freemsg(mp);
822 goto portsend_ret;
823 }
824
825 } else { /* frame not in default-vlan */
826
827 mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id);
828 if (mp == NULL) {
829 goto portsend_ret;
830 }
831
832 }
833
834 ldcp = portp->ldcp;
835 status = ldcp->tx(ldcp, mp);
836
837 if (status != VGEN_TX_SUCCESS) {
838 rv = VGEN_FAILURE;
839 }
840
841 portsend_ret:
842 if (dec_refcnt == B_TRUE) {
843 (void) atomic_dec_32(&vgenp->vsw_port_refcnt);
844 }
845 return (rv);
846 }
847
848 /*
849 * Wrapper function to transmit normal and/or priority frames over the channel.
850 */
851 static int
852 vgen_ldcsend(void *arg, mblk_t *mp)
853 {
854 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
855 int status;
856 struct ether_header *ehp;
857 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
858 uint32_t num_types;
859 uint16_t *types;
860 int i;
861
862 ASSERT(VGEN_PRI_ETH_DEFINED(vgenp));
863
864 num_types = vgenp->pri_num_types;
865 types = vgenp->pri_types;
866 ehp = (struct ether_header *)mp->b_rptr;
867
868 for (i = 0; i < num_types; i++) {
869
870 if (ehp->ether_type == types[i]) {
871 /* priority frame, use pri tx function */
872 vgen_ldcsend_pkt(ldcp, mp);
873 return (VGEN_SUCCESS);
874 }
875
876 }
877
878 if (ldcp->tx_dringdata == NULL) {
879 freemsg(mp);
880 return (VGEN_SUCCESS);
881 }
882
883 status = ldcp->tx_dringdata(ldcp, mp);
884 return (status);
885 }
886
887 /*
888 * This function transmits the frame in the payload of a raw data
889 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
890 * send special frames with high priorities, without going through
891 * the normal data path, which uses the descriptor ring mechanism.
892 */
893 static void
894 vgen_ldcsend_pkt(void *arg, mblk_t *mp)
895 {
896 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
897 vio_raw_data_msg_t *pkt;
898 mblk_t *bp;
899 mblk_t *nmp = NULL;
900 vio_mblk_t *vmp;
901 caddr_t dst;
902 uint32_t mblksz;
903 uint32_t size;
904 uint32_t nbytes;
905 int rv;
906 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
907 vgen_stats_t *statsp = &ldcp->stats;
908
909 /* drop the packet if ldc is not up or handshake is not done */
910 if (ldcp->ldc_status != LDC_UP) {
911 (void) atomic_inc_32(&statsp->tx_pri_fail);
912 DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
913 ldcp->ldc_status);
914 goto send_pkt_exit;
915 }
916
917 if (ldcp->hphase != VH_DONE) {
918 (void) atomic_inc_32(&statsp->tx_pri_fail);
919 DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
920 ldcp->hphase);
921 goto send_pkt_exit;
922 }
923
924 size = msgsize(mp);
925
926 /* frame size bigger than available payload len of raw data msg ? */
927 if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
928 (void) atomic_inc_32(&statsp->tx_pri_fail);
929 DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
930 goto send_pkt_exit;
931 }
932
933 if (size < ETHERMIN)
934 size = ETHERMIN;
935
936 /* alloc space for a raw data message */
937 vmp = vio_allocb(vgenp->pri_tx_vmp);
938 if (vmp == NULL) {
939 (void) atomic_inc_32(&statsp->tx_pri_fail);
940 DWARN(vgenp, ldcp, "vio_allocb failed\n");
941 goto send_pkt_exit;
942 } else {
943 nmp = vmp->mp;
944 }
945 pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
946
947 /* copy frame into the payload of raw data message */
948 dst = (caddr_t)pkt->data;
949 for (bp = mp; bp != NULL; bp = bp->b_cont) {
950 mblksz = MBLKL(bp);
951 bcopy(bp->b_rptr, dst, mblksz);
952 dst += mblksz;
953 }
954
955 vmp->state = VIO_MBLK_HAS_DATA;
956
957 /* setup the raw data msg */
958 pkt->tag.vio_msgtype = VIO_TYPE_DATA;
959 pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
960 pkt->tag.vio_subtype_env = VIO_PKT_DATA;
961 pkt->tag.vio_sid = ldcp->local_sid;
962 nbytes = VIO_PKT_DATA_HDRSIZE + size;
963
964 /* send the msg over ldc */
965 rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
966 if (rv != VGEN_SUCCESS) {
967 (void) atomic_inc_32(&statsp->tx_pri_fail);
968 DWARN(vgenp, ldcp, "Error sending priority frame\n");
969 if (rv == ECONNRESET) {
970 (void) vgen_handle_evt_reset(ldcp, VGEN_OTHER);
971 }
972 goto send_pkt_exit;
973 }
974
975 /* update stats */
976 (void) atomic_inc_64(&statsp->tx_pri_packets);
977 (void) atomic_add_64(&statsp->tx_pri_bytes, size);
978
979 send_pkt_exit:
980 if (nmp != NULL)
981 freemsg(nmp);
982 freemsg(mp);
983 }
984
985 /*
986 * Enable/disable a multicast address.
987 * Note that the cblock of the ldc channel connected to the vsw is used for
988 * synchronization of the mctab.
989 */
990 int
991 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca)
992 {
993 vgen_t *vgenp;
994 vnet_mcast_msg_t mcastmsg;
995 vio_msg_tag_t *tagp;
996 vgen_port_t *portp;
997 vgen_ldc_t *ldcp;
998 struct ether_addr *addrp;
999 int rv = DDI_FAILURE;
1000 uint32_t i;
1001
1002 portp = (vgen_port_t *)arg;
1003 vgenp = portp->vgenp;
1004
1005 if (portp->is_vsw_port != B_TRUE) {
1006 return (DDI_SUCCESS);
1007 }
1008
1009 addrp = (struct ether_addr *)mca;
1010 tagp = &mcastmsg.tag;
1011 bzero(&mcastmsg, sizeof (mcastmsg));
1012
1013 ldcp = portp->ldcp;
1014 if (ldcp == NULL) {
1015 return (DDI_FAILURE);
1016 }
1017
1018 mutex_enter(&ldcp->cblock);
1019
1020 if (ldcp->hphase == VH_DONE) {
1021 /*
1022 * If handshake is done, send a msg to vsw to add/remove
1023 * the multicast address. Otherwise, we just update this
1024 * mcast address in our table and the table will be sync'd
1025 * with vsw when handshake completes.
1026 */
1027 tagp->vio_msgtype = VIO_TYPE_CTRL;
1028 tagp->vio_subtype = VIO_SUBTYPE_INFO;
1029 tagp->vio_subtype_env = VNET_MCAST_INFO;
1030 tagp->vio_sid = ldcp->local_sid;
1031 bcopy(mca, &(mcastmsg.mca), ETHERADDRL);
1032 mcastmsg.set = add;
1033 mcastmsg.count = 1;
1034 if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg),
1035 B_FALSE) != VGEN_SUCCESS) {
1036 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
1037 rv = DDI_FAILURE;
1038 goto vgen_mcast_exit;
1039 }
1040 }
1041
1042 if (add) {
1043
1044 /* expand multicast table if necessary */
1045 if (vgenp->mccount >= vgenp->mcsize) {
1046 struct ether_addr *newtab;
1047 uint32_t newsize;
1048
1049
1050 newsize = vgenp->mcsize * 2;
1051
1052 newtab = kmem_zalloc(newsize *
1053 sizeof (struct ether_addr), KM_NOSLEEP);
1054 if (newtab == NULL)
1055 goto vgen_mcast_exit;
1056 bcopy(vgenp->mctab, newtab, vgenp->mcsize *
1057 sizeof (struct ether_addr));
1058 kmem_free(vgenp->mctab,
1059 vgenp->mcsize * sizeof (struct ether_addr));
1060
1061 vgenp->mctab = newtab;
1062 vgenp->mcsize = newsize;
1063 }
1064
1065 /* add address to the table */
1066 vgenp->mctab[vgenp->mccount++] = *addrp;
1067
1068 } else {
1069
1070 /* delete address from the table */
1071 for (i = 0; i < vgenp->mccount; i++) {
1072 if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) {
1073
1074 /*
1075 * If there's more than one address in this
1076 * table, delete the unwanted one by moving
1077 * the last one in the list over top of it;
1078 * otherwise, just remove it.
1079 */
1080 if (vgenp->mccount > 1) {
1081 vgenp->mctab[i] =
1082 vgenp->mctab[vgenp->mccount-1];
1083 }
1084 vgenp->mccount--;
1085 break;
1086 }
1087 }
1088 }
1089
1090 rv = DDI_SUCCESS;
1091
1092 vgen_mcast_exit:
1093
1094 mutex_exit(&ldcp->cblock);
1095 return (rv);
1096 }
1097
1098 /* set or clear promiscuous mode on the device */
1099 static int
1100 vgen_promisc(void *arg, boolean_t on)
1101 {
1102 _NOTE(ARGUNUSED(arg, on))
1103 return (DDI_SUCCESS);
1104 }
1105
1106 /* set the unicast mac address of the device */
1107 static int
1108 vgen_unicst(void *arg, const uint8_t *mca)
1109 {
1110 _NOTE(ARGUNUSED(arg, mca))
1111 return (DDI_SUCCESS);
1112 }
1113
1114 /* get device statistics */
1115 int
1116 vgen_stat(void *arg, uint_t stat, uint64_t *val)
1117 {
1118 vgen_port_t *portp = (vgen_port_t *)arg;
1119
1120 *val = vgen_port_stat(portp, stat);
1121 return (0);
1122 }
1123
1124 /* vgen internal functions */
1125 /* detach all ports from the device */
1126 static void
1127 vgen_detach_ports(vgen_t *vgenp)
1128 {
1129 vgen_port_t *portp;
1130 vgen_portlist_t *plistp;
1131
1132 plistp = &(vgenp->vgenports);
1133 WRITE_ENTER(&plistp->rwlock);
1134 while ((portp = plistp->headp) != NULL) {
1135 vgen_port_detach(portp);
1136 }
1137 RW_EXIT(&plistp->rwlock);
1138 }
1139
1140 /*
1141 * detach the given port.
1142 */
1143 static void
1144 vgen_port_detach(vgen_port_t *portp)
1145 {
1146 vgen_t *vgenp;
1147 int port_num;
1148
1149 vgenp = portp->vgenp;
1150 port_num = portp->port_num;
1151
1152 DBG1(vgenp, NULL, "port(%d):enter\n", port_num);
1153
1154 /*
1155 * If this port is connected to the vswitch, then
1156 * potentially there could be ports that may be using
1157 * this port to transmit packets. To address this do
1158 * the following:
1159 * - First set vgenp->vsw_portp to NULL, so that
1160 * it's not used after that.
1161 * - Then wait for the refcnt to go down to 0.
1162 * - Now we can safely detach this port.
1163 */
1164 if (vgenp->vsw_portp == portp) {
1165 vgenp->vsw_portp = NULL;
1166 while (vgenp->vsw_port_refcnt > 0) {
1167 delay(drv_usectohz(vgen_tx_delay));
1168 }
1169 (void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
1170 }
1171
1172 if (portp->vhp != NULL) {
1173 vio_net_resource_unreg(portp->vhp);
1174 portp->vhp = NULL;
1175 }
1176
1177 vgen_vlan_destroy_hash(portp);
1178
1179 /* remove it from port list */
1180 vgen_port_list_remove(portp);
1181
1182 /* detach channels from this port */
1183 vgen_ldc_detach(portp->ldcp);
1184
1185 if (portp->num_ldcs != 0) {
1186 kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t));
1187 portp->num_ldcs = 0;
1188 }
1189
1190 mutex_destroy(&portp->lock);
1191 KMEM_FREE(portp);
1192
1193 DBG1(vgenp, NULL, "port(%d):exit\n", port_num);
1194 }
1195
1196 /* add a port to port list */
1197 static void
1198 vgen_port_list_insert(vgen_port_t *portp)
1199 {
1200 vgen_portlist_t *plistp;
1201 vgen_t *vgenp;
1202
1203 vgenp = portp->vgenp;
1204 plistp = &(vgenp->vgenports);
1205
1206 if (plistp->headp == NULL) {
1207 plistp->headp = portp;
1208 } else {
1209 plistp->tailp->nextp = portp;
1210 }
1211 plistp->tailp = portp;
1212 portp->nextp = NULL;
1213 }
1214
1215 /* remove a port from port list */
1216 static void
1217 vgen_port_list_remove(vgen_port_t *portp)
1218 {
1219 vgen_port_t *prevp;
1220 vgen_port_t *nextp;
1221 vgen_portlist_t *plistp;
1222 vgen_t *vgenp;
1223
1224 vgenp = portp->vgenp;
1225
1226 plistp = &(vgenp->vgenports);
1227
1228 if (plistp->headp == NULL)
1229 return;
1230
1231 if (portp == plistp->headp) {
1232 plistp->headp = portp->nextp;
1233 if (portp == plistp->tailp)
1234 plistp->tailp = plistp->headp;
1235 } else {
1236 for (prevp = plistp->headp;
1237 ((nextp = prevp->nextp) != NULL) && (nextp != portp);
1238 prevp = nextp)
1239 ;
1240 if (nextp == portp) {
1241 prevp->nextp = portp->nextp;
1242 }
1243 if (portp == plistp->tailp)
1244 plistp->tailp = prevp;
1245 }
1246 }
1247
1248 /* lookup a port in the list based on port_num */
1249 static vgen_port_t *
1250 vgen_port_lookup(vgen_portlist_t *plistp, int port_num)
1251 {
1252 vgen_port_t *portp = NULL;
1253
1254 for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
1255 if (portp->port_num == port_num) {
1256 break;
1257 }
1258 }
1259
1260 return (portp);
1261 }
1262
1263 static void
1264 vgen_port_init(vgen_port_t *portp)
1265 {
1266 /* Add the port to the specified vlans */
1267 vgen_vlan_add_ids(portp);
1268
1269 /* Bring up the channel */
1270 (void) vgen_ldc_init(portp->ldcp);
1271 }
1272
1273 static void
1274 vgen_port_uninit(vgen_port_t *portp)
1275 {
1276 vgen_ldc_uninit(portp->ldcp);
1277
1278 /* remove the port from vlans it has been assigned to */
1279 vgen_vlan_remove_ids(portp);
1280 }
1281
1282 /*
1283 * Scan the machine description for this instance of vnet
1284 * and read its properties. Called only from vgen_init().
1285 * Returns: 0 on success, 1 on failure.
1286 */
1287 static int
1288 vgen_read_mdprops(vgen_t *vgenp)
1289 {
1290 vnet_t *vnetp = vgenp->vnetp;
1291 md_t *mdp = NULL;
1292 mde_cookie_t rootnode;
1293 mde_cookie_t *listp = NULL;
1294 uint64_t cfgh;
1295 char *name;
1296 int rv = 1;
1297 int num_nodes = 0;
1298 int num_devs = 0;
1299 int listsz = 0;
1300 int i;
1301
1302 if ((mdp = md_get_handle()) == NULL) {
1303 return (rv);
1304 }
1305
1306 num_nodes = md_node_count(mdp);
1307 ASSERT(num_nodes > 0);
1308
1309 listsz = num_nodes * sizeof (mde_cookie_t);
1310 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1311
1312 rootnode = md_root_node(mdp);
1313
1314 /* search for all "virtual-device" nodes */
1315 num_devs = md_scan_dag(mdp, rootnode,
1316 md_find_name(mdp, vdev_propname),
1317 md_find_name(mdp, "fwd"), listp);
1318 if (num_devs <= 0) {
1319 goto vgen_readmd_exit;
1320 }
1321
1322 /*
1323 * Now loop through the list of virtual-devices looking for
1324 * devices with name "network" and for each such device compare
1325 * its instance with what we have from the 'reg' property to
1326 * find the right node in MD and then read all its properties.
1327 */
1328 for (i = 0; i < num_devs; i++) {
1329
1330 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1331 goto vgen_readmd_exit;
1332 }
1333
1334 /* is this a "network" device? */
1335 if (strcmp(name, vnet_propname) != 0)
1336 continue;
1337
1338 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1339 goto vgen_readmd_exit;
1340 }
1341
1342 /* is this the required instance of vnet? */
1343 if (vgenp->regprop != cfgh)
1344 continue;
1345
1346 /*
1347 * Read the 'linkprop' property to know if this vnet
1348 * device should get physical link updates from vswitch.
1349 */
1350 vgen_linkprop_read(vgenp, mdp, listp[i],
1351 &vnetp->pls_update);
1352
1353 /*
1354 * Read the mtu. Note that we set the mtu of the vnet device within
1355 * this routine itself, after validating the range.
1356 */
1357 vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
1358 if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
1359 vnetp->mtu = ETHERMTU;
1360 }
1361 vgenp->max_frame_size = vnetp->mtu +
1362 sizeof (struct ether_header) + VLAN_TAGSZ;
1363
1364 /* read priority ether types */
1365 vgen_read_pri_eth_types(vgenp, mdp, listp[i]);
1366
1367 /* read vlan id properties of this vnet instance */
1368 vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
1369 &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
1370 &vnetp->default_vlan_id);
1371
1372 rv = 0;
1373 break;
1374 }
1375
1376 vgen_readmd_exit:
1377
1378 kmem_free(listp, listsz);
1379 (void) md_fini_handle(mdp);
1380 return (rv);
1381 }
1382
1383 /*
1384 * Read vlan id properties of the given MD node.
1385 * Arguments:
1386 * arg: device argument(vnet device or a port)
1387 * type: type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
1388 * mdp: machine description
1389 * node: md node cookie
1390 *
1391 * Returns:
1392 * pvidp: port-vlan-id of the node
1393 * vidspp: list of vlan-ids of the node
1394 * nvidsp: # of vlan-ids in the list
1395 * default_idp: default-vlan-id of the node(if node is vnet device)
1396 */
1397 static void
1398 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1399 uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
1400 uint16_t *default_idp)
1401 {
1402 vgen_t *vgenp;
1403 vnet_t *vnetp;
1404 vgen_port_t *portp;
1405 char *pvid_propname;
1406 char *vid_propname;
1407 uint_t nvids;
1408 uint32_t vids_size;
1409 int rv;
1410 int i;
1411 uint64_t *data;
1412 uint64_t val;
1413 int size;
1414 int inst;
1415
1416 if (type == VGEN_LOCAL) {
1417
1418 vgenp = (vgen_t *)arg;
1419 vnetp = vgenp->vnetp;
1420 pvid_propname = vgen_pvid_propname;
1421 vid_propname = vgen_vid_propname;
1422 inst = vnetp->instance;
1423
1424 } else if (type == VGEN_PEER) {
1425
1426 portp = (vgen_port_t *)arg;
1427 vgenp = portp->vgenp;
1428 vnetp = vgenp->vnetp;
1429 pvid_propname = port_pvid_propname;
1430 vid_propname = port_vid_propname;
1431 inst = portp->port_num;
1432
1433 } else {
1434 return;
1435 }
1436
1437 if (type == VGEN_LOCAL && default_idp != NULL) {
1438 rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
1439 if (rv != 0) {
1440 DWARN(vgenp, NULL, "prop(%s) not found",
1441 vgen_dvid_propname);
1442
1443 *default_idp = vnet_default_vlan_id;
1444 } else {
1445 *default_idp = val & 0xFFF;
1446 DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
1447 inst, *default_idp);
1448 }
1449 }
1450
1451 rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1452 if (rv != 0) {
1453 DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
1454 *pvidp = vnet_default_vlan_id;
1455 } else {
1456
1457 *pvidp = val & 0xFFF;
1458 DBG2(vgenp, NULL, "%s(%d): (%d)\n",
1459 pvid_propname, inst, *pvidp);
1460 }
1461
1462 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1463 &size);
1464 if (rv != 0) {
1465 DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
1466 size = 0;
1467 } else {
1468 size /= sizeof (uint64_t);
1469 }
1470 nvids = size;
1471
1472 if (nvids != 0) {
1473 DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
1474 vids_size = sizeof (uint16_t) * nvids;
1475 *vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1476 for (i = 0; i < nvids; i++) {
1477 (*vidspp)[i] = data[i] & 0xFFFF;
1478 DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
1479 }
1480 DBG2(vgenp, NULL, "\n");
1481 }
1482
1483 *nvidsp = nvids;
1484 }
1485
1486 /*
1487 * Create a vlan id hash table for the given port.
1488 */
1489 static void
1490 vgen_vlan_create_hash(vgen_port_t *portp)
1491 {
1492 char hashname[MAXNAMELEN];
1493
1494 (void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
1495 portp->port_num);
1496
1497 portp->vlan_nchains = vgen_vlan_nchains;
1498 portp->vlan_hashp = mod_hash_create_idhash(hashname,
1499 portp->vlan_nchains, mod_hash_null_valdtor);
1500 }
1501
1502 /*
1503 * Destroy the vlan id hash table in the given port.
1504 */
1505 static void
1506 vgen_vlan_destroy_hash(vgen_port_t *portp)
1507 {
1508 if (portp->vlan_hashp != NULL) {
1509 mod_hash_destroy_hash(portp->vlan_hashp);
1510 portp->vlan_hashp = NULL;
1511 portp->vlan_nchains = 0;
1512 }
1513 }
1514
1515 /*
1516 * Add a port to the vlans specified in its port properties.
1517 */
1518 static void
1519 vgen_vlan_add_ids(vgen_port_t *portp)
1520 {
1521 int rv;
1522 int i;
1523
1524 rv = mod_hash_insert(portp->vlan_hashp,
1525 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1526 (mod_hash_val_t)B_TRUE);
1527 ASSERT(rv == 0);
1528
1529 for (i = 0; i < portp->nvids; i++) {
1530 rv = mod_hash_insert(portp->vlan_hashp,
1531 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1532 (mod_hash_val_t)B_TRUE);
1533 ASSERT(rv == 0);
1534 }
1535 }
1536
1537 /*
1538 * Remove a port from the vlans it has been assigned to.
1539 */
1540 static void
1541 vgen_vlan_remove_ids(vgen_port_t *portp)
1542 {
1543 int rv;
1544 int i;
1545 mod_hash_val_t vp;
1546
1547 rv = mod_hash_remove(portp->vlan_hashp,
1548 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1549 (mod_hash_val_t *)&vp);
1550 ASSERT(rv == 0);
1551
1552 for (i = 0; i < portp->nvids; i++) {
1553 rv = mod_hash_remove(portp->vlan_hashp,
1554 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1555 (mod_hash_val_t *)&vp);
1556 ASSERT(rv == 0);
1557 }
1558 }
1559
1560 /*
1561 * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame,
1562 * then the vlan-id is available in the tag; otherwise, its vlan id is
1563 * implicitly obtained from the port-vlan-id of the vnet device.
1564 * The vlan id determined is returned in vidp.
1565 * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1566 */
1567 static boolean_t
1568 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp)
1569 {
1570 struct ether_vlan_header *evhp;
1571
1572 /* If it's a tagged frame, get the vlan id from vlan header */
1573 if (ehp->ether_type == ETHERTYPE_VLAN) {
1574
1575 evhp = (struct ether_vlan_header *)ehp;
1576 *vidp = VLAN_ID(ntohs(evhp->ether_tci));
1577 return (B_TRUE);
1578 }
1579
1580 /* Untagged frame, vlan-id is the pvid of vnet device */
1581 *vidp = vnetp->pvid;
1582 return (B_FALSE);
1583 }
1584
1585 /*
1586 * Find the given vlan id in the hash table.
1587 * Return: B_TRUE if the id is found; B_FALSE if not found.
1588 */
1589 static boolean_t
1590 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1591 {
1592 int rv;
1593 mod_hash_val_t vp;
1594
1595 rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1596
1597 if (rv != 0)
1598 return (B_FALSE);
1599
1600 return (B_TRUE);
1601 }
1602
1603 /*
1604 * This function reads "priority-ether-types" property from md. This property
1605 * is used to enable support for priority frames. Applications which need
1606 * guaranteed and timely delivery of certain high priority frames to/from
1607 * a vnet or vsw within ldoms should configure this property by providing
1608 * the ether type(s) for which the priority facility is needed.
1609 * Normal data frames are delivered over an ldc channel using the descriptor
1610 * ring mechanism which is constrained by factors such as descriptor ring size,
1611 * the rate at which the ring is processed at the peer ldc end point, etc.
1612 * The priority mechanism provides an Out-Of-Band path to send/receive frames
1613 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1614 * descriptor ring path and enables a more reliable and timely delivery of
1615 * frames to the peer.
1616 */
1617 static void
1618 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
1619 {
1620 int rv;
1621 uint16_t *types;
1622 uint64_t *data;
1623 int size;
1624 int i;
1625 size_t mblk_sz;
1626
1627 rv = md_get_prop_data(mdp, node, pri_types_propname,
1628 (uint8_t **)&data, &size);
1629 if (rv != 0) {
1630 /*
1631 * Property may not exist if we are running pre-ldoms1.1 f/w.
1632 * Check if 'vgen_pri_eth_type' has been set in that case.
1633 */
1634 if (vgen_pri_eth_type != 0) {
1635 size = sizeof (vgen_pri_eth_type);
1636 data = &vgen_pri_eth_type;
1637 } else {
1638 DBG2(vgenp, NULL,
1639 "prop(%s) not found", pri_types_propname);
1640 size = 0;
1641 }
1642 }
1643
1644 if (size == 0) {
1645 vgenp->pri_num_types = 0;
1646 return;
1647 }
1648
1649 /*
1650 * we have some priority-ether-types defined;
1651 * allocate a table of these types and also
1652 * allocate a pool of mblks to transmit these
1653 * priority packets.
1654 */
1655 size /= sizeof (uint64_t);
1656 vgenp->pri_num_types = size;
1657 vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1658 for (i = 0, types = vgenp->pri_types; i < size; i++) {
1659 types[i] = data[i] & 0xFFFF;
1660 }
1661 mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
1662 (void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz, NULL,
1663 &vgenp->pri_tx_vmp);
1664 }
1665
1666 static void
1667 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1668 {
1669 int rv;
1670 uint64_t val;
1671 char *mtu_propname;
1672
1673 mtu_propname = vgen_mtu_propname;
1674
1675 rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1676 if (rv != 0) {
1677 DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
1678 *mtu = vnet_ethermtu;
1679 } else {
1680
1681 *mtu = val & 0xFFFF;
1682 DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
1683 vgenp->instance, *mtu);
1684 }
1685 }
1686
1687 static void
1688 vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
1689 boolean_t *pls)
1690 {
1691 int rv;
1692 uint64_t val;
1693 char *linkpropname;
1694
1695 linkpropname = vgen_linkprop_propname;
1696
1697 rv = md_get_prop_val(mdp, node, linkpropname, &val);
1698 if (rv != 0) {
1699 DWARN(vgenp, NULL, "prop(%s) not found", linkpropname);
1700 *pls = B_FALSE;
1701 } else {
1702
1703 *pls = (val & 0x1) ? B_TRUE : B_FALSE;
1704 DBG2(vgenp, NULL, "%s(%d): (%d)\n", linkpropname,
1705 vgenp->instance, *pls);
1706 }
1707 }
1708
1709 /* register with MD event generator */
1710 static int
1711 vgen_mdeg_reg(vgen_t *vgenp)
1712 {
1713 mdeg_prop_spec_t *pspecp;
1714 mdeg_node_spec_t *parentp;
1715 uint_t templatesz;
1716 int rv;
1717 mdeg_handle_t dev_hdl = NULL;
1718 mdeg_handle_t port_hdl = NULL;
1719
1720 templatesz = sizeof (vgen_prop_template);
1721 pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
1722 if (pspecp == NULL) {
1723 return (DDI_FAILURE);
1724 }
1725 parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
1726 if (parentp == NULL) {
1727 kmem_free(pspecp, templatesz);
1728 return (DDI_FAILURE);
1729 }
1730
1731 bcopy(vgen_prop_template, pspecp, templatesz);
1732
1733 /*
1734 * NOTE: The instance here refers to the value of "reg" property and
1735 * not the dev_info instance (ddi_get_instance()) of vnet.
1736 */
1737 VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop);
1738
1739 parentp->namep = "virtual-device";
1740 parentp->specp = pspecp;
1741
1742 /* save parentp in vgen_t */
1743 vgenp->mdeg_parentp = parentp;
1744
1745 /*
1746 * Register an interest in 'virtual-device' nodes with a
1747 * 'name' property of 'network'
1748 */
1749 rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl);
1750 if (rv != MDEG_SUCCESS) {
1751 DERR(vgenp, NULL, "mdeg_register failed\n");
1752 goto mdeg_reg_fail;
1753 }
1754
1755 /* Register an interest in 'port' nodes */
1756 rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp,
1757 &port_hdl);
1758 if (rv != MDEG_SUCCESS) {
1759 DERR(vgenp, NULL, "mdeg_register failed\n");
1760 goto mdeg_reg_fail;
1761 }
1762
1763 /* save mdeg handle in vgen_t */
1764 vgenp->mdeg_dev_hdl = dev_hdl;
1765 vgenp->mdeg_port_hdl = port_hdl;
1766
1767 return (DDI_SUCCESS);
1768
1769 mdeg_reg_fail:
1770 if (dev_hdl != NULL) {
1771 (void) mdeg_unregister(dev_hdl);
1772 }
1773 KMEM_FREE(parentp);
1774 kmem_free(pspecp, templatesz);
1775 vgenp->mdeg_parentp = NULL;
1776 return (DDI_FAILURE);
1777 }
1778
1779 /* unregister with MD event generator */
1780 static void
1781 vgen_mdeg_unreg(vgen_t *vgenp)
1782 {
1783 if (vgenp->mdeg_dev_hdl != NULL) {
1784 (void) mdeg_unregister(vgenp->mdeg_dev_hdl);
1785 vgenp->mdeg_dev_hdl = NULL;
1786 }
1787 if (vgenp->mdeg_port_hdl != NULL) {
1788 (void) mdeg_unregister(vgenp->mdeg_port_hdl);
1789 vgenp->mdeg_port_hdl = NULL;
1790 }
1791
1792 if (vgenp->mdeg_parentp != NULL) {
1793 kmem_free(vgenp->mdeg_parentp->specp,
1794 sizeof (vgen_prop_template));
1795 KMEM_FREE(vgenp->mdeg_parentp);
1796 vgenp->mdeg_parentp = NULL;
1797 }
1798 }
1799
1800 /* mdeg callback function for the port node */
1801 static int
1802 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp)
1803 {
1804 int idx;
1805 int vsw_idx = -1;
1806 uint64_t val;
1807 vgen_t *vgenp;
1808
1809 if ((resp == NULL) || (cb_argp == NULL)) {
1810 return (MDEG_FAILURE);
1811 }
1812
1813 vgenp = (vgen_t *)cb_argp;
1814 DBG1(vgenp, NULL, "enter\n");
1815
1816 mutex_enter(&vgenp->lock);
1817
1818 DBG1(vgenp, NULL, "ports: removed(%x), "
1819 "added(%x), updated(%x)\n", resp->removed.nelem,
1820 resp->added.nelem, resp->match_curr.nelem);
1821
1822 for (idx = 0; idx < resp->removed.nelem; idx++) {
1823 (void) vgen_remove_port(vgenp, resp->removed.mdp,
1824 resp->removed.mdep[idx]);
1825 }
1826
1827 if (vgenp->vsw_portp == NULL) {
1828 /*
1829 * find vsw_port and add it first, because other ports need
1830 * this when adding fdb entry (see vgen_port_init()).
1831 */
1832 for (idx = 0; idx < resp->added.nelem; idx++) {
1833 if (!(md_get_prop_val(resp->added.mdp,
1834 resp->added.mdep[idx], swport_propname, &val))) {
1835 if (val == 0) {
1836 /*
1837 * This port is connected to the
1838 * vsw on service domain.
1839 */
1840 vsw_idx = idx;
1841 if (vgen_add_port(vgenp,
1842 resp->added.mdp,
1843 resp->added.mdep[idx]) !=
1844 DDI_SUCCESS) {
1845 cmn_err(CE_NOTE, "vnet%d Could "
1846 "not initialize virtual "
1847 "switch port.",
1848 vgenp->instance);
1849 mutex_exit(&vgenp->lock);
1850 return (MDEG_FAILURE);
1851 }
1852 break;
1853 }
1854 }
1855 }
1856 if (vsw_idx == -1) {
1857 DWARN(vgenp, NULL, "can't find vsw_port\n");
1858 mutex_exit(&vgenp->lock);
1859 return (MDEG_FAILURE);
1860 }
1861 }
1862
1863 for (idx = 0; idx < resp->added.nelem; idx++) {
1864 if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */
1865 continue;
1866
1867 /* If this port can't be added just skip it. */
1868 (void) vgen_add_port(vgenp, resp->added.mdp,
1869 resp->added.mdep[idx]);
1870 }
1871
1872 for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1873 (void) vgen_update_port(vgenp, resp->match_curr.mdp,
1874 resp->match_curr.mdep[idx],
1875 resp->match_prev.mdp,
1876 resp->match_prev.mdep[idx]);
1877 }
1878
1879 mutex_exit(&vgenp->lock);
1880 DBG1(vgenp, NULL, "exit\n");
1881 return (MDEG_SUCCESS);
1882 }
1883
1884 /* mdeg callback function for the vnet node */
1885 static int
1886 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1887 {
1888 vgen_t *vgenp;
1889 vnet_t *vnetp;
1890 md_t *mdp;
1891 mde_cookie_t node;
1892 uint64_t inst;
1893 char *node_name = NULL;
1894
1895 if ((resp == NULL) || (cb_argp == NULL)) {
1896 return (MDEG_FAILURE);
1897 }
1898
1899 vgenp = (vgen_t *)cb_argp;
1900 vnetp = vgenp->vnetp;
1901
1902 DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d"
1903 " : prev matched %d", resp->added.nelem, resp->removed.nelem,
1904 resp->match_curr.nelem, resp->match_prev.nelem);
1905
1906 mutex_enter(&vgenp->lock);
1907
1908 /*
1909 * We get an initial callback for this node as 'added' after
1910 * registering with mdeg. Note that we would have already gathered
1911 * information about this vnet node by walking MD earlier during attach
1912 * (in vgen_read_mdprops()). So, there is a window where the properties
1913 * of this node might have changed when we get this initial 'added'
1914 * callback. We handle this as if an update occurred and invoke the same
1915 * function which handles updates to the properties of this vnet-node
1916 * if any. A non-zero 'match' value indicates that the MD has been
1917 * updated and that a 'network' node is present which may or may not
1918 * have been updated. It is up to the clients to examine their own
1919 * nodes and determine if they have changed.
1920 */
1921 if (resp->added.nelem != 0) {
1922
1923 if (resp->added.nelem != 1) {
1924 cmn_err(CE_NOTE, "!vnet%d: number of nodes added "
1925 "invalid: %d\n", vnetp->instance,
1926 resp->added.nelem);
1927 goto vgen_mdeg_cb_err;
1928 }
1929
1930 mdp = resp->added.mdp;
1931 node = resp->added.mdep[0];
1932
1933 } else if (resp->match_curr.nelem != 0) {
1934
1935 if (resp->match_curr.nelem != 1) {
1936 cmn_err(CE_NOTE, "!vnet%d: number of nodes updated "
1937 "invalid: %d\n", vnetp->instance,
1938 resp->match_curr.nelem);
1939 goto vgen_mdeg_cb_err;
1940 }
1941
1942 mdp = resp->match_curr.mdp;
1943 node = resp->match_curr.mdep[0];
1944
1945 } else {
1946 goto vgen_mdeg_cb_err;
1947 }
1948
1949 /* Validate name and instance */
1950 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1951 DERR(vgenp, NULL, "unable to get node name\n");
1952 goto vgen_mdeg_cb_err;
1953 }
1954
1955 /* is this a virtual-network device? */
1956 if (strcmp(node_name, vnet_propname) != 0) {
1957 DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name);
1958 goto vgen_mdeg_cb_err;
1959 }
1960
1961 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1962 DERR(vgenp, NULL, "prop(cfg-handle) not found\n");
1963 goto vgen_mdeg_cb_err;
1964 }
1965
1966 /* is this the right instance of vnet? */
1967 if (inst != vgenp->regprop) {
1968 DERR(vgenp, NULL, "Invalid cfg-handle: %lx\n", inst);
1969 goto vgen_mdeg_cb_err;
1970 }
1971
1972 vgen_update_md_prop(vgenp, mdp, node);
1973
1974 mutex_exit(&vgenp->lock);
1975 return (MDEG_SUCCESS);
1976
1977 vgen_mdeg_cb_err:
1978 mutex_exit(&vgenp->lock);
1979 return (MDEG_FAILURE);
1980 }
1981
1982 /*
1983 * Check to see if the relevant properties in the specified node have
1984 * changed, and if so take the appropriate action.
1985 */
1986 static void
1987 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
1988 {
1989 uint16_t pvid;
1990 uint16_t *vids;
1991 uint16_t nvids;
1992 vnet_t *vnetp = vgenp->vnetp;
1993 uint32_t mtu;
1994 boolean_t pls_update;
1995 enum { MD_init = 0x1,
1996 MD_vlans = 0x2,
1997 MD_mtu = 0x4,
1998 MD_pls = 0x8 } updated;
1999 int rv;
2000
2001 updated = MD_init;
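/*
 * 'updated' accumulates one bit per property that actually changed.
 * For example, an MD update that modifies both the mtu and the vlan
 * ids leaves updated == (MD_init | MD_vlans | MD_mtu), so only the
 * MD_vlans and MD_mtu blocks further below take effect.
 */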
2002
2003 /* Read the vlan ids */
2004 vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
2005 &nvids, NULL);
2006
2007 /* Determine if there are any vlan id updates */
2008 if ((pvid != vnetp->pvid) || /* pvid changed? */
2009 (nvids != vnetp->nvids) || /* # of vids changed? */
2010 ((nvids != 0) && (vnetp->nvids != 0) && /* vids changed? */
2011 bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
2012 updated |= MD_vlans;
2013 }
2014
2015 /* Read mtu */
2016 vgen_mtu_read(vgenp, mdp, mdex, &mtu);
2017 if (mtu != vnetp->mtu) {
2018 if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2019 updated |= MD_mtu;
2020 } else {
2021 cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
2022 " as the specified value:%d is invalid\n",
2023 vnetp->instance, mtu);
2024 }
2025 }
2026
2027 /*
2028 * Read the 'linkprop' property.
2029 */
2030 vgen_linkprop_read(vgenp, mdp, mdex, &pls_update);
2031 if (pls_update != vnetp->pls_update) {
2032 updated |= MD_pls;
2033 }
2034
2035 /* Now process the updated props */
2036
2037 if (updated & MD_vlans) {
2038
2039 /* save the new vlan ids */
2040 vnetp->pvid = pvid;
2041 if (vnetp->nvids != 0) {
2042 kmem_free(vnetp->vids,
2043 sizeof (uint16_t) * vnetp->nvids);
2044 vnetp->nvids = 0;
2045 }
2046 if (nvids != 0) {
2047 vnetp->nvids = nvids;
2048 vnetp->vids = vids;
2049 }
2050
2051 /* reset vlan-unaware peers (ver < 1.3) and restart handshake */
2052 vgen_reset_vlan_unaware_ports(vgenp);
2053
2054 } else {
2055
2056 if (nvids != 0) {
2057 kmem_free(vids, sizeof (uint16_t) * nvids);
2058 }
2059 }
2060
2061 if (updated & MD_mtu) {
2062
2063 DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
2064 vnetp->mtu, mtu);
2065
2066 rv = vnet_mtu_update(vnetp, mtu);
2067 if (rv == 0) {
2068 vgenp->max_frame_size = mtu +
2069 sizeof (struct ether_header) + VLAN_TAGSZ;
2070 }
2071 }
2072
2073 if (updated & MD_pls) {
2074 /* enable/disable physical link state updates */
2075 vnetp->pls_update = pls_update;
2076 mutex_exit(&vgenp->lock);
2077
2078 /* reset vsw-port to re-negotiate with the updated prop. */
2079 vgen_reset_vsw_port(vgenp);
2080
2081 mutex_enter(&vgenp->lock);
2082 }
2083 }
2084
2085 /* add a new port to the device */
2086 static int
2087 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2088 {
2089 vgen_port_t *portp;
2090 int rv;
2091
2092 portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);
2093
2094 rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
2095 if (rv != DDI_SUCCESS) {
2096 KMEM_FREE(portp);
2097 return (DDI_FAILURE);
2098 }
2099
2100 rv = vgen_port_attach(portp);
2101 if (rv != DDI_SUCCESS) {
2102 return (DDI_FAILURE);
2103 }
2104
2105 return (DDI_SUCCESS);
2106 }
2107
2108 /* read properties of the port from its md node */
2109 static int
2110 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
2111 mde_cookie_t mdex)
2112 {
2113 uint64_t port_num;
2114 uint64_t *ldc_ids;
2115 uint64_t macaddr;
2116 uint64_t val;
2117 int num_ldcs;
2118 int i;
2119 int addrsz;
2120 int num_nodes = 0;
2121 int listsz = 0;
2122 mde_cookie_t *listp = NULL;
2123 uint8_t *addrp;
2124 struct ether_addr ea;
2125
2126 /* read "id" property to get the port number */
2127 if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2128 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2129 return (DDI_FAILURE);
2130 }
2131
2132 /*
2133 * Find the channel endpoint node(s) under this port node.
2134 */
2135 if ((num_nodes = md_node_count(mdp)) <= 0) {
2136 DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
2137 num_nodes);
2138 return (DDI_FAILURE);
2139 }
2140
2141 /* allocate space for node list */
2142 listsz = num_nodes * sizeof (mde_cookie_t);
2143 listp = kmem_zalloc(listsz, KM_NOSLEEP);
2144 if (listp == NULL)
2145 return (DDI_FAILURE);
2146
2147 num_ldcs = md_scan_dag(mdp, mdex,
2148 md_find_name(mdp, channel_propname),
2149 md_find_name(mdp, "fwd"), listp);
2150
2151 if (num_ldcs <= 0) {
2152 DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
2153 kmem_free(listp, listsz);
2154 return (DDI_FAILURE);
2155 }
2156
2157 if (num_ldcs > 1) {
2158 DWARN(vgenp, NULL, "Port %d: Number of channels %d > 1\n",
2159 port_num, num_ldcs);
2160 }
2161
2162 ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
2163 if (ldc_ids == NULL) {
2164 kmem_free(listp, listsz);
2165 return (DDI_FAILURE);
2166 }
2167
2168 for (i = 0; i < num_ldcs; i++) {
2169 /* read channel ids */
2170 if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
2171 DWARN(vgenp, NULL, "prop(%s) not found\n",
2172 id_propname);
2173 kmem_free(listp, listsz);
2174 kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2175 return (DDI_FAILURE);
2176 }
2177 DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
2178 }
2179
2180 kmem_free(listp, listsz);
2181
2182 if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
2183 &addrsz)) {
2184 DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
2185 kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2186 return (DDI_FAILURE);
2187 }
2188
2189 if (addrsz < ETHERADDRL) {
2190 DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
2191 kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2192 return (DDI_FAILURE);
2193 }
2194
2195 macaddr = *((uint64_t *)addrp);
2196
2197 DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);
2198
2199 for (i = ETHERADDRL - 1; i >= 0; i--) {
2200 ea.ether_addr_octet[i] = macaddr & 0xFF;
2201 macaddr >>= 8;
2202 }
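/*
 * Illustrative example (the address value is made up): a
 * remote-mac-address property of 0x00144ffa7847 is unpacked by the
 * loop above into ea.ether_addr_octet[] = { 0x00, 0x14, 0x4f, 0xfa,
 * 0x78, 0x47 }, i.e. the ethernet address 00:14:4f:fa:78:47.
 */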
2203
2204 if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
2205 if (val == 0) {
2206 /* This port is connected to the vswitch */
2207 portp->is_vsw_port = B_TRUE;
2208 } else {
2209 portp->is_vsw_port = B_FALSE;
2210 }
2211 }
2212
2213 /* now update all properties into the port */
2214 portp->vgenp = vgenp;
2215 portp->port_num = port_num;
2216 ether_copy(&ea, &portp->macaddr);
2217 portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
2218 bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
2219 portp->num_ldcs = num_ldcs;
2220
2221 /* read vlan id properties of this port node */
2222 vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
2223 &portp->vids, &portp->nvids, NULL);
2224
2225 kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
2226
2227 return (DDI_SUCCESS);
2228 }
2229
2230 /* remove a port from the device */
2231 static int
2232 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
2233 {
2234 uint64_t port_num;
2235 vgen_port_t *portp;
2236 vgen_portlist_t *plistp;
2237
2238 /* read "id" property to get the port number */
2239 if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
2240 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2241 return (DDI_FAILURE);
2242 }
2243
2244 plistp = &(vgenp->vgenports);
2245
2246 WRITE_ENTER(&plistp->rwlock);
2247 portp = vgen_port_lookup(plistp, (int)port_num);
2248 if (portp == NULL) {
2249 DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
2250 RW_EXIT(&plistp->rwlock);
2251 return (DDI_FAILURE);
2252 }
2253
2254 vgen_port_detach_mdeg(portp);
2255 RW_EXIT(&plistp->rwlock);
2256
2257 return (DDI_SUCCESS);
2258 }
2259
2260 /* attach a port to the device based on mdeg data */
2261 static int
2262 vgen_port_attach(vgen_port_t *portp)
2263 {
2264 vgen_portlist_t *plistp;
2265 vgen_t *vgenp;
2266 uint64_t *ldcids;
2267 mac_register_t *macp;
2268 vio_net_res_type_t type;
2269 int rv;
2270
2271 ASSERT(portp != NULL);
2272 vgenp = portp->vgenp;
2273 ldcids = portp->ldc_ids;
2274
2275 DBG2(vgenp, NULL, "port_num(%d), ldcid(%lx)\n",
2276 portp->port_num, ldcids[0]);
2277
2278 mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
2279
2280 /*
2281 * attach the channel under the port using its channel id;
2282 * note that we only support one channel per port for now.
2283 */
2284 if (vgen_ldc_attach(portp, ldcids[0]) == DDI_FAILURE) {
2285 vgen_port_detach(portp);
2286 return (DDI_FAILURE);
2287 }
2288
2289 /* create vlan id hash table */
2290 vgen_vlan_create_hash(portp);
2291
2292 if (portp->is_vsw_port == B_TRUE) {
2293 /* This port is connected to the switch port */
2294 (void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
2295 type = VIO_NET_RES_LDC_SERVICE;
2296 } else {
2297 (void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
2298 type = VIO_NET_RES_LDC_GUEST;
2299 }
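/*
 * In other words, traffic destined to a guest peer keeps flowing
 * through the vsw port until this guest-to-guest channel comes up;
 * vgen_handle_evt_up() flips use_vsw_port back to B_FALSE at that
 * point.
 */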
2300
2301 if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
2302 vgen_port_detach(portp);
2303 return (DDI_FAILURE);
2304 }
2305 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
2306 macp->m_driver = portp;
2307 macp->m_dip = vgenp->vnetdip;
2308 macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
2309 macp->m_callbacks = &vgen_m_callbacks;
2310 macp->m_min_sdu = 0;
2311 macp->m_max_sdu = ETHERMTU;
2312
2313 mutex_enter(&portp->lock);
2314 rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
2315 portp->macaddr, &portp->vhp, &portp->vcb);
2316 mutex_exit(&portp->lock);
2317 mac_free(macp);
2318
2319 if (rv == 0) {
2320 /* link it into the list of ports */
2321 plistp = &(vgenp->vgenports);
2322 WRITE_ENTER(&plistp->rwlock);
2323 vgen_port_list_insert(portp);
2324 RW_EXIT(&plistp->rwlock);
2325
2326 if (portp->is_vsw_port == B_TRUE) {
2327 /* We now have the vswitch port attached */
2328 vgenp->vsw_portp = portp;
2329 (void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0);
2330 }
2331 } else {
2332 DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
2333 portp);
2334 vgen_port_detach(portp);
return (DDI_FAILURE);
2335 }
2336
2337 DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2338 return (DDI_SUCCESS);
2339 }
2340
2341 /* detach a port from the device based on mdeg data */
2342 static void
2343 vgen_port_detach_mdeg(vgen_port_t *portp)
2344 {
2345 vgen_t *vgenp = portp->vgenp;
2346
2347 DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);
2348
2349 mutex_enter(&portp->lock);
2350
2351 /* stop the port if needed */
2352 if (portp->flags & VGEN_STARTED) {
2353 vgen_port_uninit(portp);
2354 portp->flags &= ~(VGEN_STARTED);
2355 }
2356
2357 mutex_exit(&portp->lock);
2358 vgen_port_detach(portp);
2359
2360 DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
2361 }
2362
2363 static int
2364 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2365 md_t *prev_mdp, mde_cookie_t prev_mdex)
2366 {
2367 uint64_t cport_num;
2368 uint64_t pport_num;
2369 vgen_portlist_t *plistp;
2370 vgen_port_t *portp;
2371 boolean_t updated_vlans = B_FALSE;
2372 uint16_t pvid;
2373 uint16_t *vids;
2374 uint16_t nvids;
2375
2376 /*
2377 * For now, we get port updates only if vlan ids changed.
2378 * We read the port num and do some sanity check.
2379 */
2380 if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2381 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2382 return (DDI_FAILURE);
2383 }
2384
2385 if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2386 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
2387 return (DDI_FAILURE);
2388 }
2389 if (cport_num != pport_num)
2390 return (DDI_FAILURE);
2391
2392 plistp = &(vgenp->vgenports);
2393
2394 READ_ENTER(&plistp->rwlock);
2395
2396 portp = vgen_port_lookup(plistp, (int)cport_num);
2397 if (portp == NULL) {
2398 DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
2399 RW_EXIT(&plistp->rwlock);
2400 return (DDI_FAILURE);
2401 }
2402
2403 /* Read the vlan ids */
2404 vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
2405 &nvids, NULL);
2406
2407 /* Determine if there are any vlan id updates */
2408 if ((pvid != portp->pvid) || /* pvid changed? */
2409 (nvids != portp->nvids) || /* # of vids changed? */
2410 ((nvids != 0) && (portp->nvids != 0) && /* vids changed? */
2411 bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) {
2412 updated_vlans = B_TRUE;
2413 }
2414
2415 if (updated_vlans == B_FALSE) {
2416 RW_EXIT(&plistp->rwlock);
2417 return (DDI_FAILURE);
2418 }
2419
2420 /* remove the port from vlans it has been assigned to */
2421 vgen_vlan_remove_ids(portp);
2422
2423 /* save the new vlan ids */
2424 portp->pvid = pvid;
2425 if (portp->nvids != 0) {
2426 kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids);
2427 portp->nvids = 0;
2428 }
2429 if (nvids != 0) {
2430 portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP);
2431 bcopy(vids, portp->vids, sizeof (uint16_t) * nvids);
2432 portp->nvids = nvids;
2433 kmem_free(vids, sizeof (uint16_t) * nvids);
2434 }
2435
2436 /* add port to the new vlans */
2437 vgen_vlan_add_ids(portp);
2438
2439 /* reset the port if it is vlan unaware (ver < 1.3) */
2440 vgen_vlan_unaware_port_reset(portp);
2441
2442 RW_EXIT(&plistp->rwlock);
2443
2444 return (DDI_SUCCESS);
2445 }
2446
2447 static uint64_t
2448 vgen_port_stat(vgen_port_t *portp, uint_t stat)
2449 {
2450 return (vgen_ldc_stat(portp->ldcp, stat));
2451 }
2452
2453 /* attach the channel corresponding to the given ldc_id to the port */
2454 static int
2455 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
2456 {
2457 vgen_t *vgenp;
2458 vgen_ldc_t *ldcp;
2459 ldc_attr_t attr;
2460 int status;
2461 ldc_status_t istatus;
2462 char kname[MAXNAMELEN];
2463 int instance;
2464 enum {AST_init = 0x0, AST_ldc_alloc = 0x1,
2465 AST_mutex_init = 0x2, AST_ldc_init = 0x4,
2466 AST_ldc_reg_cb = 0x8 } attach_state;
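/*
 * attach_state accumulates one bit per resource acquired, so the
 * ldc_attach_failed path below unwinds only what was actually set up.
 * For example, if ldc_reg_callback() fails, attach_state is
 * (AST_ldc_alloc | AST_mutex_init | AST_ldc_init) and the callback
 * unregistration step is skipped.
 */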
2467
2468 attach_state = AST_init;
2469 vgenp = portp->vgenp;
2470
2471 ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
2472 if (ldcp == NULL) {
2473 goto ldc_attach_failed;
2474 }
2475 ldcp->ldc_id = ldc_id;
2476 ldcp->portp = portp;
2477
2478 attach_state |= AST_ldc_alloc;
2479
2480 mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
2481 mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
2482 mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
2483 mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
2484 mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);
2485 mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL);
2486 mutex_init(&ldcp->msg_thr_lock, NULL, MUTEX_DRIVER, NULL);
2487 cv_init(&ldcp->msg_thr_cv, NULL, CV_DRIVER, NULL);
2488
2489 attach_state |= AST_mutex_init;
2490
2491 attr.devclass = LDC_DEV_NT;
2492 attr.instance = vgenp->instance;
2493 attr.mode = LDC_MODE_UNRELIABLE;
2494 attr.mtu = vgen_ldc_mtu;
2495 status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
2496 if (status != 0) {
2497 DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
2498 goto ldc_attach_failed;
2499 }
2500 attach_state |= AST_ldc_init;
2501
2502 status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
2503 if (status != 0) {
2504 DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
2505 status);
2506 goto ldc_attach_failed;
2507 }
2508 /*
2509 * allocate a message for ldc_read()s, big enough to hold ctrl and
2510 * data msgs, including raw data msgs used to recv priority frames.
2511 */
2512 ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
2513 ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
2514 attach_state |= AST_ldc_reg_cb;
2515
2516 (void) ldc_status(ldcp->ldc_handle, &istatus);
2517 ASSERT(istatus == LDC_INIT);
2518 ldcp->ldc_status = istatus;
2519
2520 /* Setup kstats for the channel */
2521 instance = vgenp->instance;
2522 (void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
2523 ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
2524 if (ldcp->ksp == NULL) {
2525 goto ldc_attach_failed;
2526 }
2527
2528 /* initialize vgen_versions supported */
2529 bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
2530 vgen_reset_vnet_proto_ops(ldcp);
2531
2532 /* Link this channel to the port */
2533 portp->ldcp = ldcp;
2534
2535 ldcp->link_state = LINK_STATE_UNKNOWN;
2536 #ifdef VNET_IOC_DEBUG
2537 ldcp->link_down_forced = B_FALSE;
2538 #endif
2539 ldcp->flags |= CHANNEL_ATTACHED;
2540 return (DDI_SUCCESS);
2541
2542 ldc_attach_failed:
2543 if (attach_state & AST_ldc_reg_cb) {
2544 (void) ldc_unreg_callback(ldcp->ldc_handle);
2545 kmem_free(ldcp->ldcmsg, ldcp->msglen);
2546 }
2547
2548 if (attach_state & AST_ldc_init) {
2549 (void) ldc_fini(ldcp->ldc_handle);
2550 }
2551 if (attach_state & AST_mutex_init) {
2552 mutex_destroy(&ldcp->tclock);
2553 mutex_destroy(&ldcp->txlock);
2554 mutex_destroy(&ldcp->cblock);
2555 mutex_destroy(&ldcp->wrlock);
2556 mutex_destroy(&ldcp->rxlock);
2557 mutex_destroy(&ldcp->pollq_lock);
2558 }
2559 if (attach_state & AST_ldc_alloc) {
2560 KMEM_FREE(ldcp);
2561 }
2562 return (DDI_FAILURE);
2563 }
2564
2565 /* detach a channel from the port */
2566 static void
2567 vgen_ldc_detach(vgen_ldc_t *ldcp)
2568 {
2569 vgen_port_t *portp;
2570 vgen_t *vgenp;
2571
2572 ASSERT(ldcp != NULL);
2573
2574 portp = ldcp->portp;
2575 vgenp = portp->vgenp;
2576
2577 if (ldcp->ldc_status != LDC_INIT) {
2578 DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
2579 }
2580
2581 if (ldcp->flags & CHANNEL_ATTACHED) {
2582 ldcp->flags &= ~(CHANNEL_ATTACHED);
2583
2584 (void) ldc_unreg_callback(ldcp->ldc_handle);
2585 (void) ldc_fini(ldcp->ldc_handle);
2586
2587 kmem_free(ldcp->ldcmsg, ldcp->msglen);
2588 vgen_destroy_kstats(ldcp->ksp);
2589 ldcp->ksp = NULL;
2590 mutex_destroy(&ldcp->tclock);
2591 mutex_destroy(&ldcp->txlock);
2592 mutex_destroy(&ldcp->cblock);
2593 mutex_destroy(&ldcp->wrlock);
2594 mutex_destroy(&ldcp->rxlock);
2595 mutex_destroy(&ldcp->pollq_lock);
2596 mutex_destroy(&ldcp->msg_thr_lock);
2597 cv_destroy(&ldcp->msg_thr_cv);
2598
2599 KMEM_FREE(ldcp);
2600 }
2601 }
2602
2603 /* enable transmit/receive on the channel */
2604 static int
2605 vgen_ldc_init(vgen_ldc_t *ldcp)
2606 {
2607 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2608 ldc_status_t istatus;
2609 int rv;
2610 enum { ST_init = 0x0, ST_ldc_open = 0x1,
2611 ST_cb_enable = 0x2} init_state;
2612 int flag = 0;
2613
2614 init_state = ST_init;
2615
2616 DBG1(vgenp, ldcp, "enter\n");
2617 LDC_LOCK(ldcp);
2618
2619 rv = ldc_open(ldcp->ldc_handle);
2620 if (rv != 0) {
2621 DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
2622 goto ldcinit_failed;
2623 }
2624 init_state |= ST_ldc_open;
2625
2626 (void) ldc_status(ldcp->ldc_handle, &istatus);
2627 if (istatus != LDC_OPEN && istatus != LDC_READY) {
2628 DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
2629 goto ldcinit_failed;
2630 }
2631 ldcp->ldc_status = istatus;
2632
2633 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
2634 if (rv != 0) {
2635 DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
2636 goto ldcinit_failed;
2637 }
2638
2639 init_state |= ST_cb_enable;
2640
2641 vgen_ldc_up(ldcp);
2642
2643 (void) ldc_status(ldcp->ldc_handle, &istatus);
2644 if (istatus == LDC_UP) {
2645 DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
2646 }
2647
2648 ldcp->ldc_status = istatus;
2649
2650 ldcp->hphase = VH_PHASE0;
2651 ldcp->hstate = 0;
2652 ldcp->flags |= CHANNEL_STARTED;
2653
2654 vgen_setup_handshake_params(ldcp);
2655
2656 /* if channel is already UP - start handshake */
2657 if (istatus == LDC_UP) {
2658 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2659 if (ldcp->portp != vgenp->vsw_portp) {
2660 /*
2661 * As the channel is up, use this port from now on.
2662 */
2663 (void) atomic_swap_32(
2664 &ldcp->portp->use_vsw_port, B_FALSE);
2665 }
2666
2667 /* Initialize local session id */
2668 ldcp->local_sid = ddi_get_lbolt();
2669
2670 /* clear peer session id */
2671 ldcp->peer_sid = 0;
2672
2673 mutex_exit(&ldcp->tclock);
2674 mutex_exit(&ldcp->txlock);
2675 mutex_exit(&ldcp->wrlock);
2676 mutex_exit(&ldcp->rxlock);
2677 rv = vgen_handshake(vh_nextphase(ldcp));
2678 mutex_exit(&ldcp->cblock);
2679 if (rv != 0) {
2680 flag = (rv == ECONNRESET) ? VGEN_FLAG_EVT_RESET :
2681 VGEN_FLAG_NEED_LDCRESET;
2682 (void) vgen_process_reset(ldcp, flag);
2683 }
2684 } else {
2685 LDC_UNLOCK(ldcp);
2686 }
2687
2688 return (DDI_SUCCESS);
2689
2690 ldcinit_failed:
2691 if (init_state & ST_cb_enable) {
2692 (void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
2693 }
2694 if (init_state & ST_ldc_open) {
2695 (void) ldc_close(ldcp->ldc_handle);
2696 }
2697 LDC_UNLOCK(ldcp);
2698 DBG1(vgenp, ldcp, "exit\n");
2699 return (DDI_FAILURE);
2700 }
2701
2702 /* stop transmit/receive on the channel */
2703 static void
2704 vgen_ldc_uninit(vgen_ldc_t *ldcp)
2705 {
2706 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2707
2708 DBG1(vgenp, ldcp, "enter\n");
2709
2710 LDC_LOCK(ldcp);
2711
2712 if ((ldcp->flags & CHANNEL_STARTED) == 0) {
2713 LDC_UNLOCK(ldcp);
2714 DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
2715 return;
2716 }
2717
2718 LDC_UNLOCK(ldcp);
2719
2720 while (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
2721 delay(drv_usectohz(VGEN_LDC_UNINIT_DELAY));
2722 }
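/*
 * The atomic_cas_uint() loop above spins (with a delay) until this
 * thread sets reset_in_progress from 0 to 1; a reset already in
 * progress on another thread must finish and clear the flag before
 * the uninit can proceed.
 */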
2723
2724 (void) vgen_process_reset(ldcp, VGEN_FLAG_UNINIT);
2725
2726 DBG1(vgenp, ldcp, "exit\n");
2727 }
2728
2729 /*
2730 * Create a descriptor ring, that will be exported to the peer for mapping.
2731 */
2732 static int
2733 vgen_create_dring(vgen_ldc_t *ldcp)
2734 {
2735 vgen_hparams_t *lp = &ldcp->local_hparams;
2736 int rv;
2737
2738 if (lp->dring_mode == VIO_RX_DRING_DATA) {
2739 rv = vgen_create_rx_dring(ldcp);
2740 } else {
2741 rv = vgen_create_tx_dring(ldcp);
2742 }
2743
2744 return (rv);
2745 }
2746
2747 /*
2748 * Destroy the descriptor ring.
2749 */
2750 static void
2751 vgen_destroy_dring(vgen_ldc_t *ldcp)
2752 {
2753 vgen_hparams_t *lp = &ldcp->local_hparams;
2754
2755 if (lp->dring_mode == VIO_RX_DRING_DATA) {
2756 vgen_destroy_rx_dring(ldcp);
2757 } else {
2758 vgen_destroy_tx_dring(ldcp);
2759 }
2760 }
2761
2762 /*
2763 * Map the descriptor ring exported by the peer.
2764 */
2765 static int
2766 vgen_map_dring(vgen_ldc_t *ldcp, void *pkt)
2767 {
2768 int rv;
2769 vgen_hparams_t *lp = &ldcp->local_hparams;
2770
2771 if (lp->dring_mode == VIO_RX_DRING_DATA) {
2772 /*
2773 * In RxDringData mode, the dring that we map in
2774 * becomes our transmit descriptor ring.
2775 */
2776 rv = vgen_map_tx_dring(ldcp, pkt);
2777 } else {
2778
2779 /*
2780 * In TxDring mode, the dring that we map in
2781 * becomes our receive descriptor ring.
2782 */
2783 rv = vgen_map_rx_dring(ldcp, pkt);
2784 }
2785
2786 return (rv);
2787 }
2788
2789 /*
2790 * Unmap the descriptor ring exported by the peer.
2791 */
2792 static void
2793 vgen_unmap_dring(vgen_ldc_t *ldcp)
2794 {
2795 vgen_hparams_t *lp = &ldcp->local_hparams;
2796
2797 if (lp->dring_mode == VIO_RX_DRING_DATA) {
2798 vgen_unmap_tx_dring(ldcp);
2799 } else {
2800 vgen_unmap_rx_dring(ldcp);
2801 }
2802 }
2803
2804 void
2805 vgen_destroy_rxpools(void *arg)
2806 {
2807 vio_mblk_pool_t *poolp = (vio_mblk_pool_t *)arg;
2808 vio_mblk_pool_t *npoolp;
2809
2810 while (poolp != NULL) {
2811 npoolp = poolp->nextp;
2812 while (vio_destroy_mblks(poolp) != 0) {
2813 delay(drv_usectohz(vgen_rxpool_cleanup_delay));
2814 }
2815 poolp = npoolp;
2816 }
2817 }
2818
2819 /* get channel statistics */
2820 static uint64_t
2821 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
2822 {
2823 vgen_stats_t *statsp;
2824 uint64_t val;
2825
2826 val = 0;
2827 statsp = &ldcp->stats;
2828 switch (stat) {
2829
2830 case MAC_STAT_MULTIRCV:
2831 val = statsp->multircv;
2832 break;
2833
2834 case MAC_STAT_BRDCSTRCV:
2835 val = statsp->brdcstrcv;
2836 break;
2837
2838 case MAC_STAT_MULTIXMT:
2839 val = statsp->multixmt;
2840 break;
2841
2842 case MAC_STAT_BRDCSTXMT:
2843 val = statsp->brdcstxmt;
2844 break;
2845
2846 case MAC_STAT_NORCVBUF:
2847 val = statsp->norcvbuf;
2848 break;
2849
2850 case MAC_STAT_IERRORS:
2851 val = statsp->ierrors;
2852 break;
2853
2854 case MAC_STAT_NOXMTBUF:
2855 val = statsp->noxmtbuf;
2856 break;
2857
2858 case MAC_STAT_OERRORS:
2859 val = statsp->oerrors;
2860 break;
2861
2862 case MAC_STAT_COLLISIONS:
2863 break;
2864
2865 case MAC_STAT_RBYTES:
2866 val = statsp->rbytes;
2867 break;
2868
2869 case MAC_STAT_IPACKETS:
2870 val = statsp->ipackets;
2871 break;
2872
2873 case MAC_STAT_OBYTES:
2874 val = statsp->obytes;
2875 break;
2876
2877 case MAC_STAT_OPACKETS:
2878 val = statsp->opackets;
2879 break;
2880
2881 /* stats not relevant to ldc, return 0 */
2882 case MAC_STAT_IFSPEED:
2883 case ETHER_STAT_ALIGN_ERRORS:
2884 case ETHER_STAT_FCS_ERRORS:
2885 case ETHER_STAT_FIRST_COLLISIONS:
2886 case ETHER_STAT_MULTI_COLLISIONS:
2887 case ETHER_STAT_DEFER_XMTS:
2888 case ETHER_STAT_TX_LATE_COLLISIONS:
2889 case ETHER_STAT_EX_COLLISIONS:
2890 case ETHER_STAT_MACXMT_ERRORS:
2891 case ETHER_STAT_CARRIER_ERRORS:
2892 case ETHER_STAT_TOOLONG_ERRORS:
2893 case ETHER_STAT_XCVR_ADDR:
2894 case ETHER_STAT_XCVR_ID:
2895 case ETHER_STAT_XCVR_INUSE:
2896 case ETHER_STAT_CAP_1000FDX:
2897 case ETHER_STAT_CAP_1000HDX:
2898 case ETHER_STAT_CAP_100FDX:
2899 case ETHER_STAT_CAP_100HDX:
2900 case ETHER_STAT_CAP_10FDX:
2901 case ETHER_STAT_CAP_10HDX:
2902 case ETHER_STAT_CAP_ASMPAUSE:
2903 case ETHER_STAT_CAP_PAUSE:
2904 case ETHER_STAT_CAP_AUTONEG:
2905 case ETHER_STAT_ADV_CAP_1000FDX:
2906 case ETHER_STAT_ADV_CAP_1000HDX:
2907 case ETHER_STAT_ADV_CAP_100FDX:
2908 case ETHER_STAT_ADV_CAP_100HDX:
2909 case ETHER_STAT_ADV_CAP_10FDX:
2910 case ETHER_STAT_ADV_CAP_10HDX:
2911 case ETHER_STAT_ADV_CAP_ASMPAUSE:
2912 case ETHER_STAT_ADV_CAP_PAUSE:
2913 case ETHER_STAT_ADV_CAP_AUTONEG:
2914 case ETHER_STAT_LP_CAP_1000FDX:
2915 case ETHER_STAT_LP_CAP_1000HDX:
2916 case ETHER_STAT_LP_CAP_100FDX:
2917 case ETHER_STAT_LP_CAP_100HDX:
2918 case ETHER_STAT_LP_CAP_10FDX:
2919 case ETHER_STAT_LP_CAP_10HDX:
2920 case ETHER_STAT_LP_CAP_ASMPAUSE:
2921 case ETHER_STAT_LP_CAP_PAUSE:
2922 case ETHER_STAT_LP_CAP_AUTONEG:
2923 case ETHER_STAT_LINK_ASMPAUSE:
2924 case ETHER_STAT_LINK_PAUSE:
2925 case ETHER_STAT_LINK_AUTONEG:
2926 case ETHER_STAT_LINK_DUPLEX:
2927 default:
2928 val = 0;
2929 break;
2930
2931 }
2932 return (val);
2933 }
2934
2935 /*
2936 * LDC channel is UP, start handshake process with peer.
2937 */
2938 static void
2939 vgen_handle_evt_up(vgen_ldc_t *ldcp)
2940 {
2941 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
2942
2943 DBG1(vgenp, ldcp, "enter\n");
2944
2945 ASSERT(MUTEX_HELD(&ldcp->cblock));
2946
2947 if (ldcp->portp != vgenp->vsw_portp) {
2948 /*
2949 * As the channel is up, use this port from now on.
2950 */
2951 (void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE);
2952 }
2953
2954 /* Initialize local session id */
2955 ldcp->local_sid = ddi_get_lbolt();
2956
2957 /* clear peer session id */
2958 ldcp->peer_sid = 0;
2959
2960 /* Initiate Handshake process with peer ldc endpoint */
2961 (void) vgen_handshake(vh_nextphase(ldcp));
2962
2963 DBG1(vgenp, ldcp, "exit\n");
2964 }
2965
2966 /*
2967 * LDC channel is Reset, terminate connection with peer and try to
2968 * bring the channel up again.
2969 */
2970 int
2971 vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
2972 {
2973 if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2974 ASSERT(MUTEX_HELD(&ldcp->cblock));
2975 }
2976
2977 /* Set the flag to indicate reset is in progress */
2978 if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
2979 /* another thread is already in the process of resetting */
2980 return (EBUSY);
2981 }
2982
2983 if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2984 mutex_exit(&ldcp->cblock);
2985 }
2986
2987 (void) vgen_process_reset(ldcp, VGEN_FLAG_EVT_RESET);
2988
2989 if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
2990 mutex_enter(&ldcp->cblock);
2991 }
2992
2993 return (0);
2994 }
2995
2996 /* Interrupt handler for the channel */
2997 static uint_t
2998 vgen_ldc_cb(uint64_t event, caddr_t arg)
2999 {
3000 _NOTE(ARGUNUSED(event))
3001 vgen_ldc_t *ldcp;
3002 vgen_t *vgenp;
3003 ldc_status_t istatus;
3004 vgen_stats_t *statsp;
3005 uint_t ret = LDC_SUCCESS;
3006
3007 ldcp = (vgen_ldc_t *)arg;
3008 vgenp = LDC_TO_VGEN(ldcp);
3009 statsp = &ldcp->stats;
3010
3011 DBG1(vgenp, ldcp, "enter\n");
3012
3013 mutex_enter(&ldcp->cblock);
3014 statsp->callbacks++;
3015 if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
3016 DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n",
3017 ldcp->ldc_status);
3018 mutex_exit(&ldcp->cblock);
3019 return (LDC_SUCCESS);
3020 }
3021
3022 /*
3023 * NOTE: not using switch() as event could be triggered by
3024 * a state change and a read request. Also the ordering of the
3025 * check for the event types is deliberate.
3026 */
3027 if (event & LDC_EVT_UP) {
3028 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3029 DWARN(vgenp, ldcp, "ldc_status err\n");
3030 /* status couldn't be determined */
3031 ret = LDC_FAILURE;
3032 goto ldc_cb_ret;
3033 }
3034 ldcp->ldc_status = istatus;
3035 if (ldcp->ldc_status != LDC_UP) {
3036 DWARN(vgenp, ldcp, "LDC_EVT_UP received "
3037 " but ldc status is not UP(0x%x)\n",
3038 ldcp->ldc_status);
3039 /* spurious interrupt, return success */
3040 goto ldc_cb_ret;
3041 }
3042 DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n",
3043 event, ldcp->ldc_status);
3044
3045 vgen_handle_evt_up(ldcp);
3046
3047 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3048 }
3049
3050 /* Handle RESET/DOWN before READ event */
3051 if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
3052 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3053 DWARN(vgenp, ldcp, "ldc_status error\n");
3054 /* status couldn't be determined */
3055 ret = LDC_FAILURE;
3056 goto ldc_cb_ret;
3057 }
3058 ldcp->ldc_status = istatus;
3059 DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n",
3060 event, ldcp->ldc_status);
3061
3062 (void) vgen_handle_evt_reset(ldcp, VGEN_LDC_CB);
3063
3064 /*
3065 * As the channel is down/reset, ignore READ event
3066 * but print a debug warning message.
3067 */
3068 if (event & LDC_EVT_READ) {
3069 DWARN(vgenp, ldcp,
3070 "LDC_EVT_READ set along with RESET/DOWN\n");
3071 event &= ~LDC_EVT_READ;
3072 }
3073 }
3074
3075 if (event & LDC_EVT_READ) {
3076 DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n",
3077 event, ldcp->ldc_status);
3078
3079 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
3080
3081 if (ldcp->msg_thread != NULL) {
3082 /*
3083 * If the receive thread is enabled, then
3084 * wakeup the receive thread to process the
3085 * LDC messages.
3086 */
3087 mutex_exit(&ldcp->cblock);
3088 mutex_enter(&ldcp->msg_thr_lock);
3089 if (!(ldcp->msg_thr_flags & VGEN_WTHR_DATARCVD)) {
3090 ldcp->msg_thr_flags |= VGEN_WTHR_DATARCVD;
3091 cv_signal(&ldcp->msg_thr_cv);
3092 }
3093 mutex_exit(&ldcp->msg_thr_lock);
3094 mutex_enter(&ldcp->cblock);
3095 } else {
3096 (void) vgen_handle_evt_read(ldcp, VGEN_LDC_CB);
3097 }
3098 }
3099
3100 ldc_cb_ret:
3101 mutex_exit(&ldcp->cblock);
3102 DBG1(vgenp, ldcp, "exit\n");
3103 return (ret);
3104 }
3105
3106 int
3107 vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller)
3108 {
3109 int rv;
3110 uint64_t *ldcmsg;
3111 size_t msglen;
3112 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3113 vio_msg_tag_t *tagp;
3114 ldc_status_t istatus;
3115 boolean_t has_data;
3116
3117 DBG1(vgenp, ldcp, "enter\n");
3118
3119 if (caller == VGEN_LDC_CB) {
3120 ASSERT(MUTEX_HELD(&ldcp->cblock));
3121 } else if (caller == VGEN_MSG_THR) {
3122 mutex_enter(&ldcp->cblock);
3123 } else {
3124 return (EINVAL);
3125 }
3126
3127 ldcmsg = ldcp->ldcmsg;
3128
3129 vgen_evtread:
3130 do {
3131 msglen = ldcp->msglen;
3132 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
3133
3134 if (rv != 0) {
3135 DWARN(vgenp, ldcp, "ldc_read() failed "
3136 "rv(%d) len(%d)\n", rv, msglen);
3137 if (rv == ECONNRESET)
3138 goto vgen_evtread_error;
3139 break;
3140 }
3141 if (msglen == 0) {
3142 DBG2(vgenp, ldcp, "ldc_read NODATA");
3143 break;
3144 }
3145 DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen);
3146
3147 tagp = (vio_msg_tag_t *)ldcmsg;
3148
3149 if (ldcp->peer_sid) {
3150 /*
3151 * check sid only after we have received peer's sid
3152 * in the version negotiate msg.
3153 */
3154 #ifdef DEBUG
3155 if (vgen_inject_error(ldcp, VGEN_ERR_HSID)) {
3156 /* simulate bad sid condition */
3157 tagp->vio_sid = 0;
3158 vgen_inject_err_flag &= ~(VGEN_ERR_HSID);
3159 }
3160 #endif
3161 rv = vgen_check_sid(ldcp, tagp);
3162 if (rv != VGEN_SUCCESS) {
3163 /*
3164 * If sid mismatch is detected,
3165 * reset the channel.
3166 */
3167 DWARN(vgenp, ldcp, "vgen_check_sid() failed\n");
3168 goto vgen_evtread_error;
3169 }
3170 }
3171
3172 switch (tagp->vio_msgtype) {
3173 case VIO_TYPE_CTRL:
3174 rv = vgen_handle_ctrlmsg(ldcp, tagp);
3175 if (rv != 0) {
3176 DWARN(vgenp, ldcp, "vgen_handle_ctrlmsg()"
3177 " failed rv(%d)\n", rv);
3178 }
3179 break;
3180
3181 case VIO_TYPE_DATA:
3182 rv = vgen_handle_datamsg(ldcp, tagp, msglen);
3183 if (rv != 0) {
3184 DWARN(vgenp, ldcp, "vgen_handle_datamsg()"
3185 " failed rv(%d)\n", rv);
3186 }
3187 break;
3188
3189 case VIO_TYPE_ERR:
3190 vgen_handle_errmsg(ldcp, tagp);
3191 break;
3192
3193 default:
3194 DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n",
3195 tagp->vio_msgtype);
3196 break;
3197 }
3198
3199 /*
3200 * If an error is encountered, stop processing and
3201 * handle the error.
3202 */
3203 if (rv != 0) {
3204 goto vgen_evtread_error;
3205 }
3206
3207 } while (msglen);
3208
3209 /* check once more before exiting */
3210 rv = ldc_chkq(ldcp->ldc_handle, &has_data);
3211 if ((rv == 0) && (has_data == B_TRUE)) {
3212 DTRACE_PROBE1(vgen_chkq, vgen_ldc_t *, ldcp);
3213 goto vgen_evtread;
3214 }
3215
3216 vgen_evtread_error:
3217 if (rv != 0) {
3218 /*
3219 * We handle the error and then return the error value. If we
3220 * are running in the context of the msg worker, the error
3221 * tells the worker thread to exit, as the channel would have
3222 * been reset.
3223 */
3224 if (rv == ECONNRESET) {
3225 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
3226 DWARN(vgenp, ldcp, "ldc_status err\n");
3227 } else {
3228 ldcp->ldc_status = istatus;
3229 }
3230 (void) vgen_handle_evt_reset(ldcp, caller);
3231 } else {
3232 DWARN(vgenp, ldcp, "Calling vgen_ldc_reset()...\n");
3233 (void) vgen_ldc_reset(ldcp, caller);
3234 }
3235 }
3236
3237 if (caller == VGEN_MSG_THR) {
3238 mutex_exit(&ldcp->cblock);
3239 }
3240
3241 DBG1(vgenp, ldcp, "exit\n");
3242 return (rv);
3243 }
3244
3245 /* vgen handshake functions */
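/*
 * Rough outline of the handshake sequence, as driven by the phase
 * handlers in this file (an informational summary, not a protocol
 * specification):
 *
 *	VH_PHASE1	version negotiation (vgen_send_version_negotiate())
 *	VH_PHASE2	attribute exchange  (vgen_handshake_phase2())
 *	VH_PHASE3	dring registration  (vgen_handshake_phase3())
 *	VH_PHASE4	RDX exchange        (vgen_send_rdx_info())
 *	VH_DONE		data transfer can begin
 *
 * vh_nextphase() below simply advances ldcp->hphase through these
 * states.
 */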
3246
3247 /* change the hphase for the channel to the next phase */
3248 static vgen_ldc_t *
3249 vh_nextphase(vgen_ldc_t *ldcp)
3250 {
3251 if (ldcp->hphase == VH_PHASE4) {
3252 ldcp->hphase = VH_DONE;
3253 } else {
3254 ldcp->hphase++;
3255 }
3256 return (ldcp);
3257 }
3258
3259 /* send version negotiate message to the peer over ldc */
3260 static int
3261 vgen_send_version_negotiate(vgen_ldc_t *ldcp)
3262 {
3263 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3264 vio_ver_msg_t vermsg;
3265 vio_msg_tag_t *tagp = &vermsg.tag;
3266 int rv;
3267
3268 bzero(&vermsg, sizeof (vermsg));
3269
3270 tagp->vio_msgtype = VIO_TYPE_CTRL;
3271 tagp->vio_subtype = VIO_SUBTYPE_INFO;
3272 tagp->vio_subtype_env = VIO_VER_INFO;
3273 tagp->vio_sid = ldcp->local_sid;
3274
3275 /* get version msg payload from ldcp->local */
3276 vermsg.ver_major = ldcp->local_hparams.ver_major;
3277 vermsg.ver_minor = ldcp->local_hparams.ver_minor;
3278 vermsg.dev_class = ldcp->local_hparams.dev_class;
3279
3280 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE);
3281 if (rv != VGEN_SUCCESS) {
3282 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3283 return (rv);
3284 }
3285
3286 ldcp->hstate |= VER_INFO_SENT;
3287 DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n",
3288 vermsg.ver_major, vermsg.ver_minor);
3289
3290 return (VGEN_SUCCESS);
3291 }
3292
3293 /* send attr info message to the peer over ldc */
3294 static int
3295 vgen_send_attr_info(vgen_ldc_t *ldcp)
3296 {
3297 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3298 vnet_attr_msg_t attrmsg;
3299 vio_msg_tag_t *tagp = &attrmsg.tag;
3300 int rv;
3301
3302 bzero(&attrmsg, sizeof (attrmsg));
3303
3304 tagp->vio_msgtype = VIO_TYPE_CTRL;
3305 tagp->vio_subtype = VIO_SUBTYPE_INFO;
3306 tagp->vio_subtype_env = VIO_ATTR_INFO;
3307 tagp->vio_sid = ldcp->local_sid;
3308
3309 /* get attr msg payload from ldcp->local */
3310 attrmsg.mtu = ldcp->local_hparams.mtu;
3311 attrmsg.addr = ldcp->local_hparams.addr;
3312 attrmsg.addr_type = ldcp->local_hparams.addr_type;
3313 attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
3314 attrmsg.ack_freq = ldcp->local_hparams.ack_freq;
3315 attrmsg.physlink_update = ldcp->local_hparams.physlink_update;
3316 attrmsg.options = ldcp->local_hparams.dring_mode;
3317
3318 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
3319 if (rv != VGEN_SUCCESS) {
3320 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3321 return (rv);
3322 }
3323
3324 ldcp->hstate |= ATTR_INFO_SENT;
3325 DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");
3326
3327 return (VGEN_SUCCESS);
3328 }
3329
3330 /*
3331 * Send descriptor ring register message to the peer over ldc.
3332 * Invoked in RxDringData mode.
3333 */
3334 static int
3335 vgen_send_rx_dring_reg(vgen_ldc_t *ldcp)
3336 {
3337 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3338 vio_dring_reg_msg_t *msg;
3339 vio_dring_reg_ext_msg_t *emsg;
3340 int rv;
3341 uint8_t *buf;
3342 uint_t msgsize;
3343
3344 msgsize = VNET_DRING_REG_EXT_MSG_SIZE(ldcp->rx_data_ncookies);
3345 msg = kmem_zalloc(msgsize, KM_SLEEP);
3346
3347 /* Initialize the common part of dring reg msg */
3348 vgen_init_dring_reg_msg(ldcp, msg, VIO_RX_DRING_DATA);
3349
3350 /* skip over dring cookies at the tail of common section */
3351 buf = (uint8_t *)msg->cookie;
3352 ASSERT(msg->ncookies == 1);
3353 buf += (msg->ncookies * sizeof (ldc_mem_cookie_t));
3354
3355 /* Now setup the extended part, specific to RxDringData mode */
3356 emsg = (vio_dring_reg_ext_msg_t *)buf;
3357
3358 /* copy data_ncookies in the msg */
3359 emsg->data_ncookies = ldcp->rx_data_ncookies;
3360
3361 /* copy data area size in the msg */
3362 emsg->data_area_size = ldcp->rx_data_sz;
3363
3364 /* copy data area cookies in the msg */
3365 bcopy(ldcp->rx_data_cookie, (ldc_mem_cookie_t *)emsg->data_cookie,
3366 sizeof (ldc_mem_cookie_t) * ldcp->rx_data_ncookies);
3367
3368 rv = vgen_sendmsg(ldcp, (caddr_t)msg, msgsize, B_FALSE);
3369 if (rv != VGEN_SUCCESS) {
3370 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3371 kmem_free(msg, msgsize);
3372 return (rv);
3373 }
3374
3375 ldcp->hstate |= DRING_INFO_SENT;
3376 DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3377
3378 kmem_free(msg, msgsize);
3379 return (VGEN_SUCCESS);
3380 }
3381
3382 /*
3383 * Send descriptor ring register message to the peer over ldc.
3384 * Invoked in TxDring mode.
3385 */
3386 static int
3387 vgen_send_tx_dring_reg(vgen_ldc_t *ldcp)
3388 {
3389 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3390 vio_dring_reg_msg_t msg;
3391 int rv;
3392
3393 bzero(&msg, sizeof (msg));
3394
3395 /*
3396 * Initialize only the common part of dring reg msg in TxDring mode.
3397 */
3398 vgen_init_dring_reg_msg(ldcp, &msg, VIO_TX_DRING);
3399
3400 rv = vgen_sendmsg(ldcp, (caddr_t)&msg, sizeof (msg), B_FALSE);
3401 if (rv != VGEN_SUCCESS) {
3402 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3403 return (rv);
3404 }
3405
3406 ldcp->hstate |= DRING_INFO_SENT;
3407 DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");
3408
3409 return (VGEN_SUCCESS);
3410 }
3411
3412 static int
3413 vgen_send_rdx_info(vgen_ldc_t *ldcp)
3414 {
3415 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3416 vio_rdx_msg_t rdxmsg;
3417 vio_msg_tag_t *tagp = &rdxmsg.tag;
3418 int rv;
3419
3420 bzero(&rdxmsg, sizeof (rdxmsg));
3421
3422 tagp->vio_msgtype = VIO_TYPE_CTRL;
3423 tagp->vio_subtype = VIO_SUBTYPE_INFO;
3424 tagp->vio_subtype_env = VIO_RDX;
3425 tagp->vio_sid = ldcp->local_sid;
3426
3427 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
3428 if (rv != VGEN_SUCCESS) {
3429 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
3430 return (rv);
3431 }
3432
3433 ldcp->hstate |= RDX_INFO_SENT;
3434 DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");
3435
3436 return (VGEN_SUCCESS);
3437 }
3438
3439 /* send multicast addr info message to vsw */
3440 static int
3441 vgen_send_mcast_info(vgen_ldc_t *ldcp)
3442 {
3443 vnet_mcast_msg_t mcastmsg;
3444 vnet_mcast_msg_t *msgp;
3445 vio_msg_tag_t *tagp;
3446 vgen_t *vgenp;
3447 struct ether_addr *mca;
3448 int rv;
3449 int i;
3450 uint32_t size;
3451 uint32_t mccount;
3452 uint32_t n;
3453
3454 msgp = &mcastmsg;
3455 tagp = &msgp->tag;
3456 vgenp = LDC_TO_VGEN(ldcp);
3457
3458 mccount = vgenp->mccount;
3459 i = 0;
3460
3461 do {
3462 tagp->vio_msgtype = VIO_TYPE_CTRL;
3463 tagp->vio_subtype = VIO_SUBTYPE_INFO;
3464 tagp->vio_subtype_env = VNET_MCAST_INFO;
3465 tagp->vio_sid = ldcp->local_sid;
3466
3467 n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
3468 size = n * sizeof (struct ether_addr);
3469
3470 mca = &(vgenp->mctab[i]);
3471 bcopy(mca, (msgp->mca), size);
3472 msgp->set = B_TRUE;
3473 msgp->count = n;
3474
3475 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
3476 B_FALSE);
3477 if (rv != VGEN_SUCCESS) {
3478 DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
3479 return (rv);
3480 }
3481
3482 mccount -= n;
3483 i += n;
3484
3485 } while (mccount);
3486
3487 return (VGEN_SUCCESS);
3488 }
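/*
 * Illustrative example (the counts are assumed only for the example):
 * with vgenp->mccount == 70 and VNET_NUM_MCAST == 64, the loop above
 * would send two VNET_MCAST_INFO messages to vsw: one carrying
 * mctab[0..63] (n == 64) and one carrying mctab[64..69] (n == 6).
 */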
3489
3490 /*
3491 * vgen_dds_rx -- post DDS messages to vnet.
3492 */
3493 static int
3494 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
3495 {
3496 vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp;
3497 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3498
3499 if (dmsg->dds_class != DDS_VNET_NIU) {
3500 DWARN(vgenp, ldcp, "Unknown DDS class, dropping");
3501 return (EBADMSG);
3502 }
3503 vnet_dds_rx(vgenp->vnetp, dmsg);
3504 return (0);
3505 }
3506
3507 /*
3508 * vgen_dds_tx -- an interface called by vnet to send DDS messages.
3509 */
3510 int
3511 vgen_dds_tx(void *arg, void *msg)
3512 {
3513 vgen_t *vgenp = arg;
3514 vio_dds_msg_t *dmsg = msg;
3515 vgen_portlist_t *plistp = &vgenp->vgenports;
3516 vgen_ldc_t *ldcp;
3517 int rv = EIO;
3518
3519 READ_ENTER(&plistp->rwlock);
3520 ldcp = vgenp->vsw_portp->ldcp;
3521 if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) {
3522 goto vgen_dsend_exit;
3523 }
3524
3525 dmsg->tag.vio_sid = ldcp->local_sid;
3526 rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE);
3527 if (rv != VGEN_SUCCESS) {
3528 rv = EIO;
3529 } else {
3530 rv = 0;
3531 }
3532
3533 vgen_dsend_exit:
3534 RW_EXIT(&plistp->rwlock);
3535 return (rv);
3536
3537 }
3538
3539 /* Initiate Phase 2 of handshake */
3540 static int
3541 vgen_handshake_phase2(vgen_ldc_t *ldcp)
3542 {
3543 int rv;
3544
3545 #ifdef DEBUG
3546 if (vgen_inject_error(ldcp, VGEN_ERR_HSTATE)) {
3547 /* simulate out of state condition */
3548 vgen_inject_err_flag &= ~(VGEN_ERR_HSTATE);
3549 rv = vgen_send_rdx_info(ldcp);
3550 return (rv);
3551 }
3552 if (vgen_inject_error(ldcp, VGEN_ERR_HTIMEOUT)) {
3553 /* simulate timeout condition */
3554 vgen_inject_err_flag &= ~(VGEN_ERR_HTIMEOUT);
3555 return (VGEN_SUCCESS);
3556 }
3557 #endif
3558 rv = vgen_send_attr_info(ldcp);
3559 if (rv != VGEN_SUCCESS) {
3560 return (rv);
3561 }
3562
3563 return (VGEN_SUCCESS);
3564 }
3565
3566 static int
3567 vgen_handshake_phase3(vgen_ldc_t *ldcp)
3568 {
3569 int rv;
3570 vgen_hparams_t *lp = &ldcp->local_hparams;
3571 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3572 vgen_stats_t *statsp = &ldcp->stats;
3573
3574 /* dring mode has been negotiated in attr phase; save in stats */
3575 statsp->dring_mode = lp->dring_mode;
3576
3577 if (lp->dring_mode == VIO_RX_DRING_DATA) { /* RxDringData mode */
3578 ldcp->rx_dringdata = vgen_handle_dringdata_shm;
3579 ldcp->tx_dringdata = vgen_dringsend_shm;
3580 if (!VGEN_PRI_ETH_DEFINED(vgenp)) {
3581 /*
3582 * If priority frames are not in use, we don't need a
3583 * separate wrapper function for 'tx', so we set it to
3584 * 'tx_dringdata'. If priority frames are configured,
3585 * we leave the 'tx' pointer as is (initialized in
3586 * vgen_set_vnet_proto_ops()).
3587 */
3588 ldcp->tx = ldcp->tx_dringdata;
3589 }
3590 } else { /* TxDring mode */
3591 ldcp->msg_thread = thread_create(NULL,
3592 2 * DEFAULTSTKSZ, vgen_ldc_msg_worker, ldcp, 0,
3593 &p0, TS_RUN, maxclsyspri);
3594 }
3595
3596 rv = vgen_create_dring(ldcp);
3597 if (rv != VGEN_SUCCESS) {
3598 return (rv);
3599 }
3600
3601 /* update local dring_info params */
3602 if (lp->dring_mode == VIO_RX_DRING_DATA) {
3603 bcopy(&(ldcp->rx_dring_cookie),
3604 &(ldcp->local_hparams.dring_cookie),
3605 sizeof (ldc_mem_cookie_t));
3606 ldcp->local_hparams.dring_ncookies = ldcp->rx_dring_ncookies;
3607 ldcp->local_hparams.num_desc = ldcp->num_rxds;
3608 ldcp->local_hparams.desc_size =
3609 sizeof (vnet_rx_dringdata_desc_t);
3610 rv = vgen_send_rx_dring_reg(ldcp);
3611 } else {
3612 bcopy(&(ldcp->tx_dring_cookie),
3613 &(ldcp->local_hparams.dring_cookie),
3614 sizeof (ldc_mem_cookie_t));
3615 ldcp->local_hparams.dring_ncookies = ldcp->tx_dring_ncookies;
3616 ldcp->local_hparams.num_desc = ldcp->num_txds;
3617 ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
3618 rv = vgen_send_tx_dring_reg(ldcp);
3619 }
3620
3621 if (rv != VGEN_SUCCESS) {
3622 return (rv);
3623 }
3624
3625 return (VGEN_SUCCESS);
3626 }
3627
3628 /*
3629 * Set vnet-protocol-version dependent functions based on version.
3630 */
3631 static void
3632 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
3633 {
3634 vgen_hparams_t *lp = &ldcp->local_hparams;
3635 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3636
3637 /*
3638 * Setup the appropriate dring data processing routine and any
3639 * associated thread based on the version.
3640 *
3641 * In versions < 1.6, we only support TxDring mode. In this mode, the
3642 * msg worker thread processes all types of VIO msgs (ctrl and data).
3643 *
3644 * In versions >= 1.6, we also support RxDringData mode. In this mode,
3645 * all msgs including dring data messages are handled directly by the
3646 * callback (intr) thread. The dring data msgs (msgtype: VIO_TYPE_DATA,
3647 * subtype: VIO_SUBTYPE_INFO, subtype_env: VIO_DRING_DATA) can also be
3648 * disabled while the polling thread is active, in which case the
3649 * polling thread processes the rcv descriptor ring.
3650 *
3651 * However, for versions >= 1.6, we can force to only use TxDring mode.
3652 * This could happen if RxDringData mode has been disabled (see
3653 * below) on this guest or on the peer guest. This info is determined
3654 * as part of attr exchange phase of handshake. Hence, we setup these
3655 * pointers for v1.6 after attr msg phase completes during handshake.
3656 */
3657 if (VGEN_VER_GTEQ(ldcp, 1, 6)) { /* Ver >= 1.6 */
3658 /*
3659 * Set data dring mode for vgen_send_attr_info().
3660 */
3661 if (vgen_mapin_avail(ldcp) == B_TRUE) {
3662 lp->dring_mode = (VIO_RX_DRING_DATA | VIO_TX_DRING);
3663 } else {
3664 lp->dring_mode = VIO_TX_DRING;
3665 }
3666 } else { /* Ver <= 1.5 */
3667 lp->dring_mode = VIO_TX_DRING;
3668 }
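/*
 * For example, with a negotiated version of 1.6 and shared-memory
 * mapin available, lp->dring_mode advertises both modes
 * (VIO_RX_DRING_DATA | VIO_TX_DRING) and the attribute exchange
 * settles which one is actually used, whereas a pre-1.6 peer always
 * ends up in TxDring mode.
 */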
3669
3670 if (VGEN_VER_GTEQ(ldcp, 1, 5)) {
3671 vgen_port_t *portp = ldcp->portp;
3672 vnet_t *vnetp = vgenp->vnetp;
3673 /*
3674 * If the version negotiated with vswitch is >= 1.5 (link
3675 * status update support), set the required bits in our
3676 * attributes if this vnet device has been configured to get
3677 * physical link state updates.
3678 */
3679 if (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) {
3680 lp->physlink_update = PHYSLINK_UPDATE_STATE;
3681 } else {
3682 lp->physlink_update = PHYSLINK_UPDATE_NONE;
3683 }
3684 }
3685
3686 if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
3687 /*
3688 * If the version negotiated with peer is >= 1.4(Jumbo Frame
3689 * Support), set the mtu in our attributes to max_frame_size.
3690 */
3691 lp->mtu = vgenp->max_frame_size;
3692 } else if (VGEN_VER_EQ(ldcp, 1, 3)) {
3693 /*
3694 * If the version negotiated with peer is == 1.3 (Vlan Tag
3695 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
3696 */
3697 lp->mtu = ETHERMAX + VLAN_TAGSZ;
3698 } else {
3699 vgen_port_t *portp = ldcp->portp;
3700 vnet_t *vnetp = vgenp->vnetp;
3701 /*
3702 * Pre-1.3 peers expect max frame size of ETHERMAX.
3703 * We can negotiate that size with those peers provided the
3704 * following conditions are true:
3705 * - Only pvid is defined for our peer and there are no vids.
3706 * - pvids are equal.
3707 * If the above conditions are true, then we can send/recv only
3708 * untagged frames of max size ETHERMAX.
3709 */
3710 if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
3711 lp->mtu = ETHERMAX;
3712 }
3713 }
3714
3715 if (VGEN_VER_GTEQ(ldcp, 1, 2)) { /* Versions >= 1.2 */
3716 /*
3717 * Starting v1.2 we support priority frames; so set the
3718 * dring processing routines and xfer modes based on the
3719 * version. Note that the dring routines could be changed after
3720 * attribute handshake phase for versions >= 1.6 (See
3721 * vgen_handshake_phase3())
3722 */
3723 ldcp->tx_dringdata = vgen_dringsend;
3724 ldcp->rx_dringdata = vgen_handle_dringdata;
3725
3726 if (VGEN_PRI_ETH_DEFINED(vgenp)) {
3727 /*
3728 * Enable priority routines and pkt mode only if
3729 * at least one pri-eth-type is specified in MD.
3730 */
3731 ldcp->tx = vgen_ldcsend;
3732 ldcp->rx_pktdata = vgen_handle_pkt_data;
3733
3734 /* set xfer mode for vgen_send_attr_info() */
3735 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
3736 } else {
3737 /* No priority eth types defined in MD */
3738 ldcp->tx = ldcp->tx_dringdata;
3739 ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3740
3741 /* Set xfer mode for vgen_send_attr_info() */
3742 lp->xfer_mode = VIO_DRING_MODE_V1_2;
3743 }
3744 } else { /* Versions prior to 1.2 */
3745 vgen_reset_vnet_proto_ops(ldcp);
3746 }
3747 }
3748
3749 /*
3750 * Reset vnet-protocol-version dependent functions to pre-v1.2.
3751 */
3752 static void
 3753 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
3754 {
3755 vgen_hparams_t *lp = &ldcp->local_hparams;
3756
3757 ldcp->tx = ldcp->tx_dringdata = vgen_dringsend;
3758 ldcp->rx_dringdata = vgen_handle_dringdata;
3759 ldcp->rx_pktdata = vgen_handle_pkt_data_nop;
3760
3761 /* set xfer mode for vgen_send_attr_info() */
3762 lp->xfer_mode = VIO_DRING_MODE_V1_0;
3763 }
3764
3765 static void
 3766 vgen_vlan_unaware_port_reset(vgen_port_t *portp)
3767 {
3768 vgen_ldc_t *ldcp = portp->ldcp;
3769 vgen_t *vgenp = portp->vgenp;
3770 vnet_t *vnetp = vgenp->vnetp;
3771 boolean_t need_reset = B_FALSE;
3772
3773 mutex_enter(&ldcp->cblock);
3774
3775 /*
3776 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
3777 * the connection. See comments in vgen_set_vnet_proto_ops().
3778 */
3779 if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
3780 (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
3781 need_reset = B_TRUE;
3782 }
3783 mutex_exit(&ldcp->cblock);
3784
3785 if (need_reset == B_TRUE) {
3786 (void) vgen_ldc_reset(ldcp, VGEN_OTHER);
3787 }
3788 }
3789
3790 static void
 3791 vgen_port_reset(vgen_port_t *portp)
3792 {
3793 (void) vgen_ldc_reset(portp->ldcp, VGEN_OTHER);
3794 }
3795
3796 static void
 3797 vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
3798 {
3799 vgen_port_t *portp;
3800 vgen_portlist_t *plistp;
3801
3802 plistp = &(vgenp->vgenports);
3803 READ_ENTER(&plistp->rwlock);
3804
3805 for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {
3806
3807 vgen_vlan_unaware_port_reset(portp);
3808
3809 }
3810
3811 RW_EXIT(&plistp->rwlock);
3812 }
3813
3814 static void
 3815 vgen_reset_vsw_port(vgen_t *vgenp)
3816 {
3817 vgen_port_t *portp;
3818
3819 if ((portp = vgenp->vsw_portp) != NULL) {
3820 vgen_port_reset(portp);
3821 }
3822 }
3823
3824 static void
 3825 vgen_setup_handshake_params(vgen_ldc_t *ldcp)
3826 {
3827 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3828
3829 /*
3830 * clear local handshake params and initialize.
3831 */
3832 bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));
3833
3834 /* set version to the highest version supported */
3835 ldcp->local_hparams.ver_major =
3836 ldcp->vgen_versions[0].ver_major;
3837 ldcp->local_hparams.ver_minor =
3838 ldcp->vgen_versions[0].ver_minor;
3839 ldcp->local_hparams.dev_class = VDEV_NETWORK;
3840
3841 /* set attr_info params */
3842 ldcp->local_hparams.mtu = vgenp->max_frame_size;
3843 ldcp->local_hparams.addr =
3844 vnet_macaddr_strtoul(vgenp->macaddr);
3845 ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
3846 ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
3847 ldcp->local_hparams.ack_freq = 0; /* don't need acks */
3848 ldcp->local_hparams.physlink_update = PHYSLINK_UPDATE_NONE;
3849
3850 /* reset protocol version specific function pointers */
3851 vgen_reset_vnet_proto_ops(ldcp);
3852 ldcp->local_hparams.dring_ident = 0;
3853 ldcp->local_hparams.dring_ready = B_FALSE;
3854
3855 /* clear peer_hparams */
3856 bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));
3857 ldcp->peer_hparams.dring_ready = B_FALSE;
3858 }
3859
3860 /*
3861 * Process Channel Reset. We tear down the resources (timers, threads,
3862 * descriptor rings etc) associated with the channel and reinitialize the
3863 * channel based on the flags.
3864 *
3865 * Arguments:
3866 * ldcp: The channel being processed.
3867 *
3868 * flags:
3869 * VGEN_FLAG_EVT_RESET:
 3870	 *	An ECONNRESET error occurred while doing ldc operations such as
3871 * ldc_read() or ldc_write(); the channel is already reset and it
3872 * needs to be handled.
3873 * VGEN_FLAG_NEED_LDCRESET:
 3874	 *	Some other errors occurred and the error handling code needs to
3875 * explicitly reset the channel and restart handshake with the
3876 * peer. The error could be either in ldc operations or other
3877 * parts of the code such as timeouts or mdeg events etc.
3878 * VGEN_FLAG_UNINIT:
3879 * The channel is being torn down; no need to bring up the channel
3880 * after resetting.
3881 */
3882 static int
 3883 vgen_process_reset(vgen_ldc_t *ldcp, int flags)
3884 {
3885 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
3886 vgen_port_t *portp = ldcp->portp;
3887 vgen_hparams_t *lp = &ldcp->local_hparams;
3888 boolean_t is_vsw_port = B_FALSE;
3889 boolean_t link_update = B_FALSE;
3890 ldc_status_t istatus;
3891 int rv;
3892 uint_t retries = 0;
3893 timeout_id_t htid = 0;
3894 timeout_id_t wd_tid = 0;
3895
3896 if (portp == vgenp->vsw_portp) { /* vswitch port ? */
3897 is_vsw_port = B_TRUE;
3898 }
3899
3900 /*
3901 * Report that the channel is being reset; it ensures that any HybridIO
3902 * configuration is torn down before we reset the channel if it is not
3903 * already reset (flags == VGEN_FLAG_NEED_LDCRESET).
3904 */
3905 if (is_vsw_port == B_TRUE) {
3906 vio_net_report_err_t rep_err = portp->vcb.vio_net_report_err;
3907 rep_err(portp->vhp, VIO_NET_RES_DOWN);
3908 }
3909
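	/*
	 * Error processing loop: we re-enter below (via the 'goto again'
	 * further down) if restarting the handshake fails, and keep retrying
	 * until the handshake is restarted successfully or vgen_ldc_up()
	 * refuses to bring the channel up after 'vgen_ldc_max_resets' resets.
	 */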
3910 again:
3911 mutex_enter(&ldcp->cblock);
3912
3913 /* Clear hstate and hphase */
3914 ldcp->hstate = 0;
3915 ldcp->hphase = VH_PHASE0;
3916 if (flags == VGEN_FLAG_NEED_LDCRESET || flags == VGEN_FLAG_UNINIT) {
3917 DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
3918 (void) ldc_down(ldcp->ldc_handle);
3919 (void) ldc_status(ldcp->ldc_handle, &istatus);
3920 DWARN(vgenp, ldcp, "Reset Done, ldc_status(%d)\n", istatus);
3921 ldcp->ldc_status = istatus;
3922
3923 if (flags == VGEN_FLAG_UNINIT) {
3924 /* disable further callbacks */
3925 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
3926 if (rv != 0) {
3927 DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
3928 }
3929 }
3930
3931 } else {
3932 /* flags == VGEN_FLAG_EVT_RESET */
3933 DWARN(vgenp, ldcp, "ldc status(%d)\n", ldcp->ldc_status);
3934 }
3935
3936 /*
3937 * As the connection is now reset, mark the channel
3938 * link_state as 'down' and notify the stack if needed.
3939 */
3940 if (ldcp->link_state != LINK_STATE_DOWN) {
3941 ldcp->link_state = LINK_STATE_DOWN;
3942
3943 if (is_vsw_port == B_TRUE) { /* vswitch port ? */
3944 /*
3945 * As the channel link is down, mark physical link also
3946 * as down. After the channel comes back up and
3947 * handshake completes, we will get an update on the
3948 * physlink state from vswitch (if this device has been
3949 * configured to get phys link updates).
3950 */
3951 vgenp->phys_link_state = LINK_STATE_DOWN;
3952 link_update = B_TRUE;
3953
3954 }
3955 }
3956
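	/*
	 * Save and clear the timeout ids while holding cblock; the actual
	 * untimeout() calls are issued only after dropping cblock below,
	 * since the timeout handlers (vgen_hwatchdog()/vgen_tx_watchdog())
	 * themselves acquire cblock and untimeout() waits for a running
	 * handler to finish.
	 */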
3957 if (ldcp->htid != 0) {
3958 htid = ldcp->htid;
3959 ldcp->htid = 0;
3960 }
3961
3962 if (ldcp->wd_tid != 0) {
3963 wd_tid = ldcp->wd_tid;
3964 ldcp->wd_tid = 0;
3965 }
3966
3967 mutex_exit(&ldcp->cblock);
3968
3969 /* Update link state to the stack */
3970 if (link_update == B_TRUE) {
3971 vgen_link_update(vgenp, ldcp->link_state);
3972 }
3973
3974 /*
3975 * As the channel is being reset, redirect traffic to the peer through
3976 * vswitch, until the channel becomes ready to be used again.
3977 */
3978 if (is_vsw_port == B_FALSE && vgenp->vsw_portp != NULL) {
3979 (void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
3980 }
3981
3982 /* Cancel handshake watchdog timeout */
3983 if (htid) {
3984 (void) untimeout(htid);
3985 }
3986
3987 /* Cancel transmit watchdog timeout */
3988 if (wd_tid) {
3989 (void) untimeout(wd_tid);
3990 }
3991
3992 /* Stop the msg worker thread */
3993 if (lp->dring_mode == VIO_TX_DRING && curthread != ldcp->msg_thread) {
3994 vgen_stop_msg_thread(ldcp);
3995 }
3996
3997 /* Grab all locks while we tear down tx/rx resources */
3998 LDC_LOCK(ldcp);
3999
4000 /* Destroy the local dring which is exported to the peer */
4001 vgen_destroy_dring(ldcp);
4002
4003 /* Unmap the remote dring which is imported from the peer */
4004 vgen_unmap_dring(ldcp);
4005
4006 /*
4007 * Bring up the channel and restart handshake
4008 * only if the channel is not being torn down.
4009 */
4010 if (flags != VGEN_FLAG_UNINIT) {
4011
4012 /* Setup handshake parameters to restart a new handshake */
4013 vgen_setup_handshake_params(ldcp);
4014
4015 /* Bring the channel up */
4016 vgen_ldc_up(ldcp);
4017
4018 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
4019 DWARN(vgenp, ldcp, "ldc_status err\n");
4020 } else {
4021 ldcp->ldc_status = istatus;
4022 }
4023
4024 /* If the channel is UP, start handshake */
4025 if (ldcp->ldc_status == LDC_UP) {
4026
4027 if (is_vsw_port == B_FALSE) {
4028 /*
4029 * Channel is up; use this port from now on.
4030 */
4031 (void) atomic_swap_32(&portp->use_vsw_port,
4032 B_FALSE);
4033 }
4034
4035 /* Initialize local session id */
4036 ldcp->local_sid = ddi_get_lbolt();
4037
4038 /* clear peer session id */
4039 ldcp->peer_sid = 0;
4040
4041 /*
4042 * Initiate Handshake process with peer ldc endpoint by
4043 * sending version info vio message. If that fails we
4044 * go back to the top of this function to process the
 4045			 * error again. Note that we can go through this loop up to
4046 * 'vgen_ldc_max_resets' times, after which the channel
4047 * is not brought up.
4048 */
4049 mutex_exit(&ldcp->tclock);
4050 mutex_exit(&ldcp->txlock);
4051 mutex_exit(&ldcp->wrlock);
4052 mutex_exit(&ldcp->rxlock);
4053 rv = vgen_handshake(vh_nextphase(ldcp));
4054 mutex_exit(&ldcp->cblock);
4055 if (rv != 0) {
4056 if (rv == ECONNRESET) {
4057 flags = VGEN_FLAG_EVT_RESET;
4058 } else {
4059 flags = VGEN_FLAG_NEED_LDCRESET;
4060 }
4061
4062 /*
4063 * We still hold 'reset_in_progress'; so we can
4064 * just loop back to the top to restart error
4065 * processing.
4066 */
4067 goto again;
4068 }
4069 } else {
4070 LDC_UNLOCK(ldcp);
4071 }
4072
4073 } else { /* flags == VGEN_FLAG_UNINIT */
4074
4075 /* Close the channel - retry on EAGAIN */
4076 while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
4077 if (++retries > vgen_ldccl_retries) {
4078 break;
4079 }
4080 drv_usecwait(VGEN_LDC_CLOSE_DELAY);
4081 }
4082 if (rv != 0) {
4083 cmn_err(CE_NOTE,
4084 "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
4085 vgenp->instance, rv, ldcp->ldc_id);
4086 }
4087
4088 ldcp->ldc_reset_count = 0;
4089 ldcp->ldc_status = LDC_INIT;
4090 ldcp->flags &= ~(CHANNEL_STARTED);
4091
4092 LDC_UNLOCK(ldcp);
4093 }
4094
4095 /* Done processing channel reset; clear the atomic flag */
4096 ldcp->reset_in_progress = 0;
4097 return (0);
4098 }
4099
4100 /*
4101 * Initiate handshake with the peer by sending various messages
4102 * based on the handshake-phase that the channel is currently in.
4103 */
4104 static int
 4105 vgen_handshake(vgen_ldc_t *ldcp)
4106 {
4107 uint32_t hphase = ldcp->hphase;
4108 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4109 int rv = 0;
4110 timeout_id_t htid;
4111
4112 switch (hphase) {
4113
4114 case VH_PHASE1:
4115
4116 /*
 4117		 * Start the timer for the entire handshake process. The timer is
 4118		 * turned off when all phases of the handshake complete successfully
 4119		 * and hphase goes to VH_DONE (below), or when the channel is reset
 4120		 * due to errors or vgen_ldc_uninit() is invoked (vgen_stop).
4121 */
4122 ASSERT(ldcp->htid == 0);
4123 ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp,
4124 drv_usectohz(vgen_hwd_interval * MICROSEC));
4125
4126 /* Phase 1 involves negotiating the version */
4127 rv = vgen_send_version_negotiate(ldcp);
4128 break;
4129
4130 case VH_PHASE2:
4131 rv = vgen_handshake_phase2(ldcp);
4132 break;
4133
4134 case VH_PHASE3:
4135 rv = vgen_handshake_phase3(ldcp);
4136 break;
4137
4138 case VH_PHASE4:
4139 rv = vgen_send_rdx_info(ldcp);
4140 break;
4141
4142 case VH_DONE:
4143
4144 ldcp->ldc_reset_count = 0;
4145
4146 DBG1(vgenp, ldcp, "Handshake Done\n");
4147
4148 /*
4149 * The channel is up and handshake is done successfully. Now we
4150 * can mark the channel link_state as 'up'. We also notify the
4151 * stack if the channel is connected to vswitch.
4152 */
4153 ldcp->link_state = LINK_STATE_UP;
4154
4155 if (ldcp->portp == vgenp->vsw_portp) {
4156 /*
4157 * If this channel(port) is connected to vsw,
4158 * need to sync multicast table with vsw.
4159 */
4160 rv = vgen_send_mcast_info(ldcp);
4161 if (rv != VGEN_SUCCESS)
4162 break;
4163
4164 if (vgenp->pls_negotiated == B_FALSE) {
4165 /*
4166 * We haven't negotiated with vswitch to get
4167 * physical link state updates. We can update
 4168				 * the stack at this point as the
4169 * channel to vswitch is up and the handshake
4170 * is done successfully.
4171 *
4172 * If we have negotiated to get physical link
4173 * state updates, then we won't notify the
 4174				 * stack here; we do that as soon as
4175 * vswitch sends us the initial phys link state
4176 * (see vgen_handle_physlink_info()).
4177 */
4178 mutex_exit(&ldcp->cblock);
4179 vgen_link_update(vgenp, ldcp->link_state);
4180 mutex_enter(&ldcp->cblock);
4181 }
4182 }
4183
4184 if (ldcp->htid != 0) {
4185 htid = ldcp->htid;
4186 ldcp->htid = 0;
4187
4188 mutex_exit(&ldcp->cblock);
4189 (void) untimeout(htid);
4190 mutex_enter(&ldcp->cblock);
4191 }
4192
4193 /*
4194 * Check if mac layer should be notified to restart
 4195		 * transmissions. This can happen if the channel was
 4196		 * reset while tx_blocked was set.
4197 */
4198 mutex_enter(&ldcp->tclock);
4199 if (ldcp->tx_blocked) {
4200 vio_net_tx_update_t vtx_update =
4201 ldcp->portp->vcb.vio_net_tx_update;
4202
4203 ldcp->tx_blocked = B_FALSE;
4204 vtx_update(ldcp->portp->vhp);
4205 }
4206 mutex_exit(&ldcp->tclock);
4207
4208 /* start transmit watchdog timer */
4209 ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
4210 drv_usectohz(vgen_txwd_interval * 1000));
4211
4212 break;
4213
4214 default:
4215 break;
4216 }
4217
4218 return (rv);
4219 }
4220
4221 /*
4222 * Check if the current handshake phase has completed successfully and
4223 * return the status.
4224 */
4225 static int
 4226 vgen_handshake_done(vgen_ldc_t *ldcp)
4227 {
4228 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4229 uint32_t hphase = ldcp->hphase;
4230 int status = 0;
4231
4232 switch (hphase) {
4233
4234 case VH_PHASE1:
4235 /*
4236 * Phase1 is done, if version negotiation
4237 * completed successfully.
4238 */
4239 status = ((ldcp->hstate & VER_NEGOTIATED) ==
4240 VER_NEGOTIATED);
4241 break;
4242
4243 case VH_PHASE2:
4244 /*
4245 * Phase 2 is done, if attr info
4246 * has been exchanged successfully.
4247 */
4248 status = ((ldcp->hstate & ATTR_INFO_EXCHANGED) ==
4249 ATTR_INFO_EXCHANGED);
4250 break;
4251
4252 case VH_PHASE3:
4253 /*
4254 * Phase 3 is done, if dring registration
4255 * has been exchanged successfully.
4256 */
4257 status = ((ldcp->hstate & DRING_INFO_EXCHANGED) ==
4258 DRING_INFO_EXCHANGED);
4259 break;
4260
4261 case VH_PHASE4:
4262 /* Phase 4 is done, if rdx msg has been exchanged */
4263 status = ((ldcp->hstate & RDX_EXCHANGED) ==
4264 RDX_EXCHANGED);
4265 break;
4266
4267 default:
4268 break;
4269 }
4270
4271 if (status == 0) {
4272 return (VGEN_FAILURE);
4273 }
4274 DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase);
4275 return (VGEN_SUCCESS);
4276 }
4277
4278 /*
4279 * Link State Update Notes:
4280 * The link state of the channel connected to vswitch is reported as the link
4281 * state of the vnet device, by default. If the channel is down or reset, then
4282 * the link state is marked 'down'. If the channel is 'up' *and* handshake
4283 * between the vnet and vswitch is successful, then the link state is marked
4284 * 'up'. If physical network link state is desired, then the vnet device must
4285 * be configured to get physical link updates and the 'linkprop' property
4286 * in the virtual-device MD node indicates this. As part of attribute exchange
4287 * the vnet device negotiates with the vswitch to obtain physical link state
4288 * updates. If it successfully negotiates, vswitch sends an initial physlink
4289 * msg once the handshake is done and further whenever the physical link state
4290 * changes. Currently we don't have mac layer interfaces to report two distinct
4291 * link states - virtual and physical. Thus, if the vnet has been configured to
4292 * get physical link updates, then the link status will be reported as 'up'
4293 * only when both the virtual and physical links are up.
4294 */
4295 static void
 4296 vgen_link_update(vgen_t *vgenp, link_state_t link_state)
4297 {
4298 vnet_link_update(vgenp->vnetp, link_state);
4299 }
4300
4301 /*
4302 * Handle a version info msg from the peer or an ACK/NACK from the peer
4303 * to a version info msg that we sent.
4304 */
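/*
 * Illustrative example of the exchange: if the peer's VER_INFO requests a
 * version higher than any we support, we NACK with the highest version we do
 * support; the peer is then expected to re-send VER_INFO with that (or a
 * lower) version, which we ACK, completing Phase 1. The symmetric logic runs
 * when we receive a NACK to our own VER_INFO (see the VIO_SUBTYPE_NACK case
 * below).
 */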
4305 static int
 4306 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4307 {
4308 vgen_t *vgenp;
4309 vio_ver_msg_t *vermsg = (vio_ver_msg_t *)tagp;
4310 int ack = 0;
4311 int failed = 0;
4312 int idx;
4313 vgen_ver_t *versions = ldcp->vgen_versions;
4314 int rv = 0;
4315
4316 vgenp = LDC_TO_VGEN(ldcp);
4317 DBG1(vgenp, ldcp, "enter\n");
4318 switch (tagp->vio_subtype) {
4319 case VIO_SUBTYPE_INFO:
4320
4321 /* Cache sid of peer if this is the first time */
4322 if (ldcp->peer_sid == 0) {
4323 DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
4324 tagp->vio_sid);
4325 ldcp->peer_sid = tagp->vio_sid;
4326 }
4327
4328 if (ldcp->hphase != VH_PHASE1) {
4329 /*
4330 * If we are not already in VH_PHASE1, reset to
4331 * pre-handshake state, and initiate handshake
4332 * to the peer too.
4333 */
4334 return (EINVAL);
4335 }
4336
4337 ldcp->hstate |= VER_INFO_RCVD;
4338
4339 /* save peer's requested values */
4340 ldcp->peer_hparams.ver_major = vermsg->ver_major;
4341 ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
4342 ldcp->peer_hparams.dev_class = vermsg->dev_class;
4343
4344 if ((vermsg->dev_class != VDEV_NETWORK) &&
4345 (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
4346 /* unsupported dev_class, send NACK */
4347
4348 DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4349
4350 tagp->vio_subtype = VIO_SUBTYPE_NACK;
4351 tagp->vio_sid = ldcp->local_sid;
4352 /* send reply msg back to peer */
4353 rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
4354 sizeof (*vermsg), B_FALSE);
4355 if (rv != VGEN_SUCCESS) {
4356 return (rv);
4357 }
4358 return (VGEN_FAILURE);
4359 }
4360
4361 DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
4362 vermsg->ver_major, vermsg->ver_minor);
4363
4364 idx = 0;
4365
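		/*
		 * Walk our table of supported versions (ordered highest
		 * first; versions[0] is the highest we support) and decide
		 * whether to ACK the peer's version, NACK with the next
		 * lower version we support, or fail the negotiation if
		 * nothing matches.
		 */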
4366 for (;;) {
4367
4368 if (vermsg->ver_major > versions[idx].ver_major) {
4369
4370 /* nack with next lower version */
4371 tagp->vio_subtype = VIO_SUBTYPE_NACK;
4372 vermsg->ver_major = versions[idx].ver_major;
4373 vermsg->ver_minor = versions[idx].ver_minor;
4374 break;
4375 }
4376
4377 if (vermsg->ver_major == versions[idx].ver_major) {
4378
4379 /* major version match - ACK version */
4380 tagp->vio_subtype = VIO_SUBTYPE_ACK;
4381 ack = 1;
4382
4383 /*
4384 * lower minor version to the one this endpt
4385 * supports, if necessary
4386 */
4387 if (vermsg->ver_minor >
4388 versions[idx].ver_minor) {
4389 vermsg->ver_minor =
4390 versions[idx].ver_minor;
4391 ldcp->peer_hparams.ver_minor =
4392 versions[idx].ver_minor;
4393 }
4394 break;
4395 }
4396
4397 idx++;
4398
4399 if (idx == VGEN_NUM_VER) {
4400
4401 /* no version match - send NACK */
4402 tagp->vio_subtype = VIO_SUBTYPE_NACK;
4403 vermsg->ver_major = 0;
4404 vermsg->ver_minor = 0;
4405 failed = 1;
4406 break;
4407 }
4408
4409 }
4410
4411 tagp->vio_sid = ldcp->local_sid;
4412
4413 /* send reply msg back to peer */
4414 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
4415 B_FALSE);
4416 if (rv != VGEN_SUCCESS) {
4417 return (rv);
4418 }
4419
4420 if (ack) {
4421 ldcp->hstate |= VER_ACK_SENT;
4422 DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
4423 vermsg->ver_major, vermsg->ver_minor);
4424 }
4425 if (failed) {
4426 DWARN(vgenp, ldcp, "Negotiation Failed\n");
4427 return (VGEN_FAILURE);
4428 }
4429 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4430
4431 /* VER_ACK_SENT and VER_ACK_RCVD */
4432
4433 /* local and peer versions match? */
4434 ASSERT((ldcp->local_hparams.ver_major ==
4435 ldcp->peer_hparams.ver_major) &&
4436 (ldcp->local_hparams.ver_minor ==
4437 ldcp->peer_hparams.ver_minor));
4438
4439 vgen_set_vnet_proto_ops(ldcp);
4440
4441 /* move to the next phase */
4442 rv = vgen_handshake(vh_nextphase(ldcp));
4443 if (rv != 0) {
4444 return (rv);
4445 }
4446 }
4447
4448 break;
4449
4450 case VIO_SUBTYPE_ACK:
4451
4452 if (ldcp->hphase != VH_PHASE1) {
4453 /* This should not happen. */
4454 DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
4455 return (VGEN_FAILURE);
4456 }
4457
4458 /* SUCCESS - we have agreed on a version */
4459 ldcp->local_hparams.ver_major = vermsg->ver_major;
4460 ldcp->local_hparams.ver_minor = vermsg->ver_minor;
4461 ldcp->hstate |= VER_ACK_RCVD;
4462
4463 DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
4464 vermsg->ver_major, vermsg->ver_minor);
4465
4466 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4467
4468 /* VER_ACK_SENT and VER_ACK_RCVD */
4469
4470 /* local and peer versions match? */
4471 ASSERT((ldcp->local_hparams.ver_major ==
4472 ldcp->peer_hparams.ver_major) &&
4473 (ldcp->local_hparams.ver_minor ==
4474 ldcp->peer_hparams.ver_minor));
4475
4476 vgen_set_vnet_proto_ops(ldcp);
4477
4478 /* move to the next phase */
4479 rv = vgen_handshake(vh_nextphase(ldcp));
4480 if (rv != 0) {
4481 return (rv);
4482 }
4483 }
4484 break;
4485
4486 case VIO_SUBTYPE_NACK:
4487
4488 if (ldcp->hphase != VH_PHASE1) {
4489 /* This should not happen. */
4490 DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
4491 "Phase(%u)\n", ldcp->hphase);
4492 return (VGEN_FAILURE);
4493 }
4494
4495 DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
4496 vermsg->ver_major, vermsg->ver_minor);
4497
4498 /* check if version in NACK is zero */
4499 if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
4500 /*
4501 * Version Negotiation has failed.
4502 */
4503 DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
4504 return (VGEN_FAILURE);
4505 }
4506
4507 idx = 0;
4508
4509 for (;;) {
4510
4511 if (vermsg->ver_major > versions[idx].ver_major) {
4512 /* select next lower version */
4513
4514 ldcp->local_hparams.ver_major =
4515 versions[idx].ver_major;
4516 ldcp->local_hparams.ver_minor =
4517 versions[idx].ver_minor;
4518 break;
4519 }
4520
4521 if (vermsg->ver_major == versions[idx].ver_major) {
4522 /* major version match */
4523
4524 ldcp->local_hparams.ver_major =
4525 versions[idx].ver_major;
4526
4527 ldcp->local_hparams.ver_minor =
4528 versions[idx].ver_minor;
4529 break;
4530 }
4531
4532 idx++;
4533
4534 if (idx == VGEN_NUM_VER) {
4535 /*
4536 * no version match.
4537 * Version Negotiation has failed.
4538 */
4539 DWARN(vgenp, ldcp,
4540 "Version Negotiation Failed\n");
4541 return (VGEN_FAILURE);
4542 }
4543
4544 }
4545
4546 rv = vgen_send_version_negotiate(ldcp);
4547 if (rv != VGEN_SUCCESS) {
4548 return (rv);
4549 }
4550
4551 break;
4552 }
4553
4554 DBG1(vgenp, ldcp, "exit\n");
4555 return (VGEN_SUCCESS);
4556 }
4557
4558 static int
 4559 vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4560 {
4561 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4562 vgen_hparams_t *lp = &ldcp->local_hparams;
4563 vgen_hparams_t *rp = &ldcp->peer_hparams;
4564 uint32_t mtu;
4565 uint8_t dring_mode;
4566
4567 ldcp->hstate |= ATTR_INFO_RCVD;
4568
4569 /* save peer's values */
4570 rp->mtu = msg->mtu;
4571 rp->addr = msg->addr;
4572 rp->addr_type = msg->addr_type;
4573 rp->xfer_mode = msg->xfer_mode;
4574 rp->ack_freq = msg->ack_freq;
4575 rp->dring_mode = msg->options;
4576
4577 /*
4578 * Process address type, ack frequency and transfer mode attributes.
4579 */
4580 if ((msg->addr_type != ADDR_TYPE_MAC) ||
4581 (msg->ack_freq > 64) ||
4582 (msg->xfer_mode != lp->xfer_mode)) {
4583 return (VGEN_FAILURE);
4584 }
4585
4586 /*
4587 * Process dring mode attribute.
4588 */
4589 if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
4590 /*
4591 * Versions >= 1.6:
4592 * Though we are operating in v1.6 mode, it is possible that
4593 * RxDringData mode has been disabled either on this guest or
4594 * on the peer guest. If so, we revert to pre v1.6 behavior of
4595 * TxDring mode. But this must be agreed upon in both
4596 * directions of attr exchange. We first determine the mode
4597 * that can be negotiated.
4598 */
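		/*
		 * For example (illustrative): if the peer advertises
		 * (VIO_RX_DRING_DATA | VIO_TX_DRING) but our mapin space is
		 * too small (vgen_mapin_avail() fails), we fall back to
		 * VIO_TX_DRING.
		 */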
4599 if ((msg->options & VIO_RX_DRING_DATA) != 0 &&
4600 vgen_mapin_avail(ldcp) == B_TRUE) {
4601 /*
4602 * We are capable of handling RxDringData AND the peer
4603 * is also capable of it; we enable RxDringData mode on
4604 * this channel.
4605 */
4606 dring_mode = VIO_RX_DRING_DATA;
4607 } else if ((msg->options & VIO_TX_DRING) != 0) {
4608 /*
4609 * If the peer is capable of TxDring mode, we
4610 * negotiate TxDring mode on this channel.
4611 */
4612 dring_mode = VIO_TX_DRING;
4613 } else {
4614 /*
4615 * We support only VIO_TX_DRING and VIO_RX_DRING_DATA
4616 * modes. We don't support VIO_RX_DRING mode.
4617 */
4618 return (VGEN_FAILURE);
4619 }
4620
4621 /*
4622 * If we have received an ack for the attr info that we sent,
4623 * then check if the dring mode matches what the peer had ack'd
4624 * (saved in local hparams). If they don't match, we fail the
4625 * handshake.
4626 */
4627 if (ldcp->hstate & ATTR_ACK_RCVD) {
4628 if (msg->options != lp->dring_mode) {
4629 /* send NACK */
4630 return (VGEN_FAILURE);
4631 }
4632 } else {
4633 /*
4634 * Save the negotiated dring mode in our attr
4635 * parameters, so it gets sent in the attr info from us
4636 * to the peer.
4637 */
4638 lp->dring_mode = dring_mode;
4639 }
4640
4641 /* save the negotiated dring mode in the msg to be replied */
4642 msg->options = dring_mode;
4643 }
4644
4645 /*
4646 * Process MTU attribute.
4647 */
4648 if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4649 /*
4650 * Versions >= 1.4:
 4651		 * Validate that the peer's mtu is at least ETHERMAX. Then the mtu
 4652		 * is negotiated down to the minimum of our mtu and the peer's mtu.
4653 */
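		/*
		 * For example (illustrative values): if our max_frame_size is
		 * 1600 and the peer advertises an mtu of 9216, the negotiated
		 * mtu is MIN(9216, 1600) = 1600 and both ends settle on that
		 * value.
		 */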
4654 if (msg->mtu < ETHERMAX) {
4655 return (VGEN_FAILURE);
4656 }
4657
4658 mtu = MIN(msg->mtu, vgenp->max_frame_size);
4659
4660 /*
4661 * If we have received an ack for the attr info
4662 * that we sent, then check if the mtu computed
4663 * above matches the mtu that the peer had ack'd
4664 * (saved in local hparams). If they don't
4665 * match, we fail the handshake.
4666 */
4667 if (ldcp->hstate & ATTR_ACK_RCVD) {
4668 if (mtu != lp->mtu) {
4669 /* send NACK */
4670 return (VGEN_FAILURE);
4671 }
4672 } else {
4673 /*
4674 * Save the mtu computed above in our
4675 * attr parameters, so it gets sent in
4676 * the attr info from us to the peer.
4677 */
4678 lp->mtu = mtu;
4679 }
4680
4681 /* save the MIN mtu in the msg to be replied */
4682 msg->mtu = mtu;
4683
4684 } else {
4685 /* versions < 1.4, mtu must match */
4686 if (msg->mtu != lp->mtu) {
4687 return (VGEN_FAILURE);
4688 }
4689 }
4690
4691 return (VGEN_SUCCESS);
4692 }
4693
4694 static int
 4695 vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
4696 {
4697 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4698 vgen_hparams_t *lp = &ldcp->local_hparams;
4699
4700 /*
4701 * Process dring mode attribute.
4702 */
4703 if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
4704 /*
4705 * Versions >= 1.6:
4706 * The ack msg sent by the peer contains the negotiated dring
4707 * mode between our capability (that we had sent in our attr
4708 * info) and the peer's capability.
4709 */
4710 if (ldcp->hstate & ATTR_ACK_SENT) {
4711 /*
4712 * If we have sent an ack for the attr info msg from
4713 * the peer, check if the dring mode that was
4714 * negotiated then (saved in local hparams) matches the
4715 * mode that the peer has ack'd. If they don't match,
4716 * we fail the handshake.
4717 */
4718 if (lp->dring_mode != msg->options) {
4719 return (VGEN_FAILURE);
4720 }
4721 } else {
4722 if ((msg->options & lp->dring_mode) == 0) {
4723 /*
4724 * Peer ack'd with a mode that we don't
4725 * support; we fail the handshake.
4726 */
4727 return (VGEN_FAILURE);
4728 }
4729 if ((msg->options & (VIO_TX_DRING|VIO_RX_DRING_DATA))
4730 == (VIO_TX_DRING|VIO_RX_DRING_DATA)) {
4731 /*
4732 * Peer must ack with only one negotiated mode.
4733 * Otherwise fail handshake.
4734 */
4735 return (VGEN_FAILURE);
4736 }
4737
4738 /*
4739 * Save the negotiated mode, so we can validate it when
4740 * we receive attr info from the peer.
4741 */
4742 lp->dring_mode = msg->options;
4743 }
4744 }
4745
4746 /*
4747 * Process Physical Link Update attribute.
4748 */
4749 if (VGEN_VER_GTEQ(ldcp, 1, 5) &&
4750 ldcp->portp == vgenp->vsw_portp) {
4751 /*
4752 * Versions >= 1.5:
4753 * If the vnet device has been configured to get
4754 * physical link state updates, check the corresponding
4755 * bits in the ack msg, if the peer is vswitch.
4756 */
4757 if (((lp->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
4758 PHYSLINK_UPDATE_STATE) &&
4759 ((msg->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
4760 PHYSLINK_UPDATE_STATE_ACK)) {
4761 vgenp->pls_negotiated = B_TRUE;
4762 } else {
4763 vgenp->pls_negotiated = B_FALSE;
4764 }
4765 }
4766
4767 /*
4768 * Process MTU attribute.
4769 */
4770 if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
4771 /*
4772 * Versions >= 1.4:
4773 * The ack msg sent by the peer contains the minimum of
4774 * our mtu (that we had sent in our attr info) and the
4775 * peer's mtu.
4776 *
4777 * If we have sent an ack for the attr info msg from
4778 * the peer, check if the mtu that was computed then
4779 * (saved in local hparams) matches the mtu that the
4780 * peer has ack'd. If they don't match, we fail the
4781 * handshake.
4782 */
4783 if (ldcp->hstate & ATTR_ACK_SENT) {
4784 if (lp->mtu != msg->mtu) {
4785 return (VGEN_FAILURE);
4786 }
4787 } else {
4788 /*
4789 * If the mtu ack'd by the peer is > our mtu
4790 * fail handshake. Otherwise, save the mtu, so
4791 * we can validate it when we receive attr info
4792 * from our peer.
4793 */
4794 if (msg->mtu > lp->mtu) {
4795 return (VGEN_FAILURE);
4796 }
4797 if (msg->mtu <= lp->mtu) {
4798 lp->mtu = msg->mtu;
4799 }
4800 }
4801 }
4802
4803 return (VGEN_SUCCESS);
4804 }
4805
4806
4807 /*
4808 * Handle an attribute info msg from the peer or an ACK/NACK from the peer
4809 * to an attr info msg that we sent.
4810 */
4811 static int
 4812 vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4813 {
4814 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4815 vnet_attr_msg_t *msg = (vnet_attr_msg_t *)tagp;
4816 int rv = 0;
4817
4818 DBG1(vgenp, ldcp, "enter\n");
4819 if (ldcp->hphase != VH_PHASE2) {
4820 DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d),"
4821 " Invalid Phase(%u)\n",
4822 tagp->vio_subtype, ldcp->hphase);
4823 return (VGEN_FAILURE);
4824 }
4825 switch (tagp->vio_subtype) {
4826 case VIO_SUBTYPE_INFO:
4827
4828 rv = vgen_handle_attr_info(ldcp, msg);
4829 if (rv == VGEN_SUCCESS) {
4830 tagp->vio_subtype = VIO_SUBTYPE_ACK;
4831 } else {
4832 tagp->vio_subtype = VIO_SUBTYPE_NACK;
4833 }
4834 tagp->vio_sid = ldcp->local_sid;
4835
4836 /* send reply msg back to peer */
4837 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg),
4838 B_FALSE);
4839 if (rv != VGEN_SUCCESS) {
4840 return (rv);
4841 }
4842
4843 if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
4844 DWARN(vgenp, ldcp, "ATTR_NACK_SENT");
4845 break;
4846 }
4847
4848 ldcp->hstate |= ATTR_ACK_SENT;
4849 DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n");
4850 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4851 rv = vgen_handshake(vh_nextphase(ldcp));
4852 if (rv != 0) {
4853 return (rv);
4854 }
4855 }
4856
4857 break;
4858
4859 case VIO_SUBTYPE_ACK:
4860
4861 rv = vgen_handle_attr_ack(ldcp, msg);
4862 if (rv == VGEN_FAILURE) {
4863 break;
4864 }
4865
4866 ldcp->hstate |= ATTR_ACK_RCVD;
4867 DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n");
4868
4869 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
4870 rv = vgen_handshake(vh_nextphase(ldcp));
4871 if (rv != 0) {
4872 return (rv);
4873 }
4874 }
4875 break;
4876
4877 case VIO_SUBTYPE_NACK:
4878
4879 DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n");
4880 return (VGEN_FAILURE);
4881 }
4882 DBG1(vgenp, ldcp, "exit\n");
4883 return (VGEN_SUCCESS);
4884 }
4885
4886 static int
 4887 vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4888 {
4889 int rv = 0;
4890 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4891 vgen_hparams_t *lp = &ldcp->local_hparams;
4892
4893 DBG2(vgenp, ldcp, "DRING_INFO_RCVD");
4894 ldcp->hstate |= DRING_INFO_RCVD;
4895
4896 if (VGEN_VER_GTEQ(ldcp, 1, 6) &&
4897 (lp->dring_mode != ((vio_dring_reg_msg_t *)tagp)->options)) {
4898 /*
 4899		 * The earlier version of the Solaris vnet driver doesn't set the
 4900		 * option (VIO_TX_DRING in its case) correctly in its dring reg
 4901		 * message. We work around that here by doing the check only
4902 * for versions >= v1.6.
4903 */
4904 DWARN(vgenp, ldcp,
4905 "Rcvd dring reg option (%d), negotiated mode (%d)\n",
4906 ((vio_dring_reg_msg_t *)tagp)->options, lp->dring_mode);
4907 return (VGEN_FAILURE);
4908 }
4909
4910 /*
4911 * Map dring exported by the peer.
4912 */
4913 rv = vgen_map_dring(ldcp, (void *)tagp);
4914 if (rv != VGEN_SUCCESS) {
4915 return (rv);
4916 }
4917
4918 /*
4919 * Map data buffers exported by the peer if we are in RxDringData mode.
4920 */
4921 if (lp->dring_mode == VIO_RX_DRING_DATA) {
4922 rv = vgen_map_data(ldcp, (void *)tagp);
4923 if (rv != VGEN_SUCCESS) {
4924 vgen_unmap_dring(ldcp);
4925 return (rv);
4926 }
4927 }
4928
4929 if (ldcp->peer_hparams.dring_ready == B_FALSE) {
4930 ldcp->peer_hparams.dring_ready = B_TRUE;
4931 }
4932
4933 return (VGEN_SUCCESS);
4934 }
4935
4936 static int
 4937 vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4938 {
4939 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4940 vgen_hparams_t *lp = &ldcp->local_hparams;
4941
4942 DBG2(vgenp, ldcp, "DRING_ACK_RCVD");
4943 ldcp->hstate |= DRING_ACK_RCVD;
4944
4945 if (lp->dring_ready) {
4946 return (VGEN_SUCCESS);
4947 }
4948
4949 /* save dring_ident acked by peer */
4950 lp->dring_ident = ((vio_dring_reg_msg_t *)tagp)->dring_ident;
4951
4952 /* local dring is now ready */
4953 lp->dring_ready = B_TRUE;
4954
4955 return (VGEN_SUCCESS);
4956 }
4957
4958 /*
4959 * Handle a descriptor ring register msg from the peer or an ACK/NACK from
4960 * the peer to a dring register msg that we sent.
4961 */
4962 static int
 4963 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
4964 {
4965 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
4966 int rv = 0;
4967 int msgsize;
4968 vgen_hparams_t *lp = &ldcp->local_hparams;
4969
4970 DBG1(vgenp, ldcp, "enter\n");
4971 if (ldcp->hphase < VH_PHASE2) {
4972 /* dring_info can be rcvd in any of the phases after Phase1 */
4973 DWARN(vgenp, ldcp,
4974 "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
4975 tagp->vio_subtype, ldcp->hphase);
4976 return (VGEN_FAILURE);
4977 }
4978
4979 switch (tagp->vio_subtype) {
4980 case VIO_SUBTYPE_INFO:
4981
4982 rv = vgen_handle_dring_reg_info(ldcp, tagp);
4983 if (rv == VGEN_SUCCESS) {
4984 tagp->vio_subtype = VIO_SUBTYPE_ACK;
4985 } else {
4986 tagp->vio_subtype = VIO_SUBTYPE_NACK;
4987 }
4988
4989 tagp->vio_sid = ldcp->local_sid;
4990
4991 if (lp->dring_mode == VIO_RX_DRING_DATA) {
4992 msgsize =
4993 VNET_DRING_REG_EXT_MSG_SIZE(ldcp->tx_data_ncookies);
4994 } else {
4995 msgsize = sizeof (vio_dring_reg_msg_t);
4996 }
4997
4998 /* send reply msg back to peer */
4999 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, msgsize,
5000 B_FALSE);
5001 if (rv != VGEN_SUCCESS) {
5002 return (rv);
5003 }
5004
5005 if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
5006 DWARN(vgenp, ldcp, "DRING_NACK_SENT");
5007 return (VGEN_FAILURE);
5008 }
5009
5010 ldcp->hstate |= DRING_ACK_SENT;
5011 DBG2(vgenp, ldcp, "DRING_ACK_SENT");
5012
5013 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5014 rv = vgen_handshake(vh_nextphase(ldcp));
5015 if (rv != 0) {
5016 return (rv);
5017 }
5018 }
5019 break;
5020
5021 case VIO_SUBTYPE_ACK:
5022
5023 rv = vgen_handle_dring_reg_ack(ldcp, tagp);
5024 if (rv == VGEN_FAILURE) {
5025 return (rv);
5026 }
5027
5028 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5029 rv = vgen_handshake(vh_nextphase(ldcp));
5030 if (rv != 0) {
5031 return (rv);
5032 }
5033 }
5034
5035 break;
5036
5037 case VIO_SUBTYPE_NACK:
5038
5039 DWARN(vgenp, ldcp, "DRING_NACK_RCVD");
5040 return (VGEN_FAILURE);
5041 }
5042 DBG1(vgenp, ldcp, "exit\n");
5043 return (VGEN_SUCCESS);
5044 }
5045
5046 /*
5047 * Handle a rdx info msg from the peer or an ACK/NACK
5048 * from the peer to a rdx info msg that we sent.
5049 */
5050 static int
 5051 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5052 {
5053 int rv = 0;
5054 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5055
5056 DBG1(vgenp, ldcp, "enter\n");
5057 if (ldcp->hphase != VH_PHASE4) {
5058 DWARN(vgenp, ldcp,
5059 "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
5060 tagp->vio_subtype, ldcp->hphase);
5061 return (VGEN_FAILURE);
5062 }
5063 switch (tagp->vio_subtype) {
5064 case VIO_SUBTYPE_INFO:
5065
5066 DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
5067 ldcp->hstate |= RDX_INFO_RCVD;
5068
5069 tagp->vio_subtype = VIO_SUBTYPE_ACK;
5070 tagp->vio_sid = ldcp->local_sid;
5071 /* send reply msg back to peer */
5072 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
5073 B_FALSE);
5074 if (rv != VGEN_SUCCESS) {
5075 return (rv);
5076 }
5077
5078 ldcp->hstate |= RDX_ACK_SENT;
5079 DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");
5080
5081 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5082 rv = vgen_handshake(vh_nextphase(ldcp));
5083 if (rv != 0) {
5084 return (rv);
5085 }
5086 }
5087
5088 break;
5089
5090 case VIO_SUBTYPE_ACK:
5091
5092 ldcp->hstate |= RDX_ACK_RCVD;
5093
5094 DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");
5095
5096 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
5097 rv = vgen_handshake(vh_nextphase(ldcp));
5098 if (rv != 0) {
5099 return (rv);
5100 }
5101 }
5102 break;
5103
5104 case VIO_SUBTYPE_NACK:
5105
5106 DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
5107 return (VGEN_FAILURE);
5108 }
5109 DBG1(vgenp, ldcp, "exit\n");
5110 return (VGEN_SUCCESS);
5111 }
5112
5113 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */
5114 static int
 5115 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5116 {
5117 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5118 vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp;
5119 struct ether_addr *addrp;
5120 int count;
5121 int i;
5122
5123 DBG1(vgenp, ldcp, "enter\n");
5124 switch (tagp->vio_subtype) {
5125
5126 case VIO_SUBTYPE_INFO:
5127
5128 /* vnet shouldn't recv set mcast msg, only vsw handles it */
5129 DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
5130 break;
5131
5132 case VIO_SUBTYPE_ACK:
5133
5134 /* success adding/removing multicast addr */
5135 DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
5136 break;
5137
5138 case VIO_SUBTYPE_NACK:
5139
5140 DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
5141 if (!(msgp->set)) {
5142 /* multicast remove request failed */
5143 break;
5144 }
5145
5146 /* multicast add request failed */
5147 for (count = 0; count < msgp->count; count++) {
5148 addrp = &(msgp->mca[count]);
5149
5150 /* delete address from the table */
5151 for (i = 0; i < vgenp->mccount; i++) {
5152 if (ether_cmp(addrp,
5153 &(vgenp->mctab[i])) == 0) {
5154 if (vgenp->mccount > 1) {
5155 int t = vgenp->mccount - 1;
5156 vgenp->mctab[i] =
5157 vgenp->mctab[t];
5158 }
5159 vgenp->mccount--;
5160 break;
5161 }
5162 }
5163 }
5164 break;
5165
5166 }
5167 DBG1(vgenp, ldcp, "exit\n");
5168
5169 return (VGEN_SUCCESS);
5170 }
5171
5172 /*
 5173 * Physical link information message from the peer. Only vswitch should send
 5174 * us this message, and only if the vnet device has been configured to get
 5175 * physical link state updates. Note that we must have already negotiated this
 5176 * with the vswitch during the attribute exchange phase of the handshake.
5177 */
5178 static int
 5179 vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5180 {
5181 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5182 vnet_physlink_msg_t *msgp = (vnet_physlink_msg_t *)tagp;
5183 link_state_t link_state;
5184 int rv;
5185
5186 if (ldcp->portp != vgenp->vsw_portp) {
5187 /*
 5188		 * Drop the message and don't process it, as we should
 5189		 * receive physlink_info messages only from vswitch.
5190 */
5191 return (VGEN_SUCCESS);
5192 }
5193
5194 if (vgenp->pls_negotiated == B_FALSE) {
5195 /*
 5196		 * Drop the message and don't process it, as we should receive
 5197		 * physlink_info messages only if physlink update is enabled for
 5198		 * the device and negotiated with vswitch.
5199 */
5200 return (VGEN_SUCCESS);
5201 }
5202
5203 switch (tagp->vio_subtype) {
5204
5205 case VIO_SUBTYPE_INFO:
5206
5207 if ((msgp->physlink_info & VNET_PHYSLINK_STATE_MASK) ==
5208 VNET_PHYSLINK_STATE_UP) {
5209 link_state = LINK_STATE_UP;
5210 } else {
5211 link_state = LINK_STATE_DOWN;
5212 }
5213
5214 if (vgenp->phys_link_state != link_state) {
5215 vgenp->phys_link_state = link_state;
5216 mutex_exit(&ldcp->cblock);
5217
5218 /* Now update the stack */
5219 vgen_link_update(vgenp, link_state);
5220
5221 mutex_enter(&ldcp->cblock);
5222 }
5223
5224 tagp->vio_subtype = VIO_SUBTYPE_ACK;
5225 tagp->vio_sid = ldcp->local_sid;
5226
5227 /* send reply msg back to peer */
5228 rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
5229 sizeof (vnet_physlink_msg_t), B_FALSE);
5230 if (rv != VGEN_SUCCESS) {
5231 return (rv);
5232 }
5233 break;
5234
5235 case VIO_SUBTYPE_ACK:
5236
5237 /* vnet shouldn't recv physlink acks */
5238 DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n");
5239 break;
5240
5241 case VIO_SUBTYPE_NACK:
5242
5243 /* vnet shouldn't recv physlink nacks */
5244 DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n");
5245 break;
5246
5247 }
5248 DBG1(vgenp, ldcp, "exit\n");
5249
5250 return (VGEN_SUCCESS);
5251 }
5252
5253 /* handler for control messages received from the peer ldc end-point */
5254 static int
 5255 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5256 {
5257 int rv = 0;
5258 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5259
5260 DBG1(vgenp, ldcp, "enter\n");
5261 switch (tagp->vio_subtype_env) {
5262
5263 case VIO_VER_INFO:
5264 rv = vgen_handle_version_negotiate(ldcp, tagp);
5265 break;
5266
5267 case VIO_ATTR_INFO:
5268 rv = vgen_handle_attr_msg(ldcp, tagp);
5269 break;
5270
5271 case VIO_DRING_REG:
5272 rv = vgen_handle_dring_reg(ldcp, tagp);
5273 break;
5274
5275 case VIO_RDX:
5276 rv = vgen_handle_rdx_info(ldcp, tagp);
5277 break;
5278
5279 case VNET_MCAST_INFO:
5280 rv = vgen_handle_mcast_info(ldcp, tagp);
5281 break;
5282
5283 case VIO_DDS_INFO:
5284 /*
5285 * If we are in the process of resetting the vswitch channel,
5286 * drop the dds message. A new handshake will be initiated
5287 * when the channel comes back up after the reset and dds
5288 * negotiation can then continue.
5289 */
5290 if (ldcp->reset_in_progress == 1) {
5291 break;
5292 }
5293 rv = vgen_dds_rx(ldcp, tagp);
5294 break;
5295
5296 case VNET_PHYSLINK_INFO:
5297 rv = vgen_handle_physlink_info(ldcp, tagp);
5298 break;
5299 }
5300
5301 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5302 return (rv);
5303 }
5304
5305 /* handler for error messages received from the peer ldc end-point */
5306 static void
 5307 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5308 {
5309 _NOTE(ARGUNUSED(ldcp, tagp))
5310 }
5311
5312 /*
5313 * This function handles raw pkt data messages received over the channel.
5314 * Currently, only priority-eth-type frames are received through this mechanism.
 5315 * In this case, the frame (data) is present within the message itself, which
5316 * is copied into an mblk before sending it up the stack.
5317 */
5318 void
 5319 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen)
5320 {
5321 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg1;
5322 vio_raw_data_msg_t *pkt = (vio_raw_data_msg_t *)arg2;
5323 uint32_t size;
5324 mblk_t *mp;
5325 vio_mblk_t *vmp;
5326 vio_net_rx_cb_t vrx_cb = NULL;
5327 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5328 vgen_stats_t *statsp = &ldcp->stats;
5329 vgen_hparams_t *lp = &ldcp->local_hparams;
5330 uint_t dring_mode = lp->dring_mode;
5331
5332 ASSERT(MUTEX_HELD(&ldcp->cblock));
5333
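	/*
	 * Drop cblock while we allocate an mblk, copy the frame and possibly
	 * call up the stack; it is reacquired before returning, as the
	 * caller expects cblock to still be held.
	 */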
5334 mutex_exit(&ldcp->cblock);
5335
5336 size = msglen - VIO_PKT_DATA_HDRSIZE;
5337 if (size < ETHERMIN || size > lp->mtu) {
5338 (void) atomic_inc_32(&statsp->rx_pri_fail);
5339 mutex_enter(&ldcp->cblock);
5340 return;
5341 }
5342
5343 vmp = vio_multipool_allocb(&ldcp->vmp, size);
5344 if (vmp == NULL) {
5345 mp = allocb(size, BPRI_MED);
5346 if (mp == NULL) {
5347 (void) atomic_inc_32(&statsp->rx_pri_fail);
5348 DWARN(vgenp, ldcp, "allocb failure, "
5349 "unable to process priority frame\n");
5350 mutex_enter(&ldcp->cblock);
5351 return;
5352 }
5353 } else {
5354 mp = vmp->mp;
5355 }
5356
5357 /* copy the frame from the payload of raw data msg into the mblk */
5358 bcopy(pkt->data, mp->b_rptr, size);
5359 mp->b_wptr = mp->b_rptr + size;
5360
5361 if (vmp != NULL) {
5362 vmp->state = VIO_MBLK_HAS_DATA;
5363 }
5364
5365 /* update stats */
5366 (void) atomic_inc_64(&statsp->rx_pri_packets);
5367 (void) atomic_add_64(&statsp->rx_pri_bytes, size);
5368
5369 /*
5370 * If polling is currently enabled, add the packet to the priority
5371 * packets list and return. It will be picked up by the polling thread.
5372 */
5373 if (dring_mode == VIO_RX_DRING_DATA) {
5374 mutex_enter(&ldcp->rxlock);
5375 } else {
5376 mutex_enter(&ldcp->pollq_lock);
5377 }
5378
5379 if (ldcp->polling_on == B_TRUE) {
5380 if (ldcp->rx_pri_tail != NULL) {
5381 ldcp->rx_pri_tail->b_next = mp;
5382 } else {
5383 ldcp->rx_pri_head = ldcp->rx_pri_tail = mp;
5384 }
5385 } else {
5386 vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
5387 }
5388
5389 if (dring_mode == VIO_RX_DRING_DATA) {
5390 mutex_exit(&ldcp->rxlock);
5391 } else {
5392 mutex_exit(&ldcp->pollq_lock);
5393 }
5394
5395 if (vrx_cb != NULL) {
5396 vrx_cb(ldcp->portp->vhp, mp);
5397 }
5398
5399 mutex_enter(&ldcp->cblock);
5400 }
5401
5402 /*
5403 * dummy pkt data handler function for vnet protocol version 1.0
5404 */
5405 static void
 5406 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
5407 {
5408 _NOTE(ARGUNUSED(arg1, arg2, msglen))
5409 }
5410
5411 /* handler for data messages received from the peer ldc end-point */
5412 static int
 5413 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen)
5414 {
5415 int rv = 0;
5416 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5417 vgen_hparams_t *lp = &ldcp->local_hparams;
5418
5419 DBG1(vgenp, ldcp, "enter\n");
5420
5421 if (ldcp->hphase != VH_DONE) {
5422 return (0);
5423 }
5424
5425 /*
5426 * We check the data msg seqnum. This is needed only in TxDring mode.
5427 */
5428 if (lp->dring_mode == VIO_TX_DRING &&
5429 tagp->vio_subtype == VIO_SUBTYPE_INFO) {
5430 rv = vgen_check_datamsg_seq(ldcp, tagp);
5431 if (rv != 0) {
5432 return (rv);
5433 }
5434 }
5435
5436 switch (tagp->vio_subtype_env) {
5437 case VIO_DRING_DATA:
5438 rv = ldcp->rx_dringdata((void *)ldcp, (void *)tagp);
5439 break;
5440
5441 case VIO_PKT_DATA:
5442 ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
5443 break;
5444 default:
5445 break;
5446 }
5447
5448 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
5449 return (rv);
5450 }
5451
5452
5453 static int
 5454 vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
5455 {
5456 int rv;
5457
5458 if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5459 ASSERT(MUTEX_HELD(&ldcp->cblock));
5460 }
5461
5462 /* Set the flag to indicate reset is in progress */
5463 if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
5464 /* another thread is already in the process of resetting */
5465 return (EBUSY);
5466 }
5467
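	/*
	 * Drop cblock when called from the LDC callback or the msg worker
	 * thread; vgen_process_reset() acquires cblock (and the other
	 * channel locks) itself while tearing down and reinitializing the
	 * channel.
	 */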
5468 if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5469 mutex_exit(&ldcp->cblock);
5470 }
5471
5472 rv = vgen_process_reset(ldcp, VGEN_FLAG_NEED_LDCRESET);
5473
5474 if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
5475 mutex_enter(&ldcp->cblock);
5476 }
5477
5478 return (rv);
5479 }
5480
5481 static void
 5482 vgen_ldc_up(vgen_ldc_t *ldcp)
5483 {
5484 int rv;
5485 uint32_t retries = 0;
5486 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5487
5488 ASSERT(MUTEX_HELD(&ldcp->cblock));
5489
5490 /*
 5491	 * If the channel has been reset the max # of times without successfully
 5492	 * completing the handshake, stop and do not bring the channel up.
5493 */
5494 if (ldcp->ldc_reset_count == vgen_ldc_max_resets) {
5495 cmn_err(CE_WARN, "!vnet%d: exceeded number of permitted"
5496 " handshake attempts (%d) on channel %ld",
5497 vgenp->instance, vgen_ldc_max_resets, ldcp->ldc_id);
5498 return;
5499 }
5500 ldcp->ldc_reset_count++;
5501
5502 do {
5503 rv = ldc_up(ldcp->ldc_handle);
 5504		if (rv == EWOULDBLOCK) {
5505 drv_usecwait(VGEN_LDC_UP_DELAY);
5506 }
5507 if (retries++ >= vgen_ldcup_retries)
5508 break;
5509 } while (rv == EWOULDBLOCK);
5510
5511 if (rv != 0) {
5512 DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
5513 }
5514 }
5515
5516 int
 5517 vgen_enable_intr(void *arg)
5518 {
5519 uint32_t end_ix;
5520 vio_dring_msg_t msg;
5521 vgen_port_t *portp = (vgen_port_t *)arg;
5522 vgen_ldc_t *ldcp = portp->ldcp;
5523 vgen_hparams_t *lp = &ldcp->local_hparams;
5524
5525 if (lp->dring_mode == VIO_RX_DRING_DATA) {
5526 mutex_enter(&ldcp->rxlock);
5527
5528 ldcp->polling_on = B_FALSE;
5529 /*
 5530		 * We send a stopped message to the peer (sender) as we are turning
5531 * off polled mode. This effectively restarts data interrupts
5532 * by allowing the peer to send further dring data msgs to us.
5533 */
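		/*
		 * end_ix is the last descriptor index we have processed
		 * (i.e., next_rxi decremented by DECR_RXI()).
		 */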
5534 end_ix = ldcp->next_rxi;
5535 DECR_RXI(end_ix, ldcp);
5536 msg.dring_ident = ldcp->peer_hparams.dring_ident;
5537 (void) vgen_send_dringack_shm(ldcp, (vio_msg_tag_t *)&msg,
5538 VNET_START_IDX_UNSPEC, end_ix, VIO_DP_STOPPED);
5539
5540 mutex_exit(&ldcp->rxlock);
5541 } else {
5542 mutex_enter(&ldcp->pollq_lock);
5543 ldcp->polling_on = B_FALSE;
5544 mutex_exit(&ldcp->pollq_lock);
5545 }
5546
5547 return (0);
5548 }
5549
5550 int
 5551 vgen_disable_intr(void *arg)
5552 {
5553 vgen_port_t *portp = (vgen_port_t *)arg;
5554 vgen_ldc_t *ldcp = portp->ldcp;
5555 vgen_hparams_t *lp = &ldcp->local_hparams;
5556
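	/*
	 * 'Disabling interrupts' here means switching this channel to polled
	 * mode: with polling_on set, received packets are queued (or left in
	 * the dring) to be picked up by vgen_rx_poll() instead of being
	 * delivered through the rx callback.
	 */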
5557 if (lp->dring_mode == VIO_RX_DRING_DATA) {
5558 mutex_enter(&ldcp->rxlock);
5559 ldcp->polling_on = B_TRUE;
5560 mutex_exit(&ldcp->rxlock);
5561 } else {
5562 mutex_enter(&ldcp->pollq_lock);
5563 ldcp->polling_on = B_TRUE;
5564 mutex_exit(&ldcp->pollq_lock);
5565 }
5566
5567 return (0);
5568 }
5569
5570 mblk_t *
 5571 vgen_rx_poll(void *arg, int bytes_to_pickup)
5572 {
5573 vgen_port_t *portp = (vgen_port_t *)arg;
5574 vgen_ldc_t *ldcp = portp->ldcp;
5575 vgen_hparams_t *lp = &ldcp->local_hparams;
5576 mblk_t *mp = NULL;
5577
5578 if (lp->dring_mode == VIO_RX_DRING_DATA) {
5579 mp = vgen_poll_rcv_shm(ldcp, bytes_to_pickup);
5580 } else {
5581 mp = vgen_poll_rcv(ldcp, bytes_to_pickup);
5582 }
5583
5584 return (mp);
5585 }
5586
5587 /* transmit watchdog timeout handler */
5588 static void
 5589 vgen_tx_watchdog(void *arg)
5590 {
5591 vgen_ldc_t *ldcp;
5592 vgen_t *vgenp;
5593 int rv;
5594 boolean_t tx_blocked;
5595 clock_t tx_blocked_lbolt;
5596
5597 ldcp = (vgen_ldc_t *)arg;
5598 vgenp = LDC_TO_VGEN(ldcp);
5599
5600 tx_blocked = ldcp->tx_blocked;
5601 tx_blocked_lbolt = ldcp->tx_blocked_lbolt;
5602
5603 if (vgen_txwd_timeout &&
5604 (tx_blocked == B_TRUE) &&
5605 ((ddi_get_lbolt() - tx_blocked_lbolt) >
5606 drv_usectohz(vgen_txwd_timeout * 1000))) {
5607 /*
5608 * Something is wrong; the peer is not picking up the packets
5609 * in the transmit dring. We now go ahead and reset the channel
5610 * to break out of this condition.
5611 */
5612 DWARN(vgenp, ldcp, "transmit timeout lbolt(%lx), "
5613 "tx_blocked_lbolt(%lx)\n",
5614 ddi_get_lbolt(), tx_blocked_lbolt);
5615
5616 #ifdef DEBUG
5617 if (vgen_inject_error(ldcp, VGEN_ERR_TXTIMEOUT)) {
5618 /* tx timeout triggered for debugging */
5619 vgen_inject_err_flag &= ~(VGEN_ERR_TXTIMEOUT);
5620 }
5621 #endif
5622
5623 /*
5624 * Clear tid before invoking vgen_ldc_reset(). Otherwise,
5625 * it will result in a deadlock when vgen_process_reset() tries
5626 * to untimeout() on seeing a non-zero tid, but it is being
5627 * invoked by the timer itself in this case.
5628 */
5629 mutex_enter(&ldcp->cblock);
5630 if (ldcp->wd_tid == 0) {
5631 /* Cancelled by vgen_process_reset() */
5632 mutex_exit(&ldcp->cblock);
5633 return;
5634 }
5635 ldcp->wd_tid = 0;
5636 mutex_exit(&ldcp->cblock);
5637
5638 /*
5639 * Now reset the channel.
5640 */
5641 rv = vgen_ldc_reset(ldcp, VGEN_OTHER);
5642 if (rv == 0) {
5643 /*
5644 * We have successfully reset the channel. If we are
5645 * in tx flow controlled state, clear it now and enable
5646 * transmit in the upper layer.
5647 */
5648 if (ldcp->tx_blocked) {
5649 vio_net_tx_update_t vtx_update =
5650 ldcp->portp->vcb.vio_net_tx_update;
5651
5652 ldcp->tx_blocked = B_FALSE;
5653 vtx_update(ldcp->portp->vhp);
5654 }
5655 }
5656
5657 /*
5658 * Channel has been reset by us or some other thread is already
5659 * in the process of resetting. In either case, we return
5660 * without restarting the timer. When handshake completes and
5661 * the channel is ready for data transmit/receive we start a
5662 * new watchdog timer.
5663 */
5664 return;
5665 }
5666
5667 restart_timer:
5668 /* Restart the timer */
5669 mutex_enter(&ldcp->cblock);
5670 if (ldcp->wd_tid == 0) {
5671 /* Cancelled by vgen_process_reset() */
5672 mutex_exit(&ldcp->cblock);
5673 return;
5674 }
5675 ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
5676 drv_usectohz(vgen_txwd_interval * 1000));
5677 mutex_exit(&ldcp->cblock);
5678 }
5679
5680 /* Handshake watchdog timeout handler */
5681 static void
 5682 vgen_hwatchdog(void *arg)
5683 {
5684 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg;
5685 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5686
5687 DWARN(vgenp, ldcp, "handshake timeout phase(%x) state(%x)\n",
5688 ldcp->hphase, ldcp->hstate);
5689
5690 mutex_enter(&ldcp->cblock);
5691 if (ldcp->htid == 0) {
5692 /* Cancelled by vgen_process_reset() */
5693 mutex_exit(&ldcp->cblock);
5694 return;
5695 }
5696 ldcp->htid = 0;
5697 mutex_exit(&ldcp->cblock);
5698
5699 /*
 5700	 * Something is wrong; the handshake with the peer seems to be hung. We now
5701 * go ahead and reset the channel to break out of this condition.
5702 */
5703 (void) vgen_ldc_reset(ldcp, VGEN_OTHER);
5704 }
5705
5706 /* Check if the session id in the received message is valid */
5707 static int
 5708 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
5709 {
5710 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5711
5712 if (tagp->vio_sid != ldcp->peer_sid) {
5713 DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
5714 ldcp->peer_sid, tagp->vio_sid);
5715 return (VGEN_FAILURE);
 5716	} else {
 5717		return (VGEN_SUCCESS);
 5718	}
5719 }
5720
5721 /*
5722 * Initialize the common part of dring registration
5723 * message; used in both TxDring and RxDringData modes.
5724 */
5725 static void
 5726 vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg,
5727 uint8_t option)
5728 {
5729 vio_msg_tag_t *tagp;
5730
5731 tagp = &msg->tag;
5732 tagp->vio_msgtype = VIO_TYPE_CTRL;
5733 tagp->vio_subtype = VIO_SUBTYPE_INFO;
5734 tagp->vio_subtype_env = VIO_DRING_REG;
5735 tagp->vio_sid = ldcp->local_sid;
5736
5737 /* get dring info msg payload from ldcp->local */
5738 bcopy(&(ldcp->local_hparams.dring_cookie), &(msg->cookie[0]),
5739 sizeof (ldc_mem_cookie_t));
5740 msg->ncookies = ldcp->local_hparams.dring_ncookies;
5741 msg->num_descriptors = ldcp->local_hparams.num_desc;
5742 msg->descriptor_size = ldcp->local_hparams.desc_size;
5743
5744 msg->options = option;
5745
5746 /*
 5747	 * dring_ident is set to 0. After mapping the dring, the peer sets
 5748	 * this value and sends it in the ack, which is saved in
5749 * vgen_handle_dring_reg().
5750 */
5751 msg->dring_ident = 0;
5752 }
5753
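/*
 * Check whether the channel's direct map space is large enough to import the
 * receive data buffers needed for RxDringData mode; if not, the channel falls
 * back to TxDring mode (see vgen_set_vnet_proto_ops() and
 * vgen_handle_attr_info()).
 */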
5754 static int
 5755 vgen_mapin_avail(vgen_ldc_t *ldcp)
5756 {
5757 int rv;
5758 ldc_info_t info;
5759 uint64_t mapin_sz_req;
5760 uint64_t dblk_sz;
5761 vgen_t *vgenp = LDC_TO_VGEN(ldcp);
5762
5763 rv = ldc_info(ldcp->ldc_handle, &info);
5764 if (rv != 0) {
5765 return (B_FALSE);
5766 }
5767
5768 dblk_sz = RXDRING_DBLK_SZ(vgenp->max_frame_size);
5769 mapin_sz_req = (VGEN_RXDRING_NRBUFS * dblk_sz);
5770
5771 if (info.direct_map_size_max >= mapin_sz_req) {
5772 return (B_TRUE);
5773 }
5774
5775 return (B_FALSE);
5776 }
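/*
 * Sketch (assumption): how the result of vgen_mapin_avail() might feed the
 * dring-mode choice. RxDringData mode requires this guest to map in the
 * entire receive-buffer area exported by the peer, so if the direct map-in
 * window is too small the driver would fall back to the older TxDring
 * mode. The helper name is illustrative.
 */
static uint8_t
vgen_select_dring_mode_sketch(vgen_ldc_t *ldcp)
{
	return (vgen_mapin_avail(ldcp) ? VIO_RX_DRING_DATA : VIO_TX_DRING);
}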
5777
5778 #if DEBUG
5779
5780 /*
5781  * Print debug messages - setting the debug mask to 0xf enables all messages.
5782 */
5783 void
5784 vgen_debug_printf(const char *fname, vgen_t *vgenp,
5785 vgen_ldc_t *ldcp, const char *fmt, ...)
5786 {
5787 char buf[256];
5788 char *bufp = buf;
5789 va_list ap;
5790
5791 if ((vgenp != NULL) && (vgenp->vnetp != NULL)) {
5792 (void) sprintf(bufp, "vnet%d:",
5793 ((vnet_t *)(vgenp->vnetp))->instance);
5794 bufp += strlen(bufp);
5795 }
5796 if (ldcp != NULL) {
5797 (void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id);
5798 bufp += strlen(bufp);
5799 }
5800 (void) sprintf(bufp, "%s: ", fname);
5801 bufp += strlen(bufp);
5802
5803 va_start(ap, fmt);
5804 	(void) vsnprintf(bufp, sizeof (buf) - (bufp - buf), fmt, ap);
5805 va_end(ap);
5806
5807 	if ((ldcp == NULL) || (vgendbg_ldcid == -1) ||
5808 (vgendbg_ldcid == ldcp->ldc_id)) {
5809 cmn_err(CE_CONT, "%s\n", buf);
5810 }
5811 }
5812 #endif
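/*
 * Sketch (assumption): how a DWARN()-style macro could route through
 * vgen_debug_printf() on DEBUG builds, passing the calling function's name
 * so each message is prefixed with the vnet instance, ldc id and caller.
 * The macro name and the non-DEBUG expansion below are illustrative; the
 * driver's actual debug macros are defined in its header.
 */
#if DEBUG
#define	VGEN_DWARN_SKETCH(vgenp, ldcp, ...)	\
	vgen_debug_printf(__func__, (vgenp), (ldcp), __VA_ARGS__)
#else
#define	VGEN_DWARN_SKETCH(vgenp, ldcp, ...)
#endif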
5813
5814 #ifdef VNET_IOC_DEBUG
5815
5816 static void
5817 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
5818 {
5819 struct iocblk *iocp;
5820 vgen_port_t *portp;
5821 enum ioc_reply {
5822 IOC_INVAL = -1, /* bad, NAK with EINVAL */
5823 IOC_ACK /* OK, just send ACK */
5824 } status;
5825 int rv;
5826
5827 iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
5828 iocp->ioc_error = 0;
5829 portp = (vgen_port_t *)arg;
5830
5831 if (portp == NULL) {
5832 status = IOC_INVAL;
5833 goto vgen_ioc_exit;
5834 }
5835
5836 mutex_enter(&portp->lock);
5837
5838 switch (iocp->ioc_cmd) {
5839
5840 case VNET_FORCE_LINK_DOWN:
5841 case VNET_FORCE_LINK_UP:
5842 rv = vgen_force_link_state(portp, iocp->ioc_cmd);
5843 		status = (rv == 0) ? IOC_ACK : IOC_INVAL;
5844 break;
5845
5846 default:
5847 status = IOC_INVAL;
5848 break;
5849
5850 }
5851
5852 mutex_exit(&portp->lock);
5853
5854 vgen_ioc_exit:
5855
5856 switch (status) {
5857 default:
5858 case IOC_INVAL:
5859 /* Error, reply with a NAK and EINVAL error */
5860 miocnak(q, mp, 0, EINVAL);
5861 break;
5862 case IOC_ACK:
5863 /* OK, reply with an ACK */
5864 miocack(q, mp, 0, 0);
5865 break;
5866 }
5867 }
5868
5869 static int
5870 vgen_force_link_state(vgen_port_t *portp, int cmd)
5871 {
5872 ldc_status_t istatus;
5873 int rv;
5874 vgen_ldc_t *ldcp = portp->ldcp;
5875 vgen_t *vgenp = portp->vgenp;
5876
5877 mutex_enter(&ldcp->cblock);
5878
5879 switch (cmd) {
5880
5881 case VNET_FORCE_LINK_DOWN:
5882 (void) ldc_down(ldcp->ldc_handle);
5883 ldcp->link_down_forced = B_TRUE;
5884 break;
5885
5886 case VNET_FORCE_LINK_UP:
5887 vgen_ldc_up(ldcp);
5888 ldcp->link_down_forced = B_FALSE;
5889
5890 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
5891 DWARN(vgenp, ldcp, "ldc_status err\n");
5892 } else {
5893 ldcp->ldc_status = istatus;
5894 }
5895
5896 /* if channel is already UP - restart handshake */
5897 if (ldcp->ldc_status == LDC_UP) {
5898 vgen_handle_evt_up(ldcp);
5899 }
5900 break;
5901
5902 }
5903
5904 mutex_exit(&ldcp->cblock);
5905
5906 return (0);
5907 }
5908
5909 #else
5910
5911 static void
5912 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp)
5913 {
5914 vgen_port_t *portp;
5915
5916 portp = (vgen_port_t *)arg;
5917
5918 if (portp == NULL) {
5919 miocnak(q, mp, 0, EINVAL);
5920 return;
5921 }
5922
5923 miocnak(q, mp, 0, ENOTSUP);
5924 }
5925
5926 #endif
5927