xref: /titanic_50/usr/src/uts/common/sys/ib/clients/eoib/enx_impl.h (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #ifndef _SYS_IB_EOIB_ENX_IMPL_H
27 #define	_SYS_IB_EOIB_ENX_IMPL_H
28 
29 #ifdef __cplusplus
30 extern "C" {
31 #endif
32 
33 #include <sys/ddi.h>
34 #include <sys/sunddi.h>
35 #include <sys/varargs.h>
36 #include <sys/ib/ibtl/ibti.h>
37 #include <sys/ib/ibtl/ibvti.h>
38 #include <sys/ib/ib_pkt_hdrs.h>
39 #include <sys/ib/ibtl/impl/ibtl_ibnex.h>
40 #include <sys/ib/mgt/sm_attr.h>
41 
42 #include <sys/ib/clients/eoib/fip.h>
43 #include <sys/ib/clients/eoib/eib.h>
44 
/*
 * Driver specific constants
 *
 * ENX_E_SUCCESS/ENX_E_FAILURE are the driver's status codes.  The
 * negative value is parenthesized so that the macro always expands to a
 * single primary expression, whatever tokens surround its use.
 *
 * ENX_GRH_SZ is the size of an ib_grh_t.
 *
 * NOTE(review): ENX_MAX_LINE is presumably the maximum length of a
 * formatted log line -- confirm against its users.
 */
#define	ENX_E_SUCCESS		0
#define	ENX_E_FAILURE		(-1)
#define	ENX_MAX_LINE		128
#define	ENX_GRH_SZ		(sizeof (ib_grh_t))
52 
/*
 * Debug message classes.  Every class is a distinct bit so that classes
 * can be OR-ed together into a mask; ENX_MSGS_DEFAULT is the mask made
 * up of the critical, error and warning classes.
 */
#define	ENX_MSGS_CRIT		(1 << 0)	/* 0x01 */
#define	ENX_MSGS_ERR		(1 << 1)	/* 0x02 */
#define	ENX_MSGS_WARN		(1 << 2)	/* 0x04 */
#define	ENX_MSGS_DEBUG		(1 << 3)	/* 0x08 */
#define	ENX_MSGS_ARGS		(1 << 4)	/* 0x10 */
#define	ENX_MSGS_VERBOSE	(1 << 5)	/* 0x20 */
#define	ENX_MSGS_DEFAULT	\
	(ENX_MSGS_CRIT | ENX_MSGS_ERR | ENX_MSGS_WARN)

/*
 * NOTE(review): presumably the default size of the debug log buffer
 * (0x20000 == 128K) -- confirm against the logging implementation.
 */
#define	ENX_LOGSZ_DEFAULT	0x20000

/*
 * Logging entry points.  The crit/err/warn macros always name the real
 * logging functions.  The debug/args/verbose macros do so only when
 * ENX_DEBUG is defined; otherwise they expand to "0 &&", which turns a
 * call such as
 *
 *	ENX_DPRINTF_DEBUG("fmt", arg);
 *
 * into the expression "0 && ("fmt", arg);" whose right-hand side is
 * never evaluated.
 */
#define	ENX_DPRINTF_CRIT	eibnx_dprintf_crit
#define	ENX_DPRINTF_ERR		eibnx_dprintf_err
#define	ENX_DPRINTF_WARN	eibnx_dprintf_warn
#ifndef ENX_DEBUG
#define	ENX_DPRINTF_DEBUG	0 &&
#define	ENX_DPRINTF_ARGS	0 &&
#define	ENX_DPRINTF_VERBOSE	0 &&
#else
#define	ENX_DPRINTF_DEBUG	eibnx_dprintf_debug
#define	ENX_DPRINTF_ARGS	eibnx_dprintf_args
#define	ENX_DPRINTF_VERBOSE	eibnx_dprintf_verbose
#endif
78 
/*
 * Names of the EoIB Nexus service threads.  ENX_PORT_MONITOR is a
 * printf-style format that takes one integer argument (%d).
 *
 * NOTE(review): the integer is presumably the port number being
 * monitored -- confirm at the point where the name is formatted.
 */
#define	ENX_PORT_MONITOR	"eibnx_port_%d_monitor"
#define	ENX_NODE_CREATOR	"eibnx_node_creator"
84 
/*
 * Default period, in microseconds, between unicast solicitations sent to
 * discovered gateways.  The EoIB specification requires that hosts send
 * a solicitation at least every 4 * GW_ADV_PERIOD.  The default below is
 * 32 seconds.
 */
#define	ENX_DFL_SOLICIT_PERIOD_USEC	(32 * 1000 * 1000)
91 
92 /*
93  * Portinfo list per HCA
94  */
95 typedef struct eibnx_port_s {
96 	struct eibnx_port_s 	*po_next;
97 	ibt_hca_portinfo_t	*po_pi;
98 	uint_t			po_pi_size;
99 } eibnx_port_t;
100 
101 /*
102  * HCA details
103  */
104 typedef struct eibnx_hca_s {
105 	struct eibnx_hca_s 	*hc_next;
106 	ib_guid_t		hc_guid;
107 	ibt_hca_hdl_t		hc_hdl;
108 	ibt_pd_hdl_t		hc_pd;
109 	eibnx_port_t		*hc_port;
110 } eibnx_hca_t;
111 
/*
 * Work queue sizing.
 *
 * The port_monitor thread in the EoIB nexus driver sends only two kinds
 * of packets: a multicast solicitation the first time around, and
 * periodic unicast solicitations afterwards to the gateways that have
 * been discovered.  So we need a couple of send wqes for the multicast
 * solicitation, and probably as many send wqes as there may be gateways
 * discovered from each port for the unicast solicitations.
 *
 * For unicast solicitations, the UD destination has to be set up when
 * the advertisement is received from the gateway, using
 * ibt_modify_reply_ud_dest(), so one send wqe is assigned to each
 * gateway we discover.  These send wqes are therefore acquired during
 * rx processing in the completion handler, where we must not sleep
 * waiting for a swqe.  To avoid that, the unicast solicitation send
 * wqes are pre-allocated to be at least twice the number of recv wqes.
 *
 * The receive packets expected by the nexus are the multicast and
 * unicast messages on the SOLICIT and ADVERTISE groups.  There shouldn't
 * be too many of these, and the count should be tuned as we gain
 * experience with the traffic pattern.  We start with 16.
 *
 * NOTE(review): the reason for the 2 extra entries in ENX_CQ_SIZE is not
 * stated here -- confirm before changing it.
 */
#define	ENX_NUM_RWQE			16
#define	ENX_NUM_SWQE			46
#define	ENX_CQ_SIZE			(ENX_NUM_RWQE + ENX_NUM_SWQE + 2)
136 
/*
 * qe_type values: whether a work queue entry is a receive or a send
 * entry.
 */
#define	ENX_QETYP_RWQE			0x1
#define	ENX_QETYP_SWQE			0x2

/*
 * qe_flags bitmasks (protected by qe_lock).  None of the flag values
 * may be zero; each one is a single distinct bit.
 */
#define	ENX_QEFL_INUSE			(1 << 0)	/* 0x01 */
#define	ENX_QEFL_POSTED			(1 << 1)	/* 0x02 */
#define	ENX_QEFL_RELONCOMP		(1 << 2)	/* 0x04 */
150 
151 /*
152  * Recv and send workq entries
153  */
154 typedef struct eibnx_wqe_s {
155 	uint_t			qe_type;
156 	uint_t			qe_bufsz;
157 	ibt_wr_ds_t		qe_sgl;
158 	ibt_all_wr_t		qe_wr;
159 	kmutex_t		qe_lock;
160 	uint_t			qe_flags;
161 } eibnx_wqe_t;
162 
163 /*
164  * Tx descriptor
165  */
166 typedef struct eibnx_tx_s {
167 	ib_vaddr_t		tx_vaddr;
168 	ibt_mr_hdl_t		tx_mr;
169 	ibt_lkey_t		tx_lkey;
170 	eibnx_wqe_t		tx_wqe[ENX_NUM_SWQE];
171 } eibnx_tx_t;
172 
173 /*
174  * Rx descriptor
175  */
176 typedef struct eibnx_rx_s {
177 	ib_vaddr_t		rx_vaddr;
178 	ibt_mr_hdl_t		rx_mr;
179 	ibt_lkey_t		rx_lkey;
180 	eibnx_wqe_t		rx_wqe[ENX_NUM_RWQE];
181 } eibnx_rx_t;
182 
183 /*
184  * Details about the address of each gateway we discover.
185  */
186 typedef struct eibnx_gw_addr_s {
187 	ibt_adds_vect_t		*ga_vect;
188 	ib_gid_t		ga_gid;
189 	ib_qpn_t		ga_qpn;
190 	ib_qkey_t		ga_qkey;
191 	ib_pkey_t		ga_pkey;
192 } eibnx_gw_addr_t;
193 
/*
 * States a gateway can be in.  These are plain values, not bit flags.
 */
#define	ENX_GW_STATE_UNAVAILABLE	1	/* GW nackd availability */
#define	ENX_GW_STATE_AVAILABLE		2	/* GW mcasted availability */
#define	ENX_GW_STATE_READY_TO_LOGIN	3	/* GW ucasted availability */
200 
201 typedef struct eibnx_gw_info_s {
202 	struct eibnx_gw_info_s	*gw_next;
203 	eibnx_wqe_t		*gw_swqe;
204 	uint_t			gw_state;
205 
206 	kmutex_t		gw_adv_lock;
207 	uint_t			gw_adv_flag;
208 	int64_t			gw_adv_last_lbolt;
209 	int64_t			gw_adv_timeout_ticks;
210 
211 	eibnx_gw_addr_t		gw_addr;
212 
213 	ib_guid_t		gw_system_guid;
214 	ib_guid_t		gw_guid;
215 
216 	uint32_t		gw_adv_period;
217 	uint32_t		gw_ka_period;
218 	uint32_t		gw_vnic_ka_period;
219 	ib_qpn_t		gw_ctrl_qpn;
220 
221 	ib_lid_t		gw_lid;
222 	uint16_t		gw_portid;
223 	uint16_t		gw_num_net_vnics;
224 
225 	uint8_t			gw_is_host_adm_vnics;
226 	uint8_t			gw_sl;
227 	uint8_t			gw_n_rss_qpn;
228 	uint8_t			gw_flag_ucast_advt;
229 	uint8_t			gw_flag_available;
230 
231 	uint8_t			gw_system_name[EIB_GW_SYSNAME_LEN];
232 	uint8_t			gw_port_name[EIB_GW_PORTNAME_LEN];
233 	uint8_t			gw_vendor_id[EIB_GW_VENDOR_LEN];
234 } eibnx_gw_info_t;
235 
/*
 * Values stored in gw_adv_flag.  Zero is deliberately not used: every
 * value must be non-zero.
 */
#define	ENX_GW_DEAD		1
#define	ENX_GW_ALIVE		2
#define	ENX_GW_AWARE		3
242 
/*
 * Kinds of packets received from a gateway.  For now only the
 * advertisement packets are expected from the gw, but login acks from
 * the gateway also arrive here in the nexus, so they need an identifier
 * of their own.
 */
typedef enum {
	FIP_GW_ADVERTISE_MCAST = 0,
	FIP_GW_ADVERTISE_UCAST = 1,
	FIP_VNIC_LOGIN_ACK = 2
} eibnx_gw_pkt_type_t;
253 
254 /*
255  * Currently, the only gw response handled by the eibnx driver
256  * are the ucast/mcast advertisements.  Information collected from
257  * both these responses may be packed into a eibnx_gw_info_t.
258  * In the future, if we decide to handle other types of responses
259  * from the gw, we could simply add the new types to the union.
260  */
261 typedef struct eibnx_gw_msg_s {
262 	eibnx_gw_pkt_type_t	gm_type;
263 	union {
264 		eibnx_gw_info_t	gm_info;
265 	} u;
266 } eibnx_gw_msg_t;
267 
268 /*
269  * List to hold the devinfo nodes of eoib instances
270  */
271 typedef struct eibnx_child_s {
272 	struct eibnx_child_s	*ch_next;
273 	dev_info_t		*ch_dip;
274 	eibnx_gw_info_t		*ch_gwi;
275 	char			*ch_node_name;
276 } eibnx_child_t;
277 
/*
 * Event bitmasks for the port-monitor to wait on.  None of these flags
 * may be zero; each one is a single distinct bit.
 */
#define	ENX_EVENT_LINK_UP		(1 << 0)	/* 0x01 */
#define	ENX_EVENT_MCGS_AVAILABLE	(1 << 1)	/* 0x02 */
#define	ENX_EVENT_TIMED_OUT		(1 << 2)	/* 0x04 */
#define	ENX_EVENT_DIE			(1 << 3)	/* 0x08 */
#define	ENX_EVENT_COMPLETION		(1 << 4)	/* 0x10 */

/*
 * MCG Query/Join status
 */
#define	ENX_MCGS_FOUND			0x1
#define	ENX_MCGS_JOINED			0x2
293 
294 /*
295  * Information that each port-monitor thread cares about
296  */
297 typedef struct eibnx_thr_info_s {
298 	struct eibnx_thr_info_s	*ti_next;
299 	uint_t			ti_progress;
300 
301 	/*
302 	 * Our kernel thread id
303 	 */
304 	kt_did_t		ti_kt_did;
305 
306 	/*
307 	 * HCA, port and protection domain information
308 	 */
309 	ib_guid_t		ti_hca_guid;
310 	ibt_hca_hdl_t		ti_hca;
311 	ibt_pd_hdl_t		ti_pd;
312 	ibt_hca_portinfo_t	*ti_pi;
313 	char			*ti_ident;
314 
315 	/*
316 	 * Well-known multicast groups for solicitations
317 	 * and advertisements.
318 	 */
319 	kmutex_t		ti_mcg_lock;
320 	uint_t			ti_mcg_status;
321 	ibt_mcg_info_t		*ti_advertise_mcg;
322 	ibt_mcg_info_t		*ti_solicit_mcg;
323 	uint_t			ti_mcast_done;
324 
325 	/*
326 	 * Completion queue stuff
327 	 */
328 	ibt_cq_hdl_t		ti_cq_hdl;
329 	uint_t			ti_cq_sz;
330 	ibt_wc_t		*ti_wc;
331 	ddi_softint_handle_t    ti_softint_hdl;
332 
333 	/*
334 	 * Channel related
335 	 */
336 	ibt_channel_hdl_t	ti_chan;
337 	ib_qpn_t		ti_qpn;
338 
339 	/*
340 	 * Transmit/Receive stuff
341 	 */
342 	eibnx_tx_t		ti_snd;
343 	eibnx_rx_t		ti_rcv;
344 
345 	/*
346 	 * GW related stuff
347 	 */
348 	kmutex_t		ti_gw_lock;
349 	eibnx_gw_info_t		*ti_gw;
350 
351 	/*
352 	 * Devinfo nodes for the eoib children
353 	 */
354 	kmutex_t		ti_child_lock;
355 	eibnx_child_t		*ti_child;
356 
357 	/*
358 	 * Events that we wait on and/or handle
359 	 */
360 	kmutex_t		ti_event_lock;
361 	kcondvar_t		ti_event_cv;
362 	uint_t			ti_event;
363 } eibnx_thr_info_t;
364 
365 /*
366  * Workq entry for creation of eoib nodes
367  */
368 typedef struct eibnx_nodeq_s {
369 	struct eibnx_nodeq_s	*nc_next;
370 	eibnx_thr_info_t	*nc_info;
371 	eibnx_gw_info_t		*nc_gwi;
372 } eibnx_nodeq_t;
373 
/*
 * Bus config status flags.  The in-prog flag is protected by nx_lock,
 * and the remaining flags (currently only buscfg-complete) are
 * protected by the in-prog bit itself.  NX_FL_BUSOP_MASK covers all of
 * the flags.
 *
 * NOTE(review): eibnx_t also carries nx_busop_lock and nx_busop_cv
 * alongside nx_busop_flags -- confirm which lock actually guards the
 * in-prog flag.
 */
#define	NX_FL_BUSOP_INPROG		0x1
#define	NX_FL_BUSCFG_COMPLETE		0x2
#define	NX_FL_BUSOP_MASK		\
	(NX_FL_BUSOP_INPROG | NX_FL_BUSCFG_COMPLETE)
382 
383 /*
384  * EoIB nexus per-instance state
385  */
386 typedef struct eibnx_s {
387 	dev_info_t		*nx_dip;
388 	ibt_clnt_hdl_t		nx_ibt_hdl;
389 
390 	kmutex_t		nx_lock;
391 	eibnx_hca_t		*nx_hca;
392 	eibnx_thr_info_t	*nx_thr_info;
393 	boolean_t		nx_monitors_up;
394 
395 	kmutex_t		nx_nodeq_lock;
396 	kcondvar_t		nx_nodeq_cv;
397 	eibnx_nodeq_t		*nx_nodeq;
398 	kt_did_t		nx_nodeq_kt_did;
399 	uint_t			nx_nodeq_thr_die;
400 
401 	kmutex_t		nx_busop_lock;
402 	kcondvar_t		nx_busop_cv;
403 	uint_t			nx_busop_flags;
404 } eibnx_t;
405 
406 
/*
 * Tags identifying the EoIB Nexus events that are delivered to EoIB
 * instances.  These are plain values (starting at zero), not bit flags.
 */
#define	ENX_EVENT_TAG_GW_INFO_UPDATE		0
#define	ENX_EVENT_TAG_GW_AVAILABLE		1
#define	ENX_EVENT_TAG_LOGIN_ACK			2
413 
414 /*
415  * FUNCTION PROTOTYPES FOR CROSS-FILE LINKAGE
416  */
417 
418 /*
419  * Threads and Event Handlers
420  */
421 void eibnx_port_monitor(eibnx_thr_info_t *);
422 void eibnx_subnet_notices_handler(void *, ib_gid_t, ibt_subnet_event_code_t,
423     ibt_subnet_event_t *);
424 void eibnx_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
425     ibt_async_event_t *);
426 boolean_t eibnx_is_gw_dead(eibnx_gw_info_t *);
427 void eibnx_create_eoib_node(void);
428 void eibnx_comp_intr(ibt_cq_hdl_t, void *);
429 uint_t eibnx_comp_handler(caddr_t, caddr_t);
430 
431 /*
432  * IBT related functions
433  */
434 int eibnx_ibt_init(eibnx_t *);
435 int eibnx_find_mgroups(eibnx_thr_info_t *);
436 int eibnx_setup_cq(eibnx_thr_info_t *);
437 int eibnx_setup_ud_channel(eibnx_thr_info_t *);
438 int eibnx_setup_bufs(eibnx_thr_info_t *);
439 int eibnx_setup_cq_handler(eibnx_thr_info_t *);
440 int eibnx_join_mcgs(eibnx_thr_info_t *);
441 int eibnx_rejoin_mcgs(eibnx_thr_info_t *);
442 int eibnx_ibt_fini(eibnx_t *);
443 
444 void eibnx_rb_find_mgroups(eibnx_thr_info_t *);
445 void eibnx_rb_setup_cq(eibnx_thr_info_t *);
446 void eibnx_rb_setup_ud_channel(eibnx_thr_info_t *);
447 void eibnx_rb_setup_bufs(eibnx_thr_info_t *);
448 void eibnx_rb_setup_cq_handler(eibnx_thr_info_t *);
449 void eibnx_rb_join_mcgs(eibnx_thr_info_t *);
450 
451 eibnx_hca_t *eibnx_prepare_hca(ib_guid_t);
452 int eibnx_cleanup_hca(eibnx_hca_t *);
453 
454 /*
455  * FIP packetizing related functions
456  */
457 int eibnx_fip_solicit_mcast(eibnx_thr_info_t *);
458 int eibnx_fip_solicit_ucast(eibnx_thr_info_t *, clock_t *);
459 int eibnx_fip_parse_pkt(uint8_t *, eibnx_gw_msg_t *);
460 
461 /*
462  * Queue and List related routines
463  */
464 eibnx_wqe_t *eibnx_acquire_swqe(eibnx_thr_info_t *, int);
465 void eibnx_return_swqe(eibnx_wqe_t *);
466 void eibnx_return_rwqe(eibnx_thr_info_t *, eibnx_wqe_t *);
467 void eibnx_release_swqe(eibnx_wqe_t *);
468 
469 void eibnx_enqueue_child(eibnx_thr_info_t *, eibnx_gw_info_t *, char *,
470     dev_info_t *);
471 int eibnx_update_child(eibnx_thr_info_t *, eibnx_gw_info_t *, dev_info_t *);
472 dev_info_t *eibnx_find_child_dip_by_inst(eibnx_thr_info_t *, int);
473 dev_info_t *eibnx_find_child_dip_by_gw(eibnx_thr_info_t *, uint16_t);
474 
475 eibnx_gw_info_t *eibnx_find_gw_in_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *);
476 eibnx_gw_info_t *eibnx_add_gw_to_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *,
477     ibt_wc_t *, uint8_t *);
478 void eibnx_replace_gw_in_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *,
479     eibnx_gw_info_t *, ibt_wc_t *, uint8_t *, boolean_t *);
480 void eibnx_queue_for_creation(eibnx_thr_info_t *, eibnx_gw_info_t *);
481 
/*
 * Logging and Error reporting routines.  The dprintf routines take a
 * format string followed by variable arguments.  The debug, args and
 * verbose variants are declared only when ENX_DEBUG is defined.
 */
extern void eibnx_debug_init(void);
extern void eibnx_debug_fini(void);
extern void eibnx_dprintf_crit(const char *fmt, ...);
extern void eibnx_dprintf_err(const char *fmt, ...);
extern void eibnx_dprintf_warn(const char *fmt, ...);
#ifdef ENX_DEBUG
extern void eibnx_dprintf_debug(const char *fmt, ...);
extern void eibnx_dprintf_args(const char *fmt, ...);
extern void eibnx_dprintf_verbose(const char *fmt, ...);
#endif
495 
496 /*
497  * Miscellaneous
498  */
499 void eibnx_cleanup_port_nodes(eibnx_thr_info_t *);
500 void eibnx_create_node_props(dev_info_t *, eibnx_thr_info_t *,
501     eibnx_gw_info_t *);
502 int eibnx_name_child(dev_info_t *, char *, size_t);
503 void eibnx_busop_inprog_enter(eibnx_t *);
504 void eibnx_busop_inprog_exit(eibnx_t *);
505 eibnx_thr_info_t *eibnx_start_port_monitor(eibnx_hca_t *, eibnx_port_t *);
506 void eibnx_stop_port_monitor(eibnx_thr_info_t *);
507 void eibnx_terminate_monitors(void);
508 int eibnx_configure_node(eibnx_thr_info_t *, eibnx_gw_info_t *, dev_info_t **);
509 int eibnx_unconfigure_node(eibnx_thr_info_t *, eibnx_gw_info_t *);
510 int eibnx_locate_node_name(char *, eibnx_thr_info_t **, eibnx_gw_info_t **);
511 int eibnx_locate_unconfigured_node(eibnx_thr_info_t **, eibnx_gw_info_t **);
512 
513 /*
514  * Devctl cbops (currently dummy)
515  */
516 int eibnx_devctl_open(dev_t *, int, int, cred_t *);
517 int eibnx_devctl_close(dev_t, int, int, cred_t *);
518 int eibnx_devctl_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
519 
520 /*
521  * External variable references
522  */
523 extern pri_t minclsyspri;
524 extern eibnx_t *enx_global_ss;
525 extern ib_gid_t enx_solicit_mgid;
526 extern ib_gid_t enx_advertise_mgid;
527 
528 #ifdef __cplusplus
529 }
530 #endif
531 
532 #endif	/* _SYS_IB_EOIB_ENX_IMPL_H */
533