xref: /illumos-gate/usr/src/uts/common/io/ib/clients/rds/rdsib.c (revision d99cb22f7f0de8584336bda08cb86c562ffbab55)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <sys/conf.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/modctl.h>
34 #include <inet/ip.h>
35 #include <sys/ib/clients/rds/rdsib_ib.h>
36 #include <sys/ib/clients/rds/rdsib_buf.h>
37 #include <sys/ib/clients/rds/rdsib_cm.h>
38 #include <sys/ib/clients/rds/rdsib_protocol.h>
39 #include <sys/ib/clients/rds/rds_transport.h>
40 #include <sys/ib/clients/rds/rds_kstat.h>
41 
42 /*
43  * Global Configuration Variables
44  * As defined in RDS proposal
45  */
46 uint_t		MaxNodes		= RDS_MAX_NODES;
47 uint_t		RdsPktSize;
48 uint_t		NDataRX;
49 uint_t		MaxDataSendBuffers	= RDS_MAX_DATA_SEND_BUFFERS;
50 uint_t		MaxDataRecvBuffers	= RDS_MAX_DATA_RECV_BUFFERS;
51 uint_t		MaxCtrlSendBuffers	= RDS_MAX_CTRL_SEND_BUFFERS;
52 uint_t		MaxCtrlRecvBuffers	= RDS_MAX_CTRL_RECV_BUFFERS;
53 uint_t		DataRecvBufferLWM	= RDS_DATA_RECV_BUFFER_LWM;
54 uint_t		CtrlRecvBufferLWM	= RDS_CTRL_RECV_BUFFER_LWM;
55 uint_t		PendingRxPktsHWM	= RDS_PENDING_RX_PKTS_HWM;
56 uint_t		MinRnrRetry		= RDS_IB_RNR_RETRY;
57 uint8_t		IBPathRetryCount	= RDS_IB_PATH_RETRY;
58 uint8_t		IBPktLifeTime		= RDS_IB_PKT_LT;
59 
60 extern int rdsib_open_ib();
61 extern void rdsib_close_ib();
62 extern void rds_resume_port(in_port_t port);
63 extern int rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip,
64     in_port_t sendport, in_port_t recvport, zoneid_t zoneid);
65 extern boolean_t rds_if_lookup_by_name(char *devname);
66 
67 rds_transport_ops_t rds_ib_transport_ops = {
68 	rdsib_open_ib,
69 	rdsib_close_ib,
70 	rds_sendmsg,
71 	rds_resume_port,
72 	rds_if_lookup_by_name
73 };
74 
75 /* global */
76 rds_state_t	*rdsib_statep = NULL;
77 krwlock_t	rds_loopback_portmap_lock;
78 uint8_t		rds_loopback_portmap[RDS_PORT_MAP_SIZE];
79 ddi_taskq_t	*rds_taskq = NULL;
80 dev_info_t	*rdsib_dev_info = NULL;
81 uint_t		rds_rx_pkts_pending_hwm;
82 
83 #ifdef DEBUG
84 uint32_t	rdsdbglvl = RDS_LOG_L3;
85 #else
86 uint32_t	rdsdbglvl = RDS_LOG_L2;
87 #endif
88 
89 #define		RDS_NUM_TASKQ_THREADS	4
90 
91 static int rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
92 static int rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
93 static int rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
94     void **result);
95 static void rds_read_config_values(dev_info_t *dip);
96 
97 /* Driver entry points */
98 static struct cb_ops	rdsib_cb_ops = {
99 	nulldev,		/* open */
100 	nulldev,		/* close */
101 	nodev,			/* strategy */
102 	nodev,			/* print */
103 	nodev,			/* dump */
104 	nodev,			/* read */
105 	nodev,			/* write */
106 	nodev,			/* ioctl */
107 	nodev,			/* devmap */
108 	nodev,			/* mmap */
109 	nodev,			/* segmap */
110 	nochpoll,		/* poll */
111 	ddi_prop_op,		/* prop_op */
112 	NULL,			/* stream */
113 	D_MP,			/* cb_flag */
114 	CB_REV,			/* rev */
115 	nodev,			/* int (*cb_aread)() */
116 	nodev,			/* int (*cb_awrite)() */
117 };
118 
119 /* Device options */
120 static struct dev_ops rdsib_ops = {
121 	DEVO_REV,		/* devo_rev, */
122 	0,			/* refcnt  */
123 	rdsib_info,		/* info */
124 	nulldev,		/* identify */
125 	nulldev,		/* probe */
126 	rdsib_attach,		/* attach */
127 	rdsib_detach,		/* detach */
128 	nodev,			/* reset */
129 	&rdsib_cb_ops,		/* driver ops - devctl interfaces */
130 	NULL,			/* bus operations */
131 	NULL			/* power */
132 };
133 
134 /*
135  * Module linkage information.
136  */
137 #define	RDS_DEVDESC	"RDS IB driver %I%"
138 static struct modldrv rdsib_modldrv = {
139 	&mod_driverops,		/* Driver module */
140 	RDS_DEVDESC,		/* Driver name and version */
141 	&rdsib_ops,		/* Driver ops */
142 };
143 
144 static struct modlinkage rdsib_modlinkage = {
145 	MODREV_1,
146 	(void *)&rdsib_modldrv,
147 	NULL
148 };
149 
150 /* Called from _init */
151 int
152 rdsib_init()
153 {
154 	/* RDS supports only one instance */
155 	rdsib_statep = kmem_zalloc(sizeof (rds_state_t), KM_SLEEP);
156 
157 	rw_init(&rdsib_statep->rds_sessionlock, NULL, RW_DRIVER, NULL);
158 	rw_init(&rdsib_statep->rds_hca_lock, NULL, RW_DRIVER, NULL);
159 
160 	rw_init(&rds_loopback_portmap_lock, NULL, RW_DRIVER, NULL);
161 	bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE);
162 
163 	mutex_init(&rds_dpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
164 	cv_init(&rds_dpool.pool_cv, NULL, CV_DRIVER, NULL);
165 	mutex_init(&rds_cpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
166 	cv_init(&rds_cpool.pool_cv, NULL, CV_DRIVER, NULL);
167 
168 	/* Initialize logging */
169 	rds_logging_initialization();
170 
171 	RDS_SET_NPORT(1); /* this should never be 0 */
172 
173 	ASSERT(rds_transport_ops == NULL);
174 	rds_transport_ops = &rds_ib_transport_ops;
175 
176 	return (0);
177 }
178 
179 /* Called from _fini */
180 void
181 rdsib_fini()
182 {
183 	/* Stop logging */
184 	rds_logging_destroy();
185 
186 	cv_destroy(&rds_dpool.pool_cv);
187 	mutex_destroy(&rds_dpool.pool_lock);
188 	cv_destroy(&rds_cpool.pool_cv);
189 	mutex_destroy(&rds_cpool.pool_lock);
190 
191 	rw_destroy(&rds_loopback_portmap_lock);
192 
193 	rw_destroy(&rdsib_statep->rds_hca_lock);
194 	rw_destroy(&rdsib_statep->rds_sessionlock);
195 	kmem_free(rdsib_statep, sizeof (rds_state_t));
196 
197 	rds_transport_ops = NULL;
198 }
199 
200 int
201 _init(void)
202 {
203 	int	ret;
204 
205 	if (ibt_hw_is_present() == 0) {
206 		return (ENODEV);
207 	}
208 
209 	ret = rdsib_init();
210 	if (ret != 0) {
211 		return (ret);
212 	}
213 
214 	ret = mod_install(&rdsib_modlinkage);
215 	if (ret != 0) {
216 		/*
217 		 * Could not load module
218 		 */
219 		rdsib_fini();
220 		return (ret);
221 	}
222 
223 	return (0);
224 }
225 
226 int
227 _fini()
228 {
229 	int	ret;
230 
231 	/*
232 	 * Remove module
233 	 */
234 	if ((ret = mod_remove(&rdsib_modlinkage)) != 0) {
235 		return (ret);
236 	}
237 
238 	rdsib_fini();
239 
240 	return (0);
241 }
242 
243 int
244 _info(struct modinfo *modinfop)
245 {
246 	return (mod_info(&rdsib_modlinkage, modinfop));
247 }
248 
249 static int
250 rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
251 {
252 	int	ret;
253 
254 	RDS_DPRINTF2("rdsib_attach", "enter");
255 
256 	if (cmd != DDI_ATTACH)
257 		return (DDI_FAILURE);
258 
259 	if (rdsib_dev_info != NULL) {
260 		RDS_DPRINTF1("rdsib_attach", "Multiple RDS instances are"
261 		    " not supported (rds_dev_info: 0x%p)", rdsib_dev_info);
262 		return (DDI_FAILURE);
263 	}
264 
265 	rdsib_dev_info = dip;
266 	rds_read_config_values(dip);
267 
268 	rds_taskq = ddi_taskq_create(dip, "rds_taskq", RDS_NUM_TASKQ_THREADS,
269 	    TASKQ_DEFAULTPRI, 0);
270 	if (rds_taskq == NULL) {
271 		RDS_DPRINTF1(LABEL, "ddi_taskq_create failed for rds_taskq");
272 		rdsib_dev_info = NULL;
273 		return (DDI_FAILURE);
274 	}
275 
276 	ret = ddi_create_minor_node(dip, "rdsib", S_IFCHR, 0, DDI_PSEUDO, 0);
277 	if (ret != DDI_SUCCESS) {
278 		cmn_err(CE_CONT, "ddi_create_minor_node failed: %d", ret);
279 		ddi_taskq_destroy(rds_taskq);
280 		rds_taskq = NULL;
281 		rdsib_dev_info = NULL;
282 		return (DDI_FAILURE);
283 	}
284 
285 	/* Max number of receive buffers on the system */
286 	NDataRX = (MaxNodes - 1) * MaxDataRecvBuffers * 2;
287 
288 	/*
289 	 * High water mark for the receive buffers in the system. If the
290 	 * number of buffers used crosses this mark then all sockets in
291 	 * would be stalled. The port quota for the sockets is set based
292 	 * on this limit.
293 	 */
294 	rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * NDataRX)/100;
295 
296 	ret = rdsib_initialize_ib();
297 	if (ret != 0) {
298 		cmn_err(CE_CONT, "rdsib_initialize_ib failed: %d", ret);
299 		ddi_taskq_destroy(rds_taskq);
300 		rds_taskq = NULL;
301 		rdsib_dev_info = NULL;
302 		return (DDI_FAILURE);
303 	}
304 
305 	RDS_DPRINTF2("rdsib_attach", "return");
306 
307 	return (DDI_SUCCESS);
308 }
309 
310 static int
311 rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
312 {
313 	RDS_DPRINTF2("rdsib_detach", "enter");
314 
315 	if (cmd != DDI_DETACH)
316 		return (DDI_FAILURE);
317 
318 	rdsib_deinitialize_ib();
319 
320 	ddi_remove_minor_node(dip, "rdsib");
321 
322 	/* destroy taskq */
323 	if (rds_taskq != NULL) {
324 		ddi_taskq_destroy(rds_taskq);
325 		rds_taskq = NULL;
326 	}
327 
328 	rdsib_dev_info = NULL;
329 
330 	RDS_DPRINTF2("rdsib_detach", "return");
331 
332 	return (DDI_SUCCESS);
333 }
334 
335 /* ARGSUSED */
336 static int
337 rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
338 {
339 	int ret = DDI_FAILURE;
340 
341 	switch (cmd) {
342 	case DDI_INFO_DEVT2DEVINFO:
343 		if (rdsib_dev_info != NULL) {
344 			*result = (void *)rdsib_dev_info;
345 			ret = DDI_SUCCESS;
346 		}
347 		break;
348 
349 	case DDI_INFO_DEVT2INSTANCE:
350 		*result = NULL;
351 		ret = DDI_SUCCESS;
352 		break;
353 
354 	default:
355 		break;
356 	}
357 
358 	return (ret);
359 }
360 
361 static void
362 rds_read_config_values(dev_info_t *dip)
363 {
364 	MaxNodes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
365 	    "MaxNodes", RDS_MAX_NODES);
366 
367 	UserBufferSize = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
368 	    DDI_PROP_DONTPASS, "UserBufferSize", RDS_USER_DATA_BUFFER_SIZE);
369 
370 	MaxDataSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
371 	    DDI_PROP_DONTPASS, "MaxDataSendBuffers", RDS_MAX_DATA_SEND_BUFFERS);
372 
373 	MaxDataRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
374 	    DDI_PROP_DONTPASS, "MaxDataRecvBuffers", RDS_MAX_DATA_RECV_BUFFERS);
375 
376 	MaxCtrlSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
377 	    DDI_PROP_DONTPASS, "MaxCtrlSendBuffers", RDS_MAX_CTRL_SEND_BUFFERS);
378 
379 	MaxCtrlRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
380 	    DDI_PROP_DONTPASS, "MaxCtrlRecvBuffers", RDS_MAX_CTRL_RECV_BUFFERS);
381 
382 	DataRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
383 	    DDI_PROP_DONTPASS, "DataRecvBufferLWM", RDS_DATA_RECV_BUFFER_LWM);
384 
385 	CtrlRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
386 	    DDI_PROP_DONTPASS, "CtrlRecvBufferLWM", RDS_CTRL_RECV_BUFFER_LWM);
387 
388 	PendingRxPktsHWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
389 	    DDI_PROP_DONTPASS, "PendingRxPktsHWM", RDS_PENDING_RX_PKTS_HWM);
390 
391 	MinRnrRetry = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
392 	    "MinRnrRetry", RDS_IB_RNR_RETRY);
393 
394 	IBPathRetryCount = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
395 	    DDI_PROP_DONTPASS, "IBPathRetryCount", RDS_IB_PATH_RETRY);
396 
397 	IBPktLifeTime = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
398 	    DDI_PROP_DONTPASS, "IBPktLifeTime", RDS_IB_PKT_LT);
399 
400 	rdsdbglvl = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
401 	    "rdsdbglvl", RDS_LOG_L2);
402 
403 	if (MaxNodes < 2) {
404 		cmn_err(CE_WARN, "MaxNodes is set to less than 2");
405 		MaxNodes = 2;
406 	}
407 }
408