xref: /illumos-gate/usr/src/uts/common/io/ib/clients/rds/rdsib.c (revision 2bbdd445a21f9d61f4a0ca0faf05d5ceb2bd91f3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 #include <sys/types.h>
28 #include <sys/stat.h>
29 #include <sys/conf.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/modctl.h>
33 #include <inet/ip.h>
34 #include <sys/ib/clients/rds/rdsib_ib.h>
35 #include <sys/ib/clients/rds/rdsib_buf.h>
36 #include <sys/ib/clients/rds/rdsib_cm.h>
37 #include <sys/ib/clients/rds/rdsib_protocol.h>
38 #include <sys/ib/clients/rds/rds_transport.h>
39 #include <sys/ib/clients/rds/rds_kstat.h>
40 
41 /*
42  * Global Configuration Variables
43  * As defined in RDS proposal
44  */
45 uint_t		MaxNodes		= RDS_MAX_NODES;
46 uint_t		RdsPktSize;
47 uint_t		NDataRX;
48 uint_t		MaxDataSendBuffers	= RDS_MAX_DATA_SEND_BUFFERS;
49 uint_t		MaxDataRecvBuffers	= RDS_MAX_DATA_RECV_BUFFERS;
50 uint_t		MaxCtrlSendBuffers	= RDS_MAX_CTRL_SEND_BUFFERS;
51 uint_t		MaxCtrlRecvBuffers	= RDS_MAX_CTRL_RECV_BUFFERS;
52 uint_t		DataRecvBufferLWM	= RDS_DATA_RECV_BUFFER_LWM;
53 uint_t		CtrlRecvBufferLWM	= RDS_CTRL_RECV_BUFFER_LWM;
54 uint_t		PendingRxPktsHWM	= RDS_PENDING_RX_PKTS_HWM;
55 uint_t		MinRnrRetry		= RDS_IB_RNR_RETRY;
56 uint8_t		IBPathRetryCount	= RDS_IB_PATH_RETRY;
57 uint8_t		IBPktLifeTime		= RDS_IB_PKT_LT;
58 
59 extern int rdsib_open_ib();
60 extern void rdsib_close_ib();
61 extern void rds_resume_port(in_port_t port);
62 extern int rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip,
63     in_port_t sendport, in_port_t recvport, zoneid_t zoneid);
64 extern boolean_t rds_if_lookup_by_name(char *devname);
65 
66 rds_transport_ops_t rds_ib_transport_ops = {
67 	rdsib_open_ib,
68 	rdsib_close_ib,
69 	rds_sendmsg,
70 	rds_resume_port,
71 	rds_if_lookup_by_name
72 };
73 
74 /* global */
75 rds_state_t	*rdsib_statep = NULL;
76 krwlock_t	rds_loopback_portmap_lock;
77 uint8_t		rds_loopback_portmap[RDS_PORT_MAP_SIZE];
78 ddi_taskq_t	*rds_taskq = NULL;
79 dev_info_t	*rdsib_dev_info = NULL;
80 uint_t		rds_rx_pkts_pending_hwm;
81 
82 #ifdef DEBUG
83 uint32_t	rdsdbglvl = RDS_LOG_L3;
84 #else
85 uint32_t	rdsdbglvl = RDS_LOG_L2;
86 #endif
87 
88 #define		RDS_NUM_TASKQ_THREADS	4
89 
90 static int rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
91 static int rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
92 static int rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
93     void **result);
94 static void rds_read_config_values(dev_info_t *dip);
95 
96 /* Driver entry points */
97 static struct cb_ops	rdsib_cb_ops = {
98 	nulldev,		/* open */
99 	nulldev,		/* close */
100 	nodev,			/* strategy */
101 	nodev,			/* print */
102 	nodev,			/* dump */
103 	nodev,			/* read */
104 	nodev,			/* write */
105 	nodev,			/* ioctl */
106 	nodev,			/* devmap */
107 	nodev,			/* mmap */
108 	nodev,			/* segmap */
109 	nochpoll,		/* poll */
110 	ddi_prop_op,		/* prop_op */
111 	NULL,			/* stream */
112 	D_MP,			/* cb_flag */
113 	CB_REV,			/* rev */
114 	nodev,			/* int (*cb_aread)() */
115 	nodev,			/* int (*cb_awrite)() */
116 };
117 
118 /* Device options */
119 static struct dev_ops rdsib_ops = {
120 	DEVO_REV,		/* devo_rev, */
121 	0,			/* refcnt  */
122 	rdsib_info,		/* info */
123 	nulldev,		/* identify */
124 	nulldev,		/* probe */
125 	rdsib_attach,		/* attach */
126 	rdsib_detach,		/* detach */
127 	nodev,			/* reset */
128 	&rdsib_cb_ops,		/* driver ops - devctl interfaces */
129 	NULL,			/* bus operations */
130 	NULL,			/* power */
131 	ddi_quiesce_not_needed,	/* devo_quiesce */
132 };
133 
134 /*
135  * Module linkage information.
136  */
137 #define	RDS_DEVDESC	"RDS IB driver"
138 static struct modldrv rdsib_modldrv = {
139 	&mod_driverops,		/* Driver module */
140 	RDS_DEVDESC,		/* Driver name and version */
141 	&rdsib_ops,		/* Driver ops */
142 };
143 
144 static struct modlinkage rdsib_modlinkage = {
145 	MODREV_1,
146 	(void *)&rdsib_modldrv,
147 	NULL
148 };
149 
150 /* Called from _init */
151 int
152 rdsib_init()
153 {
154 	/* RDS supports only one instance */
155 	rdsib_statep = kmem_zalloc(sizeof (rds_state_t), KM_SLEEP);
156 
157 	rw_init(&rdsib_statep->rds_sessionlock, NULL, RW_DRIVER, NULL);
158 	rw_init(&rdsib_statep->rds_hca_lock, NULL, RW_DRIVER, NULL);
159 
160 	rw_init(&rds_loopback_portmap_lock, NULL, RW_DRIVER, NULL);
161 	bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE);
162 
163 	mutex_init(&rds_dpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
164 	cv_init(&rds_dpool.pool_cv, NULL, CV_DRIVER, NULL);
165 	mutex_init(&rds_cpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
166 	cv_init(&rds_cpool.pool_cv, NULL, CV_DRIVER, NULL);
167 
168 	/* Initialize logging */
169 	rds_logging_initialization();
170 
171 	RDS_SET_NPORT(1); /* this should never be 0 */
172 
173 	ASSERT(rds_transport_ops == NULL);
174 	rds_transport_ops = &rds_ib_transport_ops;
175 
176 	return (0);
177 }
178 
179 /* Called from _fini */
180 void
181 rdsib_fini()
182 {
183 	/* Stop logging */
184 	rds_logging_destroy();
185 
186 	cv_destroy(&rds_dpool.pool_cv);
187 	mutex_destroy(&rds_dpool.pool_lock);
188 	cv_destroy(&rds_cpool.pool_cv);
189 	mutex_destroy(&rds_cpool.pool_lock);
190 
191 	rw_destroy(&rds_loopback_portmap_lock);
192 
193 	rw_destroy(&rdsib_statep->rds_hca_lock);
194 	rw_destroy(&rdsib_statep->rds_sessionlock);
195 	kmem_free(rdsib_statep, sizeof (rds_state_t));
196 
197 	rds_transport_ops = NULL;
198 }
199 
200 int
201 _init(void)
202 {
203 	int	ret;
204 
205 	if (ibt_hw_is_present() == 0) {
206 		return (ENODEV);
207 	}
208 
209 	ret = rdsib_init();
210 	if (ret != 0) {
211 		return (ret);
212 	}
213 
214 	ret = mod_install(&rdsib_modlinkage);
215 	if (ret != 0) {
216 		/*
217 		 * Could not load module
218 		 */
219 		rdsib_fini();
220 		return (ret);
221 	}
222 
223 	return (0);
224 }
225 
226 int
227 _fini()
228 {
229 	int	ret;
230 
231 	/*
232 	 * Remove module
233 	 */
234 	if ((ret = mod_remove(&rdsib_modlinkage)) != 0) {
235 		return (ret);
236 	}
237 
238 	rdsib_fini();
239 
240 	return (0);
241 }
242 
243 int
244 _info(struct modinfo *modinfop)
245 {
246 	return (mod_info(&rdsib_modlinkage, modinfop));
247 }
248 
249 static int
250 rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
251 {
252 	int	ret;
253 
254 	RDS_DPRINTF2("rdsib_attach", "enter");
255 
256 	if (cmd != DDI_ATTACH)
257 		return (DDI_FAILURE);
258 
259 	if (rdsib_dev_info != NULL) {
260 		RDS_DPRINTF1("rdsib_attach", "Multiple RDS instances are"
261 		    " not supported (rds_dev_info: 0x%p)", rdsib_dev_info);
262 		return (DDI_FAILURE);
263 	}
264 
265 	rdsib_dev_info = dip;
266 	rds_read_config_values(dip);
267 
268 	rds_taskq = ddi_taskq_create(dip, "rds_taskq", RDS_NUM_TASKQ_THREADS,
269 	    TASKQ_DEFAULTPRI, 0);
270 	if (rds_taskq == NULL) {
271 		RDS_DPRINTF1("rdsib_attach",
272 		    "ddi_taskq_create failed for rds_taskq");
273 		rdsib_dev_info = NULL;
274 		return (DDI_FAILURE);
275 	}
276 
277 	ret = ddi_create_minor_node(dip, "rdsib", S_IFCHR, 0, DDI_PSEUDO, 0);
278 	if (ret != DDI_SUCCESS) {
279 		RDS_DPRINTF1("rdsib_attach",
280 		    "ddi_create_minor_node failed: %d", ret);
281 		ddi_taskq_destroy(rds_taskq);
282 		rds_taskq = NULL;
283 		rdsib_dev_info = NULL;
284 		return (DDI_FAILURE);
285 	}
286 
287 	/* Max number of receive buffers on the system */
288 	NDataRX = (MaxNodes - 1) * MaxDataRecvBuffers * 2;
289 
290 	/*
291 	 * High water mark for the receive buffers in the system. If the
292 	 * number of buffers used crosses this mark then all sockets in
293 	 * would be stalled. The port quota for the sockets is set based
294 	 * on this limit.
295 	 */
296 	rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * NDataRX)/100;
297 
298 	ret = rdsib_initialize_ib();
299 	if (ret != 0) {
300 		RDS_DPRINTF1("rdsib_attach",
301 		    "rdsib_initialize_ib failed: %d", ret);
302 		ddi_taskq_destroy(rds_taskq);
303 		rds_taskq = NULL;
304 		rdsib_dev_info = NULL;
305 		return (DDI_FAILURE);
306 	}
307 
308 	RDS_DPRINTF2("rdsib_attach", "return");
309 
310 	return (DDI_SUCCESS);
311 }
312 
313 static int
314 rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
315 {
316 	RDS_DPRINTF2("rdsib_detach", "enter");
317 
318 	if (cmd != DDI_DETACH)
319 		return (DDI_FAILURE);
320 
321 	rdsib_deinitialize_ib();
322 
323 	ddi_remove_minor_node(dip, "rdsib");
324 
325 	/* destroy taskq */
326 	if (rds_taskq != NULL) {
327 		ddi_taskq_destroy(rds_taskq);
328 		rds_taskq = NULL;
329 	}
330 
331 	rdsib_dev_info = NULL;
332 
333 	RDS_DPRINTF2("rdsib_detach", "return");
334 
335 	return (DDI_SUCCESS);
336 }
337 
338 /* ARGSUSED */
339 static int
340 rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
341 {
342 	int ret = DDI_FAILURE;
343 
344 	switch (cmd) {
345 	case DDI_INFO_DEVT2DEVINFO:
346 		if (rdsib_dev_info != NULL) {
347 			*result = (void *)rdsib_dev_info;
348 			ret = DDI_SUCCESS;
349 		}
350 		break;
351 
352 	case DDI_INFO_DEVT2INSTANCE:
353 		*result = NULL;
354 		ret = DDI_SUCCESS;
355 		break;
356 
357 	default:
358 		break;
359 	}
360 
361 	return (ret);
362 }
363 
364 static void
365 rds_read_config_values(dev_info_t *dip)
366 {
367 	MaxNodes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
368 	    "MaxNodes", RDS_MAX_NODES);
369 
370 	UserBufferSize = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
371 	    DDI_PROP_DONTPASS, "UserBufferSize", RDS_USER_DATA_BUFFER_SIZE);
372 
373 	MaxDataSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
374 	    DDI_PROP_DONTPASS, "MaxDataSendBuffers", RDS_MAX_DATA_SEND_BUFFERS);
375 
376 	MaxDataRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
377 	    DDI_PROP_DONTPASS, "MaxDataRecvBuffers", RDS_MAX_DATA_RECV_BUFFERS);
378 
379 	MaxCtrlSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
380 	    DDI_PROP_DONTPASS, "MaxCtrlSendBuffers", RDS_MAX_CTRL_SEND_BUFFERS);
381 
382 	MaxCtrlRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
383 	    DDI_PROP_DONTPASS, "MaxCtrlRecvBuffers", RDS_MAX_CTRL_RECV_BUFFERS);
384 
385 	DataRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
386 	    DDI_PROP_DONTPASS, "DataRecvBufferLWM", RDS_DATA_RECV_BUFFER_LWM);
387 
388 	CtrlRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
389 	    DDI_PROP_DONTPASS, "CtrlRecvBufferLWM", RDS_CTRL_RECV_BUFFER_LWM);
390 
391 	PendingRxPktsHWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
392 	    DDI_PROP_DONTPASS, "PendingRxPktsHWM", RDS_PENDING_RX_PKTS_HWM);
393 
394 	MinRnrRetry = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
395 	    "MinRnrRetry", RDS_IB_RNR_RETRY);
396 
397 	IBPathRetryCount = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
398 	    DDI_PROP_DONTPASS, "IBPathRetryCount", RDS_IB_PATH_RETRY);
399 
400 	IBPktLifeTime = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
401 	    DDI_PROP_DONTPASS, "IBPktLifeTime", RDS_IB_PKT_LT);
402 
403 	rdsdbglvl = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
404 	    "rdsdbglvl", RDS_LOG_L2);
405 
406 	if (MaxNodes < 2) {
407 		cmn_err(CE_WARN, "MaxNodes is set to less than 2");
408 		MaxNodes = 2;
409 	}
410 }
411