xref: /illumos-gate/usr/src/uts/common/io/ib/clients/rds/rdsib.c (revision d4039345c8fe6e54a31d17d91e86e393fdcf401b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 #include <sys/types.h>
28 #include <sys/stat.h>
29 #include <sys/conf.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/modctl.h>
33 #include <inet/ip.h>
34 #include <sys/ib/clients/rds/rdsib_ib.h>
35 #include <sys/ib/clients/rds/rdsib_buf.h>
36 #include <sys/ib/clients/rds/rdsib_cm.h>
37 #include <sys/ib/clients/rds/rdsib_protocol.h>
38 #include <sys/ib/clients/rds/rds_transport.h>
39 #include <sys/ib/clients/rds/rds_kstat.h>
40 
41 /*
42  * Global Configuration Variables
43  * As defined in RDS proposal
44  */
45 uint_t		MaxNodes		= RDS_MAX_NODES;
46 uint_t		RdsPktSize;
47 uint_t		NDataRX;
48 uint_t		MaxDataSendBuffers	= RDS_MAX_DATA_SEND_BUFFERS;
49 uint_t		MaxDataRecvBuffers	= RDS_MAX_DATA_RECV_BUFFERS;
50 uint_t		MaxCtrlSendBuffers	= RDS_MAX_CTRL_SEND_BUFFERS;
51 uint_t		MaxCtrlRecvBuffers	= RDS_MAX_CTRL_RECV_BUFFERS;
52 uint_t		DataRecvBufferLWM	= RDS_DATA_RECV_BUFFER_LWM;
53 uint_t		CtrlRecvBufferLWM	= RDS_CTRL_RECV_BUFFER_LWM;
54 uint_t		PendingRxPktsHWM	= RDS_PENDING_RX_PKTS_HWM;
55 uint_t		MinRnrRetry		= RDS_IB_RNR_RETRY;
56 uint8_t		IBPathRetryCount	= RDS_IB_PATH_RETRY;
57 uint8_t		IBPktLifeTime		= RDS_IB_PKT_LT;
58 
59 extern int rdsib_open_ib();
60 extern void rdsib_close_ib();
61 extern void rds_resume_port(in_port_t port);
62 extern int rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip,
63     in_port_t sendport, in_port_t recvport, zoneid_t zoneid);
64 extern boolean_t rds_if_lookup_by_name(char *devname);
65 
66 rds_transport_ops_t rds_ib_transport_ops = {
67 	rdsib_open_ib,
68 	rdsib_close_ib,
69 	rds_sendmsg,
70 	rds_resume_port,
71 	rds_if_lookup_by_name
72 };
73 
74 /* Global pools of buffers */
75 rds_bufpool_t	rds_dpool; /* data pool */
76 rds_bufpool_t	rds_cpool; /* ctrl pool */
77 
78 /* global */
79 rds_state_t	*rdsib_statep = NULL;
80 krwlock_t	rds_loopback_portmap_lock;
81 uint8_t		rds_loopback_portmap[RDS_PORT_MAP_SIZE];
82 ddi_taskq_t	*rds_taskq = NULL;
83 dev_info_t	*rdsib_dev_info = NULL;
84 uint_t		rds_rx_pkts_pending_hwm;
85 
86 #ifdef DEBUG
87 uint32_t	rdsdbglvl = RDS_LOG_L3;
88 #else
89 uint32_t	rdsdbglvl = RDS_LOG_L2;
90 #endif
91 
92 #define		RDS_NUM_TASKQ_THREADS	4
93 
94 static int rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
95 static int rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
96 static int rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
97     void **result);
98 static void rds_read_config_values(dev_info_t *dip);
99 
100 /* Driver entry points */
101 static struct cb_ops	rdsib_cb_ops = {
102 	nulldev,		/* open */
103 	nulldev,		/* close */
104 	nodev,			/* strategy */
105 	nodev,			/* print */
106 	nodev,			/* dump */
107 	nodev,			/* read */
108 	nodev,			/* write */
109 	nodev,			/* ioctl */
110 	nodev,			/* devmap */
111 	nodev,			/* mmap */
112 	nodev,			/* segmap */
113 	nochpoll,		/* poll */
114 	ddi_prop_op,		/* prop_op */
115 	NULL,			/* stream */
116 	D_MP,			/* cb_flag */
117 	CB_REV,			/* rev */
118 	nodev,			/* int (*cb_aread)() */
119 	nodev,			/* int (*cb_awrite)() */
120 };
121 
122 /* Device options */
123 static struct dev_ops rdsib_ops = {
124 	DEVO_REV,		/* devo_rev, */
125 	0,			/* refcnt  */
126 	rdsib_info,		/* info */
127 	nulldev,		/* identify */
128 	nulldev,		/* probe */
129 	rdsib_attach,		/* attach */
130 	rdsib_detach,		/* detach */
131 	nodev,			/* reset */
132 	&rdsib_cb_ops,		/* driver ops - devctl interfaces */
133 	NULL,			/* bus operations */
134 	NULL,			/* power */
135 	ddi_quiesce_not_needed,	/* devo_quiesce */
136 };
137 
138 /*
139  * Module linkage information.
140  */
141 #define	RDS_DEVDESC	"RDS IB driver"
142 static struct modldrv rdsib_modldrv = {
143 	&mod_driverops,		/* Driver module */
144 	RDS_DEVDESC,		/* Driver name and version */
145 	&rdsib_ops,		/* Driver ops */
146 };
147 
148 static struct modlinkage rdsib_modlinkage = {
149 	MODREV_1,
150 	(void *)&rdsib_modldrv,
151 	NULL
152 };
153 
154 /* Called from _init */
155 int
156 rdsib_init()
157 {
158 	/* RDS supports only one instance */
159 	rdsib_statep = kmem_zalloc(sizeof (rds_state_t), KM_SLEEP);
160 
161 	rw_init(&rdsib_statep->rds_sessionlock, NULL, RW_DRIVER, NULL);
162 	rw_init(&rdsib_statep->rds_hca_lock, NULL, RW_DRIVER, NULL);
163 
164 	rw_init(&rds_loopback_portmap_lock, NULL, RW_DRIVER, NULL);
165 	bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE);
166 
167 	mutex_init(&rds_dpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
168 	cv_init(&rds_dpool.pool_cv, NULL, CV_DRIVER, NULL);
169 	mutex_init(&rds_cpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
170 	cv_init(&rds_cpool.pool_cv, NULL, CV_DRIVER, NULL);
171 
172 	/* Initialize logging */
173 	rds_logging_initialization();
174 
175 	RDS_SET_NPORT(1); /* this should never be 0 */
176 
177 	ASSERT(rds_transport_ops == NULL);
178 	rds_transport_ops = &rds_ib_transport_ops;
179 
180 	return (0);
181 }
182 
183 /* Called from _fini */
184 void
185 rdsib_fini()
186 {
187 	/* Stop logging */
188 	rds_logging_destroy();
189 
190 	cv_destroy(&rds_dpool.pool_cv);
191 	mutex_destroy(&rds_dpool.pool_lock);
192 	cv_destroy(&rds_cpool.pool_cv);
193 	mutex_destroy(&rds_cpool.pool_lock);
194 
195 	rw_destroy(&rds_loopback_portmap_lock);
196 
197 	rw_destroy(&rdsib_statep->rds_hca_lock);
198 	rw_destroy(&rdsib_statep->rds_sessionlock);
199 	kmem_free(rdsib_statep, sizeof (rds_state_t));
200 
201 	rds_transport_ops = NULL;
202 }
203 
204 int
205 _init(void)
206 {
207 	int	ret;
208 
209 	if (ibt_hw_is_present() == 0) {
210 		return (ENODEV);
211 	}
212 
213 	ret = rdsib_init();
214 	if (ret != 0) {
215 		return (ret);
216 	}
217 
218 	ret = mod_install(&rdsib_modlinkage);
219 	if (ret != 0) {
220 		/*
221 		 * Could not load module
222 		 */
223 		rdsib_fini();
224 		return (ret);
225 	}
226 
227 	return (0);
228 }
229 
230 int
231 _fini()
232 {
233 	int	ret;
234 
235 	/*
236 	 * Remove module
237 	 */
238 	if ((ret = mod_remove(&rdsib_modlinkage)) != 0) {
239 		return (ret);
240 	}
241 
242 	rdsib_fini();
243 
244 	return (0);
245 }
246 
247 int
248 _info(struct modinfo *modinfop)
249 {
250 	return (mod_info(&rdsib_modlinkage, modinfop));
251 }
252 
253 static int
254 rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
255 {
256 	int	ret;
257 
258 	RDS_DPRINTF2("rdsib_attach", "enter");
259 
260 	if (cmd != DDI_ATTACH)
261 		return (DDI_FAILURE);
262 
263 	if (rdsib_dev_info != NULL) {
264 		RDS_DPRINTF1("rdsib_attach", "Multiple RDS instances are"
265 		    " not supported (rds_dev_info: 0x%p)", rdsib_dev_info);
266 		return (DDI_FAILURE);
267 	}
268 
269 	rdsib_dev_info = dip;
270 	rds_read_config_values(dip);
271 
272 	rds_taskq = ddi_taskq_create(dip, "rds_taskq", RDS_NUM_TASKQ_THREADS,
273 	    TASKQ_DEFAULTPRI, 0);
274 	if (rds_taskq == NULL) {
275 		RDS_DPRINTF1("rdsib_attach",
276 		    "ddi_taskq_create failed for rds_taskq");
277 		rdsib_dev_info = NULL;
278 		return (DDI_FAILURE);
279 	}
280 
281 	ret = ddi_create_minor_node(dip, "rdsib", S_IFCHR, 0, DDI_PSEUDO, 0);
282 	if (ret != DDI_SUCCESS) {
283 		RDS_DPRINTF1("rdsib_attach",
284 		    "ddi_create_minor_node failed: %d", ret);
285 		ddi_taskq_destroy(rds_taskq);
286 		rds_taskq = NULL;
287 		rdsib_dev_info = NULL;
288 		return (DDI_FAILURE);
289 	}
290 
291 	/* Max number of receive buffers on the system */
292 	NDataRX = (MaxNodes - 1) * MaxDataRecvBuffers * 2;
293 
294 	/*
295 	 * High water mark for the receive buffers in the system. If the
296 	 * number of buffers used crosses this mark then all sockets in
297 	 * would be stalled. The port quota for the sockets is set based
298 	 * on this limit.
299 	 */
300 	rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * NDataRX)/100;
301 
302 	ret = rdsib_initialize_ib();
303 	if (ret != 0) {
304 		RDS_DPRINTF1("rdsib_attach",
305 		    "rdsib_initialize_ib failed: %d", ret);
306 		ddi_taskq_destroy(rds_taskq);
307 		rds_taskq = NULL;
308 		rdsib_dev_info = NULL;
309 		return (DDI_FAILURE);
310 	}
311 
312 	RDS_DPRINTF2("rdsib_attach", "return");
313 
314 	return (DDI_SUCCESS);
315 }
316 
317 static int
318 rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
319 {
320 	RDS_DPRINTF2("rdsib_detach", "enter");
321 
322 	if (cmd != DDI_DETACH)
323 		return (DDI_FAILURE);
324 
325 	rdsib_deinitialize_ib();
326 
327 	ddi_remove_minor_node(dip, "rdsib");
328 
329 	/* destroy taskq */
330 	if (rds_taskq != NULL) {
331 		ddi_taskq_destroy(rds_taskq);
332 		rds_taskq = NULL;
333 	}
334 
335 	rdsib_dev_info = NULL;
336 
337 	RDS_DPRINTF2("rdsib_detach", "return");
338 
339 	return (DDI_SUCCESS);
340 }
341 
342 /* ARGSUSED */
343 static int
344 rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
345 {
346 	int ret = DDI_FAILURE;
347 
348 	switch (cmd) {
349 	case DDI_INFO_DEVT2DEVINFO:
350 		if (rdsib_dev_info != NULL) {
351 			*result = (void *)rdsib_dev_info;
352 			ret = DDI_SUCCESS;
353 		}
354 		break;
355 
356 	case DDI_INFO_DEVT2INSTANCE:
357 		*result = NULL;
358 		ret = DDI_SUCCESS;
359 		break;
360 
361 	default:
362 		break;
363 	}
364 
365 	return (ret);
366 }
367 
368 static void
369 rds_read_config_values(dev_info_t *dip)
370 {
371 	MaxNodes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
372 	    "MaxNodes", RDS_MAX_NODES);
373 
374 	UserBufferSize = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
375 	    DDI_PROP_DONTPASS, "UserBufferSize", RDS_USER_DATA_BUFFER_SIZE);
376 
377 	MaxDataSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
378 	    DDI_PROP_DONTPASS, "MaxDataSendBuffers", RDS_MAX_DATA_SEND_BUFFERS);
379 
380 	MaxDataRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
381 	    DDI_PROP_DONTPASS, "MaxDataRecvBuffers", RDS_MAX_DATA_RECV_BUFFERS);
382 
383 	MaxCtrlSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
384 	    DDI_PROP_DONTPASS, "MaxCtrlSendBuffers", RDS_MAX_CTRL_SEND_BUFFERS);
385 
386 	MaxCtrlRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
387 	    DDI_PROP_DONTPASS, "MaxCtrlRecvBuffers", RDS_MAX_CTRL_RECV_BUFFERS);
388 
389 	DataRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
390 	    DDI_PROP_DONTPASS, "DataRecvBufferLWM", RDS_DATA_RECV_BUFFER_LWM);
391 
392 	CtrlRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
393 	    DDI_PROP_DONTPASS, "CtrlRecvBufferLWM", RDS_CTRL_RECV_BUFFER_LWM);
394 
395 	PendingRxPktsHWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
396 	    DDI_PROP_DONTPASS, "PendingRxPktsHWM", RDS_PENDING_RX_PKTS_HWM);
397 
398 	MinRnrRetry = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
399 	    "MinRnrRetry", RDS_IB_RNR_RETRY);
400 
401 	IBPathRetryCount = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
402 	    DDI_PROP_DONTPASS, "IBPathRetryCount", RDS_IB_PATH_RETRY);
403 
404 	IBPktLifeTime = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
405 	    DDI_PROP_DONTPASS, "IBPktLifeTime", RDS_IB_PKT_LT);
406 
407 	rdsdbglvl = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
408 	    "rdsdbglvl", RDS_LOG_L2);
409 
410 	if (MaxNodes < 2) {
411 		cmn_err(CE_WARN, "MaxNodes is set to less than 2");
412 		MaxNodes = 2;
413 	}
414 }
415