1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26
27 #include <sys/types.h>
28 #include <sys/stat.h>
29 #include <sys/conf.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/modctl.h>
33 #include <inet/ip.h>
34 #include <sys/ib/clients/rds/rdsib_ib.h>
35 #include <sys/ib/clients/rds/rdsib_buf.h>
36 #include <sys/ib/clients/rds/rdsib_cm.h>
37 #include <sys/ib/clients/rds/rdsib_protocol.h>
38 #include <sys/ib/clients/rds/rds_transport.h>
39 #include <sys/ib/clients/rds/rds_kstat.h>
40
41 /*
42 * Global Configuration Variables
43 * As defined in RDS proposal
44 */
45 uint_t MaxNodes = RDS_MAX_NODES;
46 uint_t RdsPktSize;
47 uint_t NDataRX;
48 uint_t MaxDataSendBuffers = RDS_MAX_DATA_SEND_BUFFERS;
49 uint_t MaxDataRecvBuffers = RDS_MAX_DATA_RECV_BUFFERS;
50 uint_t MaxCtrlSendBuffers = RDS_MAX_CTRL_SEND_BUFFERS;
51 uint_t MaxCtrlRecvBuffers = RDS_MAX_CTRL_RECV_BUFFERS;
52 uint_t DataRecvBufferLWM = RDS_DATA_RECV_BUFFER_LWM;
53 uint_t CtrlRecvBufferLWM = RDS_CTRL_RECV_BUFFER_LWM;
54 uint_t PendingRxPktsHWM = RDS_PENDING_RX_PKTS_HWM;
55 uint_t MinRnrRetry = RDS_IB_RNR_RETRY;
56 uint8_t IBPathRetryCount = RDS_IB_PATH_RETRY;
57 uint8_t IBPktLifeTime = RDS_IB_PKT_LT;
58
59 extern int rdsib_open_ib();
60 extern void rdsib_close_ib();
61 extern void rds_resume_port(in_port_t port);
62 extern int rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip,
63 in_port_t sendport, in_port_t recvport, zoneid_t zoneid);
64 extern boolean_t rds_if_lookup_by_name(char *devname);
65
66 rds_transport_ops_t rds_ib_transport_ops = {
67 rdsib_open_ib,
68 rdsib_close_ib,
69 rds_sendmsg,
70 rds_resume_port,
71 rds_if_lookup_by_name
72 };
73
74 /* Global pools of buffers */
75 rds_bufpool_t rds_dpool; /* data pool */
76 rds_bufpool_t rds_cpool; /* ctrl pool */
77
78 /* global */
79 rds_state_t *rdsib_statep = NULL;
80 krwlock_t rds_loopback_portmap_lock;
81 uint8_t rds_loopback_portmap[RDS_PORT_MAP_SIZE];
82 ddi_taskq_t *rds_taskq = NULL;
83 dev_info_t *rdsib_dev_info = NULL;
84 uint_t rds_rx_pkts_pending_hwm;
85
86 #ifdef DEBUG
87 uint32_t rdsdbglvl = RDS_LOG_L3;
88 #else
89 uint32_t rdsdbglvl = RDS_LOG_L2;
90 #endif
91
92 #define RDS_NUM_TASKQ_THREADS 4
93
94 static int rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
95 static int rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
96 static int rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
97 void **result);
98 static void rds_read_config_values(dev_info_t *dip);
99
100 /* Driver entry points */
101 static struct cb_ops rdsib_cb_ops = {
102 nulldev, /* open */
103 nulldev, /* close */
104 nodev, /* strategy */
105 nodev, /* print */
106 nodev, /* dump */
107 nodev, /* read */
108 nodev, /* write */
109 nodev, /* ioctl */
110 nodev, /* devmap */
111 nodev, /* mmap */
112 nodev, /* segmap */
113 nochpoll, /* poll */
114 ddi_prop_op, /* prop_op */
115 NULL, /* stream */
116 D_MP, /* cb_flag */
117 CB_REV, /* rev */
118 nodev, /* int (*cb_aread)() */
119 nodev, /* int (*cb_awrite)() */
120 };
121
122 /* Device options */
123 static struct dev_ops rdsib_ops = {
124 DEVO_REV, /* devo_rev, */
125 0, /* refcnt */
126 rdsib_info, /* info */
127 nulldev, /* identify */
128 nulldev, /* probe */
129 rdsib_attach, /* attach */
130 rdsib_detach, /* detach */
131 nodev, /* reset */
132 &rdsib_cb_ops, /* driver ops - devctl interfaces */
133 NULL, /* bus operations */
134 NULL, /* power */
135 ddi_quiesce_not_needed, /* devo_quiesce */
136 };
137
138 /*
139 * Module linkage information.
140 */
141 #define RDS_DEVDESC "RDS IB driver"
142 static struct modldrv rdsib_modldrv = {
143 &mod_driverops, /* Driver module */
144 RDS_DEVDESC, /* Driver name and version */
145 &rdsib_ops, /* Driver ops */
146 };
147
148 static struct modlinkage rdsib_modlinkage = {
149 MODREV_1,
150 (void *)&rdsib_modldrv,
151 NULL
152 };
153
154 /* Called from _init */
155 int
rdsib_init()156 rdsib_init()
157 {
158 /* RDS supports only one instance */
159 rdsib_statep = kmem_zalloc(sizeof (rds_state_t), KM_SLEEP);
160
161 rw_init(&rdsib_statep->rds_sessionlock, NULL, RW_DRIVER, NULL);
162 rw_init(&rdsib_statep->rds_hca_lock, NULL, RW_DRIVER, NULL);
163
164 rw_init(&rds_loopback_portmap_lock, NULL, RW_DRIVER, NULL);
165 bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE);
166
167 mutex_init(&rds_dpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
168 cv_init(&rds_dpool.pool_cv, NULL, CV_DRIVER, NULL);
169 mutex_init(&rds_cpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
170 cv_init(&rds_cpool.pool_cv, NULL, CV_DRIVER, NULL);
171
172 /* Initialize logging */
173 rds_logging_initialization();
174
175 RDS_SET_NPORT(1); /* this should never be 0 */
176
177 ASSERT(rds_transport_ops == NULL);
178 rds_transport_ops = &rds_ib_transport_ops;
179
180 return (0);
181 }
182
183 /* Called from _fini */
184 void
rdsib_fini()185 rdsib_fini()
186 {
187 /* Stop logging */
188 rds_logging_destroy();
189
190 cv_destroy(&rds_dpool.pool_cv);
191 mutex_destroy(&rds_dpool.pool_lock);
192 cv_destroy(&rds_cpool.pool_cv);
193 mutex_destroy(&rds_cpool.pool_lock);
194
195 rw_destroy(&rds_loopback_portmap_lock);
196
197 rw_destroy(&rdsib_statep->rds_hca_lock);
198 rw_destroy(&rdsib_statep->rds_sessionlock);
199 kmem_free(rdsib_statep, sizeof (rds_state_t));
200
201 rds_transport_ops = NULL;
202 }
203
204 int
_init(void)205 _init(void)
206 {
207 int ret;
208
209 if (ibt_hw_is_present() == 0) {
210 return (ENODEV);
211 }
212
213 ret = rdsib_init();
214 if (ret != 0) {
215 return (ret);
216 }
217
218 ret = mod_install(&rdsib_modlinkage);
219 if (ret != 0) {
220 /*
221 * Could not load module
222 */
223 rdsib_fini();
224 return (ret);
225 }
226
227 return (0);
228 }
229
230 int
_fini()231 _fini()
232 {
233 int ret;
234
235 /*
236 * Remove module
237 */
238 if ((ret = mod_remove(&rdsib_modlinkage)) != 0) {
239 return (ret);
240 }
241
242 rdsib_fini();
243
244 return (0);
245 }
246
247 int
_info(struct modinfo * modinfop)248 _info(struct modinfo *modinfop)
249 {
250 return (mod_info(&rdsib_modlinkage, modinfop));
251 }
252
253 static int
rdsib_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)254 rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
255 {
256 int ret;
257
258 RDS_DPRINTF2("rdsib_attach", "enter");
259
260 if (cmd != DDI_ATTACH)
261 return (DDI_FAILURE);
262
263 if (rdsib_dev_info != NULL) {
264 RDS_DPRINTF1("rdsib_attach", "Multiple RDS instances are"
265 " not supported (rds_dev_info: 0x%p)", rdsib_dev_info);
266 return (DDI_FAILURE);
267 }
268
269 rdsib_dev_info = dip;
270 rds_read_config_values(dip);
271
272 rds_taskq = ddi_taskq_create(dip, "rds_taskq", RDS_NUM_TASKQ_THREADS,
273 TASKQ_DEFAULTPRI, 0);
274 if (rds_taskq == NULL) {
275 RDS_DPRINTF1("rdsib_attach",
276 "ddi_taskq_create failed for rds_taskq");
277 rdsib_dev_info = NULL;
278 return (DDI_FAILURE);
279 }
280
281 ret = ddi_create_minor_node(dip, "rdsib", S_IFCHR, 0, DDI_PSEUDO, 0);
282 if (ret != DDI_SUCCESS) {
283 RDS_DPRINTF1("rdsib_attach",
284 "ddi_create_minor_node failed: %d", ret);
285 ddi_taskq_destroy(rds_taskq);
286 rds_taskq = NULL;
287 rdsib_dev_info = NULL;
288 return (DDI_FAILURE);
289 }
290
291 /* Max number of receive buffers on the system */
292 NDataRX = (MaxNodes - 1) * MaxDataRecvBuffers * 2;
293
294 /*
295 * High water mark for the receive buffers in the system. If the
296 * number of buffers used crosses this mark then all sockets in
297 * would be stalled. The port quota for the sockets is set based
298 * on this limit.
299 */
300 rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * NDataRX)/100;
301
302 ret = rdsib_initialize_ib();
303 if (ret != 0) {
304 RDS_DPRINTF1("rdsib_attach",
305 "rdsib_initialize_ib failed: %d", ret);
306 ddi_taskq_destroy(rds_taskq);
307 rds_taskq = NULL;
308 rdsib_dev_info = NULL;
309 return (DDI_FAILURE);
310 }
311
312 RDS_DPRINTF2("rdsib_attach", "return");
313
314 return (DDI_SUCCESS);
315 }
316
317 static int
rdsib_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)318 rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
319 {
320 RDS_DPRINTF2("rdsib_detach", "enter");
321
322 if (cmd != DDI_DETACH)
323 return (DDI_FAILURE);
324
325 rdsib_deinitialize_ib();
326
327 ddi_remove_minor_node(dip, "rdsib");
328
329 /* destroy taskq */
330 if (rds_taskq != NULL) {
331 ddi_taskq_destroy(rds_taskq);
332 rds_taskq = NULL;
333 }
334
335 rdsib_dev_info = NULL;
336
337 RDS_DPRINTF2("rdsib_detach", "return");
338
339 return (DDI_SUCCESS);
340 }
341
342 /* ARGSUSED */
343 static int
rdsib_info(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** result)344 rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
345 {
346 int ret = DDI_FAILURE;
347
348 switch (cmd) {
349 case DDI_INFO_DEVT2DEVINFO:
350 if (rdsib_dev_info != NULL) {
351 *result = (void *)rdsib_dev_info;
352 ret = DDI_SUCCESS;
353 }
354 break;
355
356 case DDI_INFO_DEVT2INSTANCE:
357 *result = NULL;
358 ret = DDI_SUCCESS;
359 break;
360
361 default:
362 break;
363 }
364
365 return (ret);
366 }
367
368 static void
rds_read_config_values(dev_info_t * dip)369 rds_read_config_values(dev_info_t *dip)
370 {
371 MaxNodes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
372 "MaxNodes", RDS_MAX_NODES);
373
374 UserBufferSize = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
375 DDI_PROP_DONTPASS, "UserBufferSize", RDS_USER_DATA_BUFFER_SIZE);
376
377 MaxDataSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
378 DDI_PROP_DONTPASS, "MaxDataSendBuffers", RDS_MAX_DATA_SEND_BUFFERS);
379
380 MaxDataRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
381 DDI_PROP_DONTPASS, "MaxDataRecvBuffers", RDS_MAX_DATA_RECV_BUFFERS);
382
383 MaxCtrlSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
384 DDI_PROP_DONTPASS, "MaxCtrlSendBuffers", RDS_MAX_CTRL_SEND_BUFFERS);
385
386 MaxCtrlRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
387 DDI_PROP_DONTPASS, "MaxCtrlRecvBuffers", RDS_MAX_CTRL_RECV_BUFFERS);
388
389 DataRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
390 DDI_PROP_DONTPASS, "DataRecvBufferLWM", RDS_DATA_RECV_BUFFER_LWM);
391
392 CtrlRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
393 DDI_PROP_DONTPASS, "CtrlRecvBufferLWM", RDS_CTRL_RECV_BUFFER_LWM);
394
395 PendingRxPktsHWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
396 DDI_PROP_DONTPASS, "PendingRxPktsHWM", RDS_PENDING_RX_PKTS_HWM);
397
398 MinRnrRetry = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
399 "MinRnrRetry", RDS_IB_RNR_RETRY);
400
401 IBPathRetryCount = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
402 DDI_PROP_DONTPASS, "IBPathRetryCount", RDS_IB_PATH_RETRY);
403
404 IBPktLifeTime = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
405 DDI_PROP_DONTPASS, "IBPktLifeTime", RDS_IB_PKT_LT);
406
407 rdsdbglvl = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
408 "rdsdbglvl", RDS_LOG_L2);
409
410 if (MaxNodes < 2) {
411 cmn_err(CE_WARN, "MaxNodes is set to less than 2");
412 MaxNodes = 2;
413 }
414 }
415