1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26
27 #include <sys/types.h>
28 #include <sys/stat.h>
29 #include <sys/conf.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/modctl.h>
33 #include <inet/ip.h>
34 #include <sys/ib/clients/rds/rdsib_ib.h>
35 #include <sys/ib/clients/rds/rdsib_buf.h>
36 #include <sys/ib/clients/rds/rdsib_cm.h>
37 #include <sys/ib/clients/rds/rdsib_protocol.h>
38 #include <sys/ib/clients/rds/rds_transport.h>
39 #include <sys/ib/clients/rds/rds_kstat.h>
40
41 /*
42 * Global Configuration Variables
43 * As defined in RDS proposal
44 */
45 uint_t MaxNodes = RDS_MAX_NODES;
46 uint_t RdsPktSize;
47 uint_t NDataRX;
48 uint_t MaxDataSendBuffers = RDS_MAX_DATA_SEND_BUFFERS;
49 uint_t MaxDataRecvBuffers = RDS_MAX_DATA_RECV_BUFFERS;
50 uint_t MaxCtrlSendBuffers = RDS_MAX_CTRL_SEND_BUFFERS;
51 uint_t MaxCtrlRecvBuffers = RDS_MAX_CTRL_RECV_BUFFERS;
52 uint_t DataRecvBufferLWM = RDS_DATA_RECV_BUFFER_LWM;
53 uint_t CtrlRecvBufferLWM = RDS_CTRL_RECV_BUFFER_LWM;
54 uint_t PendingRxPktsHWM = RDS_PENDING_RX_PKTS_HWM;
55 uint_t MinRnrRetry = RDS_IB_RNR_RETRY;
56 uint8_t IBPathRetryCount = RDS_IB_PATH_RETRY;
57 uint8_t IBPktLifeTime = RDS_IB_PKT_LT;
58
59 extern int rdsib_open_ib();
60 extern void rdsib_close_ib();
61 extern void rds_resume_port(in_port_t port);
62 extern int rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip,
63 in_port_t sendport, in_port_t recvport, zoneid_t zoneid);
64 extern boolean_t rds_if_lookup_by_name(char *devname);
65
66 rds_transport_ops_t rds_ib_transport_ops = {
67 rdsib_open_ib,
68 rdsib_close_ib,
69 rds_sendmsg,
70 rds_resume_port,
71 rds_if_lookup_by_name
72 };
73
74 /* global */
75 rds_state_t *rdsib_statep = NULL;
76 krwlock_t rds_loopback_portmap_lock;
77 uint8_t rds_loopback_portmap[RDS_PORT_MAP_SIZE];
78 ddi_taskq_t *rds_taskq = NULL;
79 dev_info_t *rdsib_dev_info = NULL;
80 uint_t rds_rx_pkts_pending_hwm;
81
82 #ifdef DEBUG
83 uint32_t rdsdbglvl = RDS_LOG_L3;
84 #else
85 uint32_t rdsdbglvl = RDS_LOG_L2;
86 #endif
87
88 #define RDS_NUM_TASKQ_THREADS 4
89
90 static int rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
91 static int rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
92 static int rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
93 void **result);
94 static void rds_read_config_values(dev_info_t *dip);
95
96 /* Driver entry points */
97 static struct cb_ops rdsib_cb_ops = {
98 nulldev, /* open */
99 nulldev, /* close */
100 nodev, /* strategy */
101 nodev, /* print */
102 nodev, /* dump */
103 nodev, /* read */
104 nodev, /* write */
105 nodev, /* ioctl */
106 nodev, /* devmap */
107 nodev, /* mmap */
108 nodev, /* segmap */
109 nochpoll, /* poll */
110 ddi_prop_op, /* prop_op */
111 NULL, /* stream */
112 D_MP, /* cb_flag */
113 CB_REV, /* rev */
114 nodev, /* int (*cb_aread)() */
115 nodev, /* int (*cb_awrite)() */
116 };
117
118 /* Device options */
119 static struct dev_ops rdsib_ops = {
120 DEVO_REV, /* devo_rev, */
121 0, /* refcnt */
122 rdsib_info, /* info */
123 nulldev, /* identify */
124 nulldev, /* probe */
125 rdsib_attach, /* attach */
126 rdsib_detach, /* detach */
127 nodev, /* reset */
128 &rdsib_cb_ops, /* driver ops - devctl interfaces */
129 NULL, /* bus operations */
130 NULL, /* power */
131 ddi_quiesce_not_needed, /* devo_quiesce */
132 };
133
134 /*
135 * Module linkage information.
136 */
137 #define RDS_DEVDESC "RDS IB driver"
138 static struct modldrv rdsib_modldrv = {
139 &mod_driverops, /* Driver module */
140 RDS_DEVDESC, /* Driver name and version */
141 &rdsib_ops, /* Driver ops */
142 };
143
144 static struct modlinkage rdsib_modlinkage = {
145 MODREV_1,
146 (void *)&rdsib_modldrv,
147 NULL
148 };
149
150 /* Called from _init */
151 int
rdsib_init()152 rdsib_init()
153 {
154 /* RDS supports only one instance */
155 rdsib_statep = kmem_zalloc(sizeof (rds_state_t), KM_SLEEP);
156
157 rw_init(&rdsib_statep->rds_sessionlock, NULL, RW_DRIVER, NULL);
158 rw_init(&rdsib_statep->rds_hca_lock, NULL, RW_DRIVER, NULL);
159
160 rw_init(&rds_loopback_portmap_lock, NULL, RW_DRIVER, NULL);
161 bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE);
162
163 mutex_init(&rds_dpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
164 cv_init(&rds_dpool.pool_cv, NULL, CV_DRIVER, NULL);
165 mutex_init(&rds_cpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
166 cv_init(&rds_cpool.pool_cv, NULL, CV_DRIVER, NULL);
167
168 /* Initialize logging */
169 rds_logging_initialization();
170
171 RDS_SET_NPORT(1); /* this should never be 0 */
172
173 ASSERT(rds_transport_ops == NULL);
174 rds_transport_ops = &rds_ib_transport_ops;
175
176 return (0);
177 }
178
179 /* Called from _fini */
180 void
rdsib_fini()181 rdsib_fini()
182 {
183 /* Stop logging */
184 rds_logging_destroy();
185
186 cv_destroy(&rds_dpool.pool_cv);
187 mutex_destroy(&rds_dpool.pool_lock);
188 cv_destroy(&rds_cpool.pool_cv);
189 mutex_destroy(&rds_cpool.pool_lock);
190
191 rw_destroy(&rds_loopback_portmap_lock);
192
193 rw_destroy(&rdsib_statep->rds_hca_lock);
194 rw_destroy(&rdsib_statep->rds_sessionlock);
195 kmem_free(rdsib_statep, sizeof (rds_state_t));
196
197 rds_transport_ops = NULL;
198 }
199
200 int
_init(void)201 _init(void)
202 {
203 int ret;
204
205 if (ibt_hw_is_present() == 0) {
206 return (ENODEV);
207 }
208
209 ret = rdsib_init();
210 if (ret != 0) {
211 return (ret);
212 }
213
214 ret = mod_install(&rdsib_modlinkage);
215 if (ret != 0) {
216 /*
217 * Could not load module
218 */
219 rdsib_fini();
220 return (ret);
221 }
222
223 return (0);
224 }
225
226 int
_fini()227 _fini()
228 {
229 int ret;
230
231 /*
232 * Remove module
233 */
234 if ((ret = mod_remove(&rdsib_modlinkage)) != 0) {
235 return (ret);
236 }
237
238 rdsib_fini();
239
240 return (0);
241 }
242
243 int
_info(struct modinfo * modinfop)244 _info(struct modinfo *modinfop)
245 {
246 return (mod_info(&rdsib_modlinkage, modinfop));
247 }
248
249 static int
rdsib_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)250 rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
251 {
252 int ret;
253
254 RDS_DPRINTF2("rdsib_attach", "enter");
255
256 if (cmd != DDI_ATTACH)
257 return (DDI_FAILURE);
258
259 if (rdsib_dev_info != NULL) {
260 RDS_DPRINTF1("rdsib_attach", "Multiple RDS instances are"
261 " not supported (rds_dev_info: 0x%p)", rdsib_dev_info);
262 return (DDI_FAILURE);
263 }
264
265 rdsib_dev_info = dip;
266 rds_read_config_values(dip);
267
268 rds_taskq = ddi_taskq_create(dip, "rds_taskq", RDS_NUM_TASKQ_THREADS,
269 TASKQ_DEFAULTPRI, 0);
270 if (rds_taskq == NULL) {
271 RDS_DPRINTF1("rdsib_attach",
272 "ddi_taskq_create failed for rds_taskq");
273 rdsib_dev_info = NULL;
274 return (DDI_FAILURE);
275 }
276
277 ret = ddi_create_minor_node(dip, "rdsib", S_IFCHR, 0, DDI_PSEUDO, 0);
278 if (ret != DDI_SUCCESS) {
279 RDS_DPRINTF1("rdsib_attach",
280 "ddi_create_minor_node failed: %d", ret);
281 ddi_taskq_destroy(rds_taskq);
282 rds_taskq = NULL;
283 rdsib_dev_info = NULL;
284 return (DDI_FAILURE);
285 }
286
287 /* Max number of receive buffers on the system */
288 NDataRX = (MaxNodes - 1) * MaxDataRecvBuffers * 2;
289
290 /*
291 * High water mark for the receive buffers in the system. If the
292 * number of buffers used crosses this mark then all sockets in
293 * would be stalled. The port quota for the sockets is set based
294 * on this limit.
295 */
296 rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * NDataRX)/100;
297
298 ret = rdsib_initialize_ib();
299 if (ret != 0) {
300 RDS_DPRINTF1("rdsib_attach",
301 "rdsib_initialize_ib failed: %d", ret);
302 ddi_taskq_destroy(rds_taskq);
303 rds_taskq = NULL;
304 rdsib_dev_info = NULL;
305 return (DDI_FAILURE);
306 }
307
308 RDS_DPRINTF2("rdsib_attach", "return");
309
310 return (DDI_SUCCESS);
311 }
312
313 static int
rdsib_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)314 rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
315 {
316 RDS_DPRINTF2("rdsib_detach", "enter");
317
318 if (cmd != DDI_DETACH)
319 return (DDI_FAILURE);
320
321 rdsib_deinitialize_ib();
322
323 ddi_remove_minor_node(dip, "rdsib");
324
325 /* destroy taskq */
326 if (rds_taskq != NULL) {
327 ddi_taskq_destroy(rds_taskq);
328 rds_taskq = NULL;
329 }
330
331 rdsib_dev_info = NULL;
332
333 RDS_DPRINTF2("rdsib_detach", "return");
334
335 return (DDI_SUCCESS);
336 }
337
338 /* ARGSUSED */
339 static int
rdsib_info(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** result)340 rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
341 {
342 int ret = DDI_FAILURE;
343
344 switch (cmd) {
345 case DDI_INFO_DEVT2DEVINFO:
346 if (rdsib_dev_info != NULL) {
347 *result = (void *)rdsib_dev_info;
348 ret = DDI_SUCCESS;
349 }
350 break;
351
352 case DDI_INFO_DEVT2INSTANCE:
353 *result = NULL;
354 ret = DDI_SUCCESS;
355 break;
356
357 default:
358 break;
359 }
360
361 return (ret);
362 }
363
364 static void
rds_read_config_values(dev_info_t * dip)365 rds_read_config_values(dev_info_t *dip)
366 {
367 MaxNodes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
368 "MaxNodes", RDS_MAX_NODES);
369
370 UserBufferSize = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
371 DDI_PROP_DONTPASS, "UserBufferSize", RDS_USER_DATA_BUFFER_SIZE);
372
373 MaxDataSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
374 DDI_PROP_DONTPASS, "MaxDataSendBuffers", RDS_MAX_DATA_SEND_BUFFERS);
375
376 MaxDataRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
377 DDI_PROP_DONTPASS, "MaxDataRecvBuffers", RDS_MAX_DATA_RECV_BUFFERS);
378
379 MaxCtrlSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
380 DDI_PROP_DONTPASS, "MaxCtrlSendBuffers", RDS_MAX_CTRL_SEND_BUFFERS);
381
382 MaxCtrlRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
383 DDI_PROP_DONTPASS, "MaxCtrlRecvBuffers", RDS_MAX_CTRL_RECV_BUFFERS);
384
385 DataRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
386 DDI_PROP_DONTPASS, "DataRecvBufferLWM", RDS_DATA_RECV_BUFFER_LWM);
387
388 CtrlRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
389 DDI_PROP_DONTPASS, "CtrlRecvBufferLWM", RDS_CTRL_RECV_BUFFER_LWM);
390
391 PendingRxPktsHWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
392 DDI_PROP_DONTPASS, "PendingRxPktsHWM", RDS_PENDING_RX_PKTS_HWM);
393
394 MinRnrRetry = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
395 "MinRnrRetry", RDS_IB_RNR_RETRY);
396
397 IBPathRetryCount = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
398 DDI_PROP_DONTPASS, "IBPathRetryCount", RDS_IB_PATH_RETRY);
399
400 IBPktLifeTime = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
401 DDI_PROP_DONTPASS, "IBPktLifeTime", RDS_IB_PKT_LT);
402
403 rdsdbglvl = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
404 "rdsdbglvl", RDS_LOG_L2);
405
406 if (MaxNodes < 2) {
407 cmn_err(CE_WARN, "MaxNodes is set to less than 2");
408 MaxNodes = 2;
409 }
410 }
411