1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 27 #include <sys/types.h> 28 #include <sys/stat.h> 29 #include <sys/conf.h> 30 #include <sys/ddi.h> 31 #include <sys/sunddi.h> 32 #include <sys/modctl.h> 33 #include <inet/ip.h> 34 #include <sys/ib/clients/rds/rdsib_ib.h> 35 #include <sys/ib/clients/rds/rdsib_buf.h> 36 #include <sys/ib/clients/rds/rdsib_cm.h> 37 #include <sys/ib/clients/rds/rdsib_protocol.h> 38 #include <sys/ib/clients/rds/rds_transport.h> 39 #include <sys/ib/clients/rds/rds_kstat.h> 40 41 /* 42 * Global Configuration Variables 43 * As defined in RDS proposal 44 */ 45 uint_t MaxNodes = RDS_MAX_NODES; 46 uint_t RdsPktSize; 47 uint_t NDataRX; 48 uint_t MaxDataSendBuffers = RDS_MAX_DATA_SEND_BUFFERS; 49 uint_t MaxDataRecvBuffers = RDS_MAX_DATA_RECV_BUFFERS; 50 uint_t MaxCtrlSendBuffers = RDS_MAX_CTRL_SEND_BUFFERS; 51 uint_t MaxCtrlRecvBuffers = RDS_MAX_CTRL_RECV_BUFFERS; 52 uint_t DataRecvBufferLWM = RDS_DATA_RECV_BUFFER_LWM; 53 uint_t CtrlRecvBufferLWM = RDS_CTRL_RECV_BUFFER_LWM; 54 uint_t PendingRxPktsHWM = RDS_PENDING_RX_PKTS_HWM; 55 uint_t MinRnrRetry = RDS_IB_RNR_RETRY; 56 uint8_t IBPathRetryCount = RDS_IB_PATH_RETRY; 57 uint8_t IBPktLifeTime = RDS_IB_PKT_LT; 58 59 extern int rdsib_open_ib(); 60 extern void rdsib_close_ib(); 61 extern void rds_resume_port(in_port_t port); 62 extern int rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip, 63 in_port_t sendport, in_port_t recvport, zoneid_t zoneid); 64 extern boolean_t rds_if_lookup_by_name(char *devname); 65 66 rds_transport_ops_t rds_ib_transport_ops = { 67 rdsib_open_ib, 68 rdsib_close_ib, 69 rds_sendmsg, 70 rds_resume_port, 71 rds_if_lookup_by_name 72 }; 73 74 /* Global pools of buffers */ 75 rds_bufpool_t rds_dpool; /* data pool */ 76 rds_bufpool_t rds_cpool; /* ctrl pool */ 77 78 /* global */ 79 rds_state_t *rdsib_statep = NULL; 80 krwlock_t rds_loopback_portmap_lock; 81 uint8_t rds_loopback_portmap[RDS_PORT_MAP_SIZE]; 82 ddi_taskq_t *rds_taskq = NULL; 83 dev_info_t *rdsib_dev_info = NULL; 84 uint_t rds_rx_pkts_pending_hwm; 85 86 #ifdef DEBUG 87 uint32_t rdsdbglvl = RDS_LOG_L3; 88 #else 89 uint32_t rdsdbglvl = RDS_LOG_L2; 90 #endif 91 92 #define RDS_NUM_TASKQ_THREADS 4 93 94 static int rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 95 static int rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 96 static int rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, 97 void **result); 98 static void rds_read_config_values(dev_info_t *dip); 99 100 /* Driver entry points */ 101 static struct cb_ops rdsib_cb_ops = { 102 nulldev, /* open */ 103 nulldev, /* close */ 104 nodev, /* strategy */ 105 nodev, /* print */ 106 nodev, /* dump */ 107 nodev, /* read */ 108 nodev, /* write */ 109 nodev, /* ioctl */ 110 nodev, /* devmap */ 111 nodev, /* mmap */ 112 nodev, /* segmap */ 113 nochpoll, /* poll */ 114 ddi_prop_op, /* prop_op */ 115 NULL, /* stream */ 116 D_MP, /* cb_flag */ 117 CB_REV, /* rev */ 118 nodev, /* int (*cb_aread)() */ 119 nodev, /* int (*cb_awrite)() */ 120 }; 121 122 /* Device options */ 123 static struct dev_ops rdsib_ops = { 124 DEVO_REV, /* devo_rev, */ 125 0, /* refcnt */ 126 rdsib_info, /* info */ 127 nulldev, /* identify */ 128 nulldev, /* probe */ 129 rdsib_attach, /* attach */ 130 rdsib_detach, /* detach */ 131 nodev, /* reset */ 132 &rdsib_cb_ops, /* driver ops - devctl interfaces */ 133 NULL, /* bus operations */ 134 NULL, /* power */ 135 ddi_quiesce_not_needed, /* devo_quiesce */ 136 }; 137 138 /* 139 * Module linkage information. 140 */ 141 #define RDS_DEVDESC "RDS IB driver" 142 static struct modldrv rdsib_modldrv = { 143 &mod_driverops, /* Driver module */ 144 RDS_DEVDESC, /* Driver name and version */ 145 &rdsib_ops, /* Driver ops */ 146 }; 147 148 static struct modlinkage rdsib_modlinkage = { 149 MODREV_1, 150 (void *)&rdsib_modldrv, 151 NULL 152 }; 153 154 /* Called from _init */ 155 int 156 rdsib_init() 157 { 158 /* RDS supports only one instance */ 159 rdsib_statep = kmem_zalloc(sizeof (rds_state_t), KM_SLEEP); 160 161 rw_init(&rdsib_statep->rds_sessionlock, NULL, RW_DRIVER, NULL); 162 rw_init(&rdsib_statep->rds_hca_lock, NULL, RW_DRIVER, NULL); 163 164 rw_init(&rds_loopback_portmap_lock, NULL, RW_DRIVER, NULL); 165 bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE); 166 167 mutex_init(&rds_dpool.pool_lock, NULL, MUTEX_DRIVER, NULL); 168 cv_init(&rds_dpool.pool_cv, NULL, CV_DRIVER, NULL); 169 mutex_init(&rds_cpool.pool_lock, NULL, MUTEX_DRIVER, NULL); 170 cv_init(&rds_cpool.pool_cv, NULL, CV_DRIVER, NULL); 171 172 /* Initialize logging */ 173 rds_logging_initialization(); 174 175 RDS_SET_NPORT(1); /* this should never be 0 */ 176 177 ASSERT(rds_transport_ops == NULL); 178 rds_transport_ops = &rds_ib_transport_ops; 179 180 return (0); 181 } 182 183 /* Called from _fini */ 184 void 185 rdsib_fini() 186 { 187 /* Stop logging */ 188 rds_logging_destroy(); 189 190 cv_destroy(&rds_dpool.pool_cv); 191 mutex_destroy(&rds_dpool.pool_lock); 192 cv_destroy(&rds_cpool.pool_cv); 193 mutex_destroy(&rds_cpool.pool_lock); 194 195 rw_destroy(&rds_loopback_portmap_lock); 196 197 rw_destroy(&rdsib_statep->rds_hca_lock); 198 rw_destroy(&rdsib_statep->rds_sessionlock); 199 kmem_free(rdsib_statep, sizeof (rds_state_t)); 200 201 rds_transport_ops = NULL; 202 } 203 204 int 205 _init(void) 206 { 207 int ret; 208 209 if (ibt_hw_is_present() == 0) { 210 return (ENODEV); 211 } 212 213 ret = rdsib_init(); 214 if (ret != 0) { 215 return (ret); 216 } 217 218 ret = mod_install(&rdsib_modlinkage); 219 if (ret != 0) { 220 /* 221 * Could not load module 222 */ 223 rdsib_fini(); 224 return (ret); 225 } 226 227 return (0); 228 } 229 230 int 231 _fini() 232 { 233 int ret; 234 235 /* 236 * Remove module 237 */ 238 if ((ret = mod_remove(&rdsib_modlinkage)) != 0) { 239 return (ret); 240 } 241 242 rdsib_fini(); 243 244 return (0); 245 } 246 247 int 248 _info(struct modinfo *modinfop) 249 { 250 return (mod_info(&rdsib_modlinkage, modinfop)); 251 } 252 253 static int 254 rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 255 { 256 int ret; 257 258 RDS_DPRINTF2("rdsib_attach", "enter"); 259 260 if (cmd != DDI_ATTACH) 261 return (DDI_FAILURE); 262 263 if (rdsib_dev_info != NULL) { 264 RDS_DPRINTF1("rdsib_attach", "Multiple RDS instances are" 265 " not supported (rds_dev_info: 0x%p)", rdsib_dev_info); 266 return (DDI_FAILURE); 267 } 268 269 rdsib_dev_info = dip; 270 rds_read_config_values(dip); 271 272 rds_taskq = ddi_taskq_create(dip, "rds_taskq", RDS_NUM_TASKQ_THREADS, 273 TASKQ_DEFAULTPRI, 0); 274 if (rds_taskq == NULL) { 275 RDS_DPRINTF1("rdsib_attach", 276 "ddi_taskq_create failed for rds_taskq"); 277 rdsib_dev_info = NULL; 278 return (DDI_FAILURE); 279 } 280 281 ret = ddi_create_minor_node(dip, "rdsib", S_IFCHR, 0, DDI_PSEUDO, 0); 282 if (ret != DDI_SUCCESS) { 283 RDS_DPRINTF1("rdsib_attach", 284 "ddi_create_minor_node failed: %d", ret); 285 ddi_taskq_destroy(rds_taskq); 286 rds_taskq = NULL; 287 rdsib_dev_info = NULL; 288 return (DDI_FAILURE); 289 } 290 291 /* Max number of receive buffers on the system */ 292 NDataRX = (MaxNodes - 1) * MaxDataRecvBuffers * 2; 293 294 /* 295 * High water mark for the receive buffers in the system. If the 296 * number of buffers used crosses this mark then all sockets in 297 * would be stalled. The port quota for the sockets is set based 298 * on this limit. 299 */ 300 rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * NDataRX)/100; 301 302 ret = rdsib_initialize_ib(); 303 if (ret != 0) { 304 RDS_DPRINTF1("rdsib_attach", 305 "rdsib_initialize_ib failed: %d", ret); 306 ddi_taskq_destroy(rds_taskq); 307 rds_taskq = NULL; 308 rdsib_dev_info = NULL; 309 return (DDI_FAILURE); 310 } 311 312 RDS_DPRINTF2("rdsib_attach", "return"); 313 314 return (DDI_SUCCESS); 315 } 316 317 static int 318 rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 319 { 320 RDS_DPRINTF2("rdsib_detach", "enter"); 321 322 if (cmd != DDI_DETACH) 323 return (DDI_FAILURE); 324 325 rdsib_deinitialize_ib(); 326 327 ddi_remove_minor_node(dip, "rdsib"); 328 329 /* destroy taskq */ 330 if (rds_taskq != NULL) { 331 ddi_taskq_destroy(rds_taskq); 332 rds_taskq = NULL; 333 } 334 335 rdsib_dev_info = NULL; 336 337 RDS_DPRINTF2("rdsib_detach", "return"); 338 339 return (DDI_SUCCESS); 340 } 341 342 /* ARGSUSED */ 343 static int 344 rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) 345 { 346 int ret = DDI_FAILURE; 347 348 switch (cmd) { 349 case DDI_INFO_DEVT2DEVINFO: 350 if (rdsib_dev_info != NULL) { 351 *result = (void *)rdsib_dev_info; 352 ret = DDI_SUCCESS; 353 } 354 break; 355 356 case DDI_INFO_DEVT2INSTANCE: 357 *result = NULL; 358 ret = DDI_SUCCESS; 359 break; 360 361 default: 362 break; 363 } 364 365 return (ret); 366 } 367 368 static void 369 rds_read_config_values(dev_info_t *dip) 370 { 371 MaxNodes = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 372 "MaxNodes", RDS_MAX_NODES); 373 374 UserBufferSize = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 375 DDI_PROP_DONTPASS, "UserBufferSize", RDS_USER_DATA_BUFFER_SIZE); 376 377 MaxDataSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 378 DDI_PROP_DONTPASS, "MaxDataSendBuffers", RDS_MAX_DATA_SEND_BUFFERS); 379 380 MaxDataRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 381 DDI_PROP_DONTPASS, "MaxDataRecvBuffers", RDS_MAX_DATA_RECV_BUFFERS); 382 383 MaxCtrlSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 384 DDI_PROP_DONTPASS, "MaxCtrlSendBuffers", RDS_MAX_CTRL_SEND_BUFFERS); 385 386 MaxCtrlRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 387 DDI_PROP_DONTPASS, "MaxCtrlRecvBuffers", RDS_MAX_CTRL_RECV_BUFFERS); 388 389 DataRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 390 DDI_PROP_DONTPASS, "DataRecvBufferLWM", RDS_DATA_RECV_BUFFER_LWM); 391 392 CtrlRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 393 DDI_PROP_DONTPASS, "CtrlRecvBufferLWM", RDS_CTRL_RECV_BUFFER_LWM); 394 395 PendingRxPktsHWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 396 DDI_PROP_DONTPASS, "PendingRxPktsHWM", RDS_PENDING_RX_PKTS_HWM); 397 398 MinRnrRetry = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 399 "MinRnrRetry", RDS_IB_RNR_RETRY); 400 401 IBPathRetryCount = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip, 402 DDI_PROP_DONTPASS, "IBPathRetryCount", RDS_IB_PATH_RETRY); 403 404 IBPktLifeTime = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip, 405 DDI_PROP_DONTPASS, "IBPktLifeTime", RDS_IB_PKT_LT); 406 407 rdsdbglvl = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 408 "rdsdbglvl", RDS_LOG_L2); 409 410 if (MaxNodes < 2) { 411 cmn_err(CE_WARN, "MaxNodes is set to less than 2"); 412 MaxNodes = 2; 413 } 414 } 415