1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/systm.h> 30 #include <sys/kstat.h> 31 #include <sys/modctl.h> 32 #include <rpc/rpc_rdma.h> 33 34 #include <sys/ib/ibtl/ibti.h> 35 36 /* 37 * RDMA chunk size 38 */ 39 #define RDMA_MINCHUNK 1024 40 uint_t rdma_minchunk = RDMA_MINCHUNK; 41 42 /* 43 * Globals 44 */ 45 int rdma_modloaded = 0; /* set to 1 on entry to rdma_modload(), whether or not a plugin ends up loaded */ 46 int rdma_dev_available = 0; /* set by rdma_modload(): 1 if ibt_hw_is_present() reported hardware, else 0 */ 47 kmutex_t rdma_modload_lock; /* protects rdma_modloaded flag; rdma_modload() asserts it is held */ 48 rdma_registry_t *rdma_mod_head = NULL; /* head of the list of registered RDMA plugin modules */ 49 krwlock_t rdma_lock; /* protects rdma_mod_head list; held as writer by register/unregister */ 50 ldi_ident_t rpcmod_li = NULL; /* identifies us with ldi_ framework; rdma_modload() fails with EPROTONOSUPPORT while NULL */ 51 52 /* 53 * Statics 54 */ 55 static ldi_handle_t rpcib_handle = NULL; /* LDI handle filled in by ldi_open_by_name() in rdma_modload() */ 56 57 /* 58 * Externs: named kstat tables and their entry counts, attached to the client and server kstats by rdma_kstat_init() 59 */ 60 extern kstat_named_t *rdmarcstat_ptr; 61 extern uint_t rdmarcstat_ndata; 62 extern kstat_named_t *rdmarsstat_ptr; 63 extern uint_t rdmarsstat_ndata; 64 65 void rdma_kstat_init(); 66 67 
/* 68 * RDMATF module registration routine. 69 * This routine is expected to be called by the init routine in 70 * the plugin modules. 71 */ 72 rdma_stat 73 rdma_register_mod(rdma_mod_t *mod) 74 { 75 rdma_registry_t **mp, *m; 76 77 if (mod->rdma_version != RDMATF_VERS) { 78 return (RDMA_BADVERS); 79 } 80 81 rw_enter(&rdma_lock, RW_WRITER); 82 /* 83 * Ensure not already registered 84 */ 85 mp = &rdma_mod_head; 86 while (*mp != NULL) { 87 if (strncmp((*mp)->r_mod->rdma_api, mod->rdma_api, 88 KNC_STRSIZE) == 0) { 89 rw_exit(&rdma_lock); 90 return (RDMA_REG_EXIST); 91 } 92 mp = &((*mp)->r_next); 93 } 94 95 /* 96 * New one, create and add to registry 97 */ 98 m = kmem_alloc(sizeof (rdma_registry_t), KM_SLEEP); 99 m->r_mod = kmem_alloc(sizeof (rdma_mod_t), KM_SLEEP); 100 *m->r_mod = *mod; 101 m->r_next = NULL; 102 m->r_mod->rdma_api = kmem_zalloc(KNC_STRSIZE, KM_SLEEP); 103 (void) strncpy(m->r_mod->rdma_api, mod->rdma_api, KNC_STRSIZE); 104 m->r_mod->rdma_api[KNC_STRSIZE - 1] = '\0'; 105 *mp = m; 106 rw_exit(&rdma_lock); 107 108 return (RDMA_SUCCESS); 109 } 110 111 /* 112 * RDMATF module unregistration routine. 113 * This routine is expected to be called by the fini routine in 114 * the plugin modules. 115 */ 116 rdma_stat 117 rdma_unregister_mod(rdma_mod_t *mod) 118 { 119 rdma_registry_t **m, *mmod = NULL; 120 121 rw_enter(&rdma_lock, RW_WRITER); 122 123 m = &rdma_mod_head; 124 while (*m != NULL) { 125 if (strncmp((*m)->r_mod->rdma_api, mod->rdma_api, 126 KNC_STRSIZE) != 0) { 127 m = &((*m)->r_next); 128 continue; 129 } 130 /* 131 * Check if any device attached, if so return error 132 */ 133 if ((*m)->r_mod->rdma_count != 0) { 134 rw_exit(&rdma_lock); 135 return (RDMA_FAILED); 136 } 137 /* 138 * Found entry. Now remove it. 
139 */ 140 mmod = *m; 141 *m = (*m)->r_next; 142 kmem_free(mmod->r_mod->rdma_api, KNC_STRSIZE); 143 kmem_free(mmod->r_mod, sizeof (rdma_mod_t)); 144 kmem_free(mmod, sizeof (rdma_registry_t)); 145 rw_exit(&rdma_lock); 146 return (RDMA_SUCCESS); 147 } 148 149 /* 150 * Not found. 151 */ 152 rw_exit(&rdma_lock); 153 return (RDMA_FAILED); 154 } 155 156 /* 157 * Creates a new chunk list entry, and 158 * adds it to the end of a chunk list. 159 */ 160 void 161 clist_add(struct clist **clp, uint32_t xdroff, int len, 162 struct mrc *shandle, caddr_t saddr, 163 struct mrc *dhandle, caddr_t daddr) 164 { 165 struct clist *cl; 166 167 /* Find the end of the list */ 168 169 while (*clp != NULL) 170 clp = &((*clp)->c_next); 171 172 cl = kmem_zalloc(sizeof (*cl), KM_SLEEP); 173 cl->c_xdroff = xdroff; 174 cl->c_len = len; 175 cl->c_saddr = (uint64_t)(uintptr_t)saddr; 176 if (shandle) 177 cl->c_smemhandle = *shandle; 178 cl->c_daddr = (uint64_t)(uintptr_t)daddr; 179 if (dhandle) 180 cl->c_dmemhandle = *dhandle; 181 cl->c_next = NULL; 182 183 *clp = cl; 184 } 185 186 int 187 clist_register(CONN *conn, struct clist *cl, bool_t src) 188 { 189 struct clist *c; 190 int status; 191 192 for (c = cl; c; c = c->c_next) { 193 if (src) { 194 status = RDMA_REGMEMSYNC(conn, 195 (caddr_t)(uintptr_t)c->c_saddr, c->c_len, 196 &c->c_smemhandle, (void **)&c->c_ssynchandle); 197 } else { 198 status = RDMA_REGMEMSYNC(conn, 199 (caddr_t)(uintptr_t)c->c_daddr, c->c_len, 200 &c->c_dmemhandle, (void **)&c->c_dsynchandle); 201 } 202 if (status != RDMA_SUCCESS) { 203 (void) clist_deregister(conn, cl, src); 204 return (status); 205 } 206 } 207 208 return (RDMA_SUCCESS); 209 } 210 211 int 212 clist_deregister(CONN *conn, struct clist *cl, bool_t src) 213 { 214 struct clist *c; 215 216 for (c = cl; c; c = c->c_next) { 217 if (src) { 218 if (c->c_smemhandle.mrc_rmr != 0) { 219 (void) RDMA_DEREGMEMSYNC(conn, 220 (caddr_t)(uintptr_t)c->c_saddr, 221 c->c_smemhandle, 222 (void *)(uintptr_t)c->c_ssynchandle); 223 
c->c_smemhandle.mrc_rmr = 0; 224 c->c_ssynchandle = NULL; 225 } 226 } else { 227 if (c->c_dmemhandle.mrc_rmr != 0) { 228 (void) RDMA_DEREGMEMSYNC(conn, 229 (caddr_t)(uintptr_t)c->c_daddr, 230 c->c_dmemhandle, 231 (void *)(uintptr_t)c->c_dsynchandle); 232 c->c_dmemhandle.mrc_rmr = 0; 233 c->c_dsynchandle = NULL; 234 } 235 } 236 } 237 238 return (RDMA_SUCCESS); 239 } 240 241 /* 242 * Frees up entries in chunk list 243 */ 244 void 245 clist_free(struct clist *cl) 246 { 247 struct clist *c = cl; 248 249 while (c != NULL) { 250 cl = cl->c_next; 251 kmem_free(c, sizeof (struct clist)); 252 c = cl; 253 } 254 } 255 256 rdma_stat 257 rdma_clnt_postrecv(CONN *conn, uint32_t xid) 258 { 259 struct clist *cl = NULL; 260 rdma_stat retval; 261 rdma_buf_t rbuf; 262 263 rbuf.type = RECV_BUFFER; 264 if (RDMA_BUF_ALLOC(conn, &rbuf)) { 265 retval = RDMA_NORESOURCE; 266 } else { 267 clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr, 268 NULL, NULL); 269 retval = RDMA_CLNT_RECVBUF(conn, cl, xid); 270 clist_free(cl); 271 } 272 return (retval); 273 } 274 275 rdma_stat 276 rdma_svc_postrecv(CONN *conn) 277 { 278 struct clist *cl = NULL; 279 rdma_stat retval; 280 rdma_buf_t rbuf; 281 282 rbuf.type = RECV_BUFFER; 283 if (RDMA_BUF_ALLOC(conn, &rbuf)) { 284 retval = RDMA_NORESOURCE; 285 } else { 286 clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr, 287 NULL, NULL); 288 retval = RDMA_SVC_RECVBUF(conn, cl); 289 clist_free(cl); 290 } 291 return (retval); 292 } 293 294 rdma_stat 295 clist_syncmem(CONN *conn, struct clist *cl, bool_t src) 296 { 297 struct clist *c; 298 rdma_stat status; 299 300 c = cl; 301 if (src) { 302 while (c != NULL) { 303 status = RDMA_SYNCMEM(conn, 304 (void *)(uintptr_t)c->c_ssynchandle, 305 (caddr_t)(uintptr_t)c->c_saddr, c->c_len, 0); 306 if (status != RDMA_SUCCESS) 307 return (status); 308 c = c->c_next; 309 } 310 } else { 311 while (c != NULL) { 312 status = RDMA_SYNCMEM(conn, 313 (void *)(uintptr_t)c->c_dsynchandle, 314 (caddr_t)(uintptr_t)c->c_daddr, c->c_len, 
1); 315 if (status != RDMA_SUCCESS) 316 return (status); 317 c = c->c_next; 318 } 319 } 320 return (RDMA_SUCCESS); 321 } 322 323 void 324 rdma_buf_free(CONN *conn, rdma_buf_t *rbuf) 325 { 326 if (!rbuf || rbuf->addr == NULL) { 327 return; 328 } 329 if (rbuf->type != CHUNK_BUFFER) { 330 /* pool buffer */ 331 RDMA_BUF_FREE(conn, rbuf); 332 } else { 333 kmem_free(rbuf->addr, rbuf->len); 334 } 335 rbuf->addr = NULL; 336 rbuf->len = 0; 337 } 338 339 /* 340 * Caller is holding rdma_modload_lock mutex 341 */ 342 int 343 rdma_modload() 344 { 345 int status; 346 ASSERT(MUTEX_HELD(&rdma_modload_lock)); 347 /* 348 * Load all available RDMA plugins which right now is only IB plugin. 349 * If no IB hardware is present, then quit right away. 350 * ENODEV -- For no device on the system 351 * EPROTONOSUPPORT -- For module not avilable either due to failure to 352 * load or some other reason. 353 */ 354 rdma_modloaded = 1; 355 if (ibt_hw_is_present() == 0) { 356 rdma_dev_available = 0; 357 return (ENODEV); 358 } 359 360 rdma_dev_available = 1; 361 if (rpcmod_li == NULL) 362 return (EPROTONOSUPPORT); 363 364 status = ldi_open_by_name("/devices/ib/rpcib@0:rpcib", 365 FREAD | FWRITE, kcred, 366 &rpcib_handle, rpcmod_li); 367 if (status != 0) 368 return (EPROTONOSUPPORT); 369 370 /* success */ 371 rdma_kstat_init(); 372 return (0); 373 } 374 375 void 376 rdma_kstat_init(void) 377 { 378 kstat_t *ksp; 379 380 /* 381 * The RDMA framework doesn't know how to deal with Zones, and is 382 * only available in the global zone. 
383 */ 384 ASSERT(INGLOBALZONE(curproc)); 385 ksp = kstat_create_zone("unix", 0, "rpc_rdma_client", "rpc", 386 KSTAT_TYPE_NAMED, rdmarcstat_ndata, 387 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID); 388 if (ksp) { 389 ksp->ks_data = (void *) rdmarcstat_ptr; 390 kstat_install(ksp); 391 } 392 393 ksp = kstat_create_zone("unix", 0, "rpc_rdma_server", "rpc", 394 KSTAT_TYPE_NAMED, rdmarsstat_ndata, 395 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID); 396 if (ksp) { 397 ksp->ks_data = (void *) rdmarsstat_ptr; 398 kstat_install(ksp); 399 } 400 } 401