1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * UDAPL kernel agent 27 */ 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/stropts.h> 33 #include <sys/stream.h> 34 #include <sys/strlog.h> 35 #include <sys/cmn_err.h> 36 #include <sys/kmem.h> 37 #include <sys/conf.h> 38 #include <sys/stat.h> 39 #include <sys/modctl.h> 40 #include <sys/kstat.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/strsun.h> 44 #include <sys/taskq.h> 45 #include <sys/open.h> 46 #include <sys/uio.h> 47 #include <sys/cpuvar.h> 48 #include <sys/atomic.h> 49 #include <sys/sysmacros.h> 50 #include <sys/esunddi.h> 51 #include <sys/avl.h> 52 #include <sys/cred.h> 53 #include <sys/note.h> 54 #include <sys/ib/ibtl/ibti.h> 55 #include <sys/socket.h> 56 #include <netinet/in.h> 57 #include <daplt_if.h> 58 #include <daplt.h> 59 60 /* 61 * The following variables support the debug log buffer scheme. 
62 */ 63 #ifdef DEBUG 64 static char daplka_dbgbuf[0x80000]; 65 #else /* DEBUG */ 66 static char daplka_dbgbuf[0x4000]; 67 #endif /* DEBUG */ 68 static int daplka_dbgsize = sizeof (daplka_dbgbuf); 69 static size_t daplka_dbgnext; 70 static int daplka_dbginit = 0; 71 static kmutex_t daplka_dbglock; 72 _NOTE(MUTEX_PROTECTS_DATA(daplka_dbglock, 73 daplka_dbgbuf 74 daplka_dbgnext)) 75 76 static int daplka_dbg = 0x0103; 77 static void daplka_console(const char *, ...); 78 static void daplka_debug(const char *, ...); 79 static int daplka_apm = 0x1; /* default enable */ 80 static int daplka_failback = 0x1; /* default enable */ 81 static int daplka_query_aft_setaltpath = 10; 82 83 #define DERR \ 84 if (daplka_dbg & 0x100) \ 85 daplka_debug 86 87 #ifdef DEBUG 88 89 #define DINFO \ 90 daplka_console 91 92 #define D1 \ 93 if (daplka_dbg & 0x01) \ 94 daplka_debug 95 #define D2 \ 96 if (daplka_dbg & 0x02) \ 97 daplka_debug 98 #define D3 \ 99 if (daplka_dbg & 0x04) \ 100 daplka_debug 101 #define D4 \ 102 if (daplka_dbg & 0x08) \ 103 daplka_debug 104 105 #else /* DEBUG */ 106 107 #define DINFO if (0) printf 108 #define D1 if (0) printf 109 #define D2 if (0) printf 110 #define D3 if (0) printf 111 #define D4 if (0) printf 112 113 #endif /* DEBUG */ 114 115 /* 116 * driver entry points 117 */ 118 static int daplka_open(dev_t *, int, int, struct cred *); 119 static int daplka_close(dev_t, int, int, struct cred *); 120 static int daplka_attach(dev_info_t *, ddi_attach_cmd_t); 121 static int daplka_detach(dev_info_t *, ddi_detach_cmd_t); 122 static int daplka_info(dev_info_t *, ddi_info_cmd_t, void *, void **); 123 static int daplka_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 124 125 /* 126 * types of ioctls 127 */ 128 static int daplka_common_ioctl(int, minor_t, intptr_t, int, cred_t *, int *); 129 static int daplka_misc_ioctl(int, daplka_ia_resource_t *, intptr_t, int, 130 cred_t *, int *); 131 static int daplka_ep_ioctl(int, daplka_ia_resource_t *, intptr_t, int, 132 cred_t 
*, int *); 133 static int daplka_evd_ioctl(int, daplka_ia_resource_t *, intptr_t, int, 134 cred_t *, int *); 135 static int daplka_mr_ioctl(int, daplka_ia_resource_t *, intptr_t, int, 136 cred_t *, int *); 137 static int daplka_cno_ioctl(int, daplka_ia_resource_t *, intptr_t, int, 138 cred_t *, int *); 139 static int daplka_pd_ioctl(int, daplka_ia_resource_t *, intptr_t, int, 140 cred_t *, int *); 141 static int daplka_sp_ioctl(int, daplka_ia_resource_t *, intptr_t, int, 142 cred_t *, int *); 143 static int daplka_srq_ioctl(int, daplka_ia_resource_t *, intptr_t, int, 144 cred_t *, int *); 145 146 /* 147 * common ioctls and supporting functions 148 */ 149 static int daplka_ia_create(minor_t, intptr_t, int, cred_t *, int *); 150 static int daplka_ia_destroy(daplka_resource_t *); 151 152 /* 153 * EP ioctls and supporting functions 154 */ 155 static int daplka_ep_create(daplka_ia_resource_t *, intptr_t, int, 156 cred_t *, int *); 157 static int daplka_ep_modify(daplka_ia_resource_t *, intptr_t, int, 158 cred_t *, int *); 159 static int daplka_ep_free(daplka_ia_resource_t *, intptr_t, int, 160 cred_t *, int *); 161 static int daplka_ep_connect(daplka_ia_resource_t *, intptr_t, int, 162 cred_t *, int *); 163 static int daplka_ep_disconnect(daplka_ia_resource_t *, intptr_t, int, 164 cred_t *, int *); 165 static int daplka_ep_reinit(daplka_ia_resource_t *, intptr_t, int, 166 cred_t *, int *); 167 static int daplka_ep_destroy(daplka_resource_t *); 168 static void daplka_hash_ep_free(void *); 169 static int daplka_ep_failback(void *objp, void *arg); 170 static int daplka_ep_altpath(daplka_ep_resource_t *, ib_gid_t *); 171 172 static uint32_t daplka_ep_get_state(daplka_ep_resource_t *); 173 static void daplka_ep_set_state(daplka_ep_resource_t *, uint32_t, uint32_t); 174 static boolean_t daplka_ep_transition_is_valid(uint32_t, uint32_t); 175 static daplka_timer_info_t *daplka_timer_info_alloc(daplka_ep_resource_t *); 176 static void daplka_timer_info_free(daplka_timer_info_t 
*); 177 static void daplka_timer_handler(void *); 178 static void daplka_timer_dispatch(void *); 179 static void daplka_timer_thread(void *); 180 static int daplka_cancel_timer(daplka_ep_resource_t *); 181 static void daplka_hash_timer_free(void *); 182 183 /* 184 * EVD ioctls and supporting functions 185 */ 186 static int daplka_evd_create(daplka_ia_resource_t *, intptr_t, int, 187 cred_t *, int *); 188 static int daplka_cq_resize(daplka_ia_resource_t *, intptr_t, int, 189 cred_t *, int *); 190 static int daplka_evd_free(daplka_ia_resource_t *, intptr_t, int, 191 cred_t *, int *); 192 static int daplka_event_poll(daplka_ia_resource_t *, intptr_t, int, 193 cred_t *, int *); 194 static int daplka_evd_destroy(daplka_resource_t *); 195 static void daplka_cq_handler(ibt_cq_hdl_t, void *); 196 static void daplka_evd_wakeup(daplka_evd_resource_t *, 197 daplka_evd_event_list_t *, daplka_evd_event_t *); 198 static void daplka_evd_event_enqueue(daplka_evd_event_list_t *, 199 daplka_evd_event_t *); 200 static daplka_evd_event_t *daplka_evd_event_dequeue(daplka_evd_event_list_t *); 201 static void daplka_hash_evd_free(void *); 202 203 204 /* 205 * SRQ ioctls and supporting functions 206 */ 207 static int daplka_srq_create(daplka_ia_resource_t *, intptr_t, int, 208 cred_t *, int *); 209 static int daplka_srq_resize(daplka_ia_resource_t *, intptr_t, int, 210 cred_t *, int *); 211 static int daplka_srq_free(daplka_ia_resource_t *, intptr_t, int, 212 cred_t *, int *); 213 static int daplka_srq_destroy(daplka_resource_t *); 214 static void daplka_hash_srq_free(void *); 215 216 /* 217 * Miscellaneous ioctls 218 */ 219 static int daplka_cr_accept(daplka_ia_resource_t *, intptr_t, int, 220 cred_t *, int *); 221 static int daplka_cr_reject(daplka_ia_resource_t *, intptr_t, int, 222 cred_t *, int *); 223 static int daplka_cr_handoff(daplka_ia_resource_t *, intptr_t, int, 224 cred_t *, int *); 225 static int daplka_ia_query(daplka_ia_resource_t *, intptr_t, int, 226 cred_t *, int *); 
227 228 /* 229 * PD ioctls and supporting functions 230 */ 231 static int daplka_pd_alloc(daplka_ia_resource_t *, intptr_t, int, 232 cred_t *, int *); 233 static int daplka_pd_free(daplka_ia_resource_t *, intptr_t, int, 234 cred_t *, int *); 235 static int daplka_pd_destroy(daplka_resource_t *); 236 static void daplka_hash_pd_free(void *); 237 238 /* 239 * SP ioctls and supporting functions 240 */ 241 static int daplka_service_register(daplka_ia_resource_t *, intptr_t, int, 242 cred_t *, int *); 243 static int daplka_service_deregister(daplka_ia_resource_t *, intptr_t, int, 244 cred_t *, int *); 245 static int daplka_sp_destroy(daplka_resource_t *); 246 static void daplka_hash_sp_free(void *); 247 static void daplka_hash_sp_unref(void *); 248 249 /* 250 * MR ioctls and supporting functions 251 */ 252 static int daplka_mr_register(daplka_ia_resource_t *, intptr_t, int, 253 cred_t *, int *); 254 static int daplka_mr_register_lmr(daplka_ia_resource_t *, intptr_t, int, 255 cred_t *, int *); 256 static int daplka_mr_register_shared(daplka_ia_resource_t *, intptr_t, int, 257 cred_t *, int *); 258 static int daplka_mr_deregister(daplka_ia_resource_t *, intptr_t, int, 259 cred_t *, int *); 260 static int daplka_mr_sync(daplka_ia_resource_t *, intptr_t, int, 261 cred_t *, int *); 262 static int daplka_mr_destroy(daplka_resource_t *); 263 static void daplka_hash_mr_free(void *); 264 static void daplka_shared_mr_free(daplka_mr_resource_t *); 265 266 /* 267 * MW ioctls and supporting functions 268 */ 269 static int daplka_mw_alloc(daplka_ia_resource_t *, intptr_t, int, 270 cred_t *, int *); 271 static int daplka_mw_free(daplka_ia_resource_t *, intptr_t, int, 272 cred_t *, int *); 273 static int daplka_mw_destroy(daplka_resource_t *); 274 static void daplka_hash_mw_free(void *); 275 276 /* 277 * CNO ioctls and supporting functions 278 */ 279 static int daplka_cno_alloc(daplka_ia_resource_t *, intptr_t, int, 280 cred_t *, int *); 281 static int 
daplka_cno_free(daplka_ia_resource_t *, intptr_t, int, 282 cred_t *, int *); 283 static int daplka_cno_wait(daplka_ia_resource_t *, intptr_t, int, 284 cred_t *, int *); 285 static int daplka_cno_destroy(daplka_resource_t *); 286 static void daplka_hash_cno_free(void *); 287 288 /* 289 * CM handlers 290 */ 291 static ibt_cm_status_t daplka_cm_rc_handler(void *, ibt_cm_event_t *, 292 ibt_cm_return_args_t *, void *, ibt_priv_data_len_t); 293 294 static ibt_cm_status_t daplka_cm_service_handler(void *, ibt_cm_event_t *, 295 ibt_cm_return_args_t *, void *, ibt_priv_data_len_t); 296 297 static ibt_cm_status_t daplka_cm_service_req(daplka_sp_resource_t *, 298 ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t); 299 300 /* 301 * resource management routines 302 */ 303 static int daplka_resource_reserve(minor_t *); 304 static int daplka_resource_insert(minor_t, daplka_resource_t *); 305 static daplka_resource_t *daplka_resource_remove(minor_t rnum); 306 static daplka_resource_t *daplka_resource_lookup(minor_t); 307 static void daplka_resource_init(void); 308 static void daplka_resource_fini(void); 309 static struct daplka_resource_table daplka_resource; 310 311 /* 312 * hash table routines 313 */ 314 static int daplka_hash_insert(daplka_hash_table_t *, uint64_t *, void *); 315 static int daplka_hash_remove(daplka_hash_table_t *, uint64_t, void **); 316 static void daplka_hash_walk(daplka_hash_table_t *, int (*)(void *, void *), 317 void *, krw_t); 318 static void *daplka_hash_lookup(daplka_hash_table_t *, uint64_t); 319 static int daplka_hash_create(daplka_hash_table_t *, uint_t, 320 void (*)(void *), void (*)(void *)); 321 static void daplka_hash_destroy(daplka_hash_table_t *); 322 static uint32_t daplka_hash_getsize(daplka_hash_table_t *); 323 static void daplka_hash_generic_lookup(void *); 324 325 static uint32_t daplka_timer_hkey_gen(); 326 327 /* 328 * async event handlers 329 */ 330 static void daplka_async_event_create(ibt_async_code_t, 
ibt_async_event_t *, 331 uint64_t, daplka_ia_resource_t *); 332 static void daplka_rc_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t, 333 ibt_async_event_t *); 334 static void daplka_cq_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t, 335 ibt_async_event_t *); 336 static void daplka_un_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t, 337 ibt_async_event_t *); 338 static void daplka_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t, 339 ibt_async_event_t *); 340 static void daplka_sm_notice_handler(void *, ib_gid_t, ibt_subnet_event_code_t, 341 ibt_subnet_event_t *event); 342 static void daplka_sm_gid_avail(ib_gid_t *, ib_gid_t *); 343 344 /* 345 * IBTF wrappers and default limits used for resource accounting 346 */ 347 static boolean_t daplka_accounting_enabled = B_TRUE; 348 static uint32_t daplka_max_qp_percent = 100; 349 static uint32_t daplka_max_cq_percent = 100; 350 static uint32_t daplka_max_pd_percent = 100; 351 static uint32_t daplka_max_mw_percent = 100; 352 static uint32_t daplka_max_mr_percent = 100; 353 static uint32_t daplka_max_srq_percent = 100; 354 355 static ibt_status_t 356 daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *, ibt_hca_hdl_t, 357 ibt_chan_alloc_flags_t, ibt_rc_chan_alloc_args_t *, 358 ibt_channel_hdl_t *, ibt_chan_sizes_t *); 359 360 static ibt_status_t 361 daplka_ibt_free_channel(daplka_ep_resource_t *, ibt_channel_hdl_t); 362 363 static ibt_status_t 364 daplka_ibt_alloc_cq(daplka_evd_resource_t *, ibt_hca_hdl_t, 365 ibt_cq_attr_t *, ibt_cq_hdl_t *, uint_t *); 366 367 static ibt_status_t 368 daplka_ibt_free_cq(daplka_evd_resource_t *, ibt_cq_hdl_t); 369 370 static ibt_status_t 371 daplka_ibt_alloc_pd(daplka_pd_resource_t *, ibt_hca_hdl_t, 372 ibt_pd_flags_t, ibt_pd_hdl_t *); 373 374 static ibt_status_t 375 daplka_ibt_free_pd(daplka_pd_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t); 376 377 static ibt_status_t 378 daplka_ibt_alloc_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t, 379 ibt_mw_flags_t, 
ibt_mw_hdl_t *, ibt_rkey_t *); 380 381 static ibt_status_t 382 daplka_ibt_free_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_mw_hdl_t); 383 384 static ibt_status_t 385 daplka_ibt_register_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t, 386 ibt_mr_attr_t *, ibt_mr_hdl_t *, ibt_mr_desc_t *); 387 388 static ibt_status_t 389 daplka_ibt_register_shared_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, 390 ibt_mr_hdl_t, ibt_pd_hdl_t, ibt_smr_attr_t *, ibt_mr_hdl_t *, 391 ibt_mr_desc_t *); 392 393 static ibt_status_t 394 daplka_ibt_deregister_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_mr_hdl_t); 395 396 static ibt_status_t 397 daplka_ibt_alloc_srq(daplka_srq_resource_t *, ibt_hca_hdl_t, ibt_srq_flags_t, 398 ibt_pd_hdl_t, ibt_srq_sizes_t *, ibt_srq_hdl_t *, ibt_srq_sizes_t *); 399 400 static ibt_status_t 401 daplka_ibt_free_srq(daplka_srq_resource_t *, ibt_srq_hdl_t); 402 403 /* 404 * macros for manipulating resource objects. 405 * these macros can be used on objects that begin with a 406 * daplka_resource_t header. 
407 */ 408 #define DAPLKA_RS_REFCNT(rp) ((rp)->header.rs_refcnt) 409 410 #define DAPLKA_RS_REF(rp) { \ 411 mutex_enter(&(rp)->header.rs_reflock); \ 412 (rp)->header.rs_refcnt++; \ 413 ASSERT((rp)->header.rs_refcnt != 0); \ 414 mutex_exit(&(rp)->header.rs_reflock); \ 415 } 416 417 #define DAPLKA_RS_UNREF(rp) { \ 418 mutex_enter(&(rp)->header.rs_reflock); \ 419 ASSERT((rp)->header.rs_refcnt != 0); \ 420 if (--(rp)->header.rs_refcnt == 0) { \ 421 ASSERT((rp)->header.rs_free != NULL); \ 422 mutex_exit(&(rp)->header.rs_reflock); \ 423 (rp)->header.rs_free((daplka_resource_t *)rp); \ 424 } else { \ 425 mutex_exit(&(rp)->header.rs_reflock); \ 426 } \ 427 } 428 429 #define DAPLKA_RS_INIT(rp, type, rnum, free_func) { \ 430 (rp)->header.rs_refcnt = 1; \ 431 (rp)->header.rs_type = (type); \ 432 (rp)->header.rs_rnum = (rnum); \ 433 (rp)->header.rs_charged = 0; \ 434 (rp)->header.rs_free = (free_func); \ 435 mutex_init(&(rp)->header.rs_reflock, NULL, \ 436 MUTEX_DRIVER, NULL); \ 437 } 438 439 #define DAPLKA_RS_FINI(rp) { \ 440 mutex_destroy(&(rp)->header.rs_reflock); \ 441 } 442 443 #define DAPLKA_RS_ACCT_INC(rp, cnt) { \ 444 atomic_add_32(&(rp)->header.rs_charged, (cnt)); \ 445 } 446 #define DAPLKA_RS_ACCT_DEC(rp, cnt) { \ 447 atomic_add_32(&(rp)->header.rs_charged, -(cnt)); \ 448 } 449 #define DAPLKA_RS_ACCT_CHARGED(rp) ((rp)->header.rs_charged) 450 451 #define DAPLKA_RS_RNUM(rp) ((rp)->header.rs_rnum) 452 #define DAPLKA_RS_TYPE(rp) ((rp)->header.rs_type) 453 #define DAPLKA_RS_RESERVED(rp) ((intptr_t)(rp) == DAPLKA_RC_RESERVED) 454 455 /* 456 * depending on the timeout value does a cv_wait_sig or cv_timedwait_sig 457 */ 458 #define DAPLKA_EVD_WAIT(cvp, mp, timeout) \ 459 ((timeout) == LONG_MAX) ? 
cv_wait_sig((cvp), (mp)) : \ 460 cv_timedwait_sig((cvp), (mp), (timeout)) 461 462 #define DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca) ((hca)->hca_ref_cnt++) 463 #define DAPLKA_RELE_HCA_WITHOUT_LOCK(hca) ((hca)->hca_ref_cnt--) 464 465 #define DAPLKA_HOLD_HCA(dp, hca) { \ 466 mutex_enter(&(dp)->daplka_mutex); \ 467 DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca); \ 468 mutex_exit(&(dp)->daplka_mutex); \ 469 } 470 471 #define DAPLKA_RELE_HCA(dp, hca) { \ 472 mutex_enter(&(dp)->daplka_mutex); \ 473 DAPLKA_RELE_HCA_WITHOUT_LOCK(hca); \ 474 mutex_exit(&(dp)->daplka_mutex); \ 475 } 476 477 #define DAPLKA_HCA_BUSY(hca) \ 478 ((hca)->hca_ref_cnt != 0 || \ 479 (hca)->hca_qp_count != 0 || \ 480 (hca)->hca_cq_count != 0 || \ 481 (hca)->hca_pd_count != 0 || \ 482 (hca)->hca_mw_count != 0 || \ 483 (hca)->hca_mr_count != 0) 484 485 486 static struct cb_ops daplka_cb_ops = { 487 daplka_open, /* cb_open */ 488 daplka_close, /* cb_close */ 489 nodev, /* cb_strategy */ 490 nodev, /* cb_print */ 491 nodev, /* cb_dump */ 492 nodev, /* cb_read */ 493 nodev, /* cb_write */ 494 daplka_ioctl, /* cb_ioctl */ 495 nodev, /* cb_devmap */ 496 nodev, /* cb_mmap */ 497 nodev, /* cb_segmap */ 498 nochpoll, /* cb_chpoll */ 499 ddi_prop_op, /* cb_prop_op */ 500 NULL, /* cb_stream */ 501 D_NEW | D_MP, /* cb_flag */ 502 CB_REV, /* rev */ 503 nodev, /* int (*cb_aread)() */ 504 nodev /* int (*cb_awrite)() */ 505 }; 506 507 static struct dev_ops daplka_ops = { 508 DEVO_REV, /* devo_rev */ 509 0, /* devo_refcnt */ 510 daplka_info, /* devo_getinfo */ 511 nulldev, /* devo_identify */ 512 nulldev, /* devo_probe */ 513 daplka_attach, /* devo_attach */ 514 daplka_detach, /* devo_detach */ 515 nodev, /* devo_reset */ 516 &daplka_cb_ops, /* devo_cb_ops */ 517 (struct bus_ops *)NULL, /* devo_bus_ops */ 518 nulldev, /* power */ 519 ddi_quiesce_not_needed, /* devo_quiesce */ 520 }; 521 522 /* 523 * Module linkage information for the kernel. 
524 */ 525 static struct modldrv modldrv = { 526 &mod_driverops, 527 "uDAPL Service Driver", 528 &daplka_ops, 529 }; 530 531 static struct modlinkage modlinkage = { 532 #ifdef _LP64 533 MODREV_1, { (void *) &modldrv, NULL, NULL, NULL, NULL, NULL, NULL } 534 #else 535 MODREV_1, { (void *) &modldrv, NULL, NULL, NULL } 536 #endif 537 }; 538 539 /* 540 * daplka_dev holds global driver state and a list of HCAs 541 */ 542 static daplka_t *daplka_dev = NULL; 543 static void *daplka_state = NULL; 544 545 /* 546 * global SP hash table 547 */ 548 static daplka_hash_table_t daplka_global_sp_htbl; 549 550 /* 551 * timer_info hash table 552 */ 553 static daplka_hash_table_t daplka_timer_info_htbl; 554 static uint32_t daplka_timer_hkey = 0; 555 556 /* 557 * shared MR avl tree 558 */ 559 static avl_tree_t daplka_shared_mr_tree; 560 static kmutex_t daplka_shared_mr_lock; 561 static int daplka_shared_mr_cmp(const void *, const void *); 562 _NOTE(MUTEX_PROTECTS_DATA(daplka_shared_mr_lock, 563 daplka_shared_mr_tree)) 564 565 /* 566 * default kmem flags used by this driver 567 */ 568 static int daplka_km_flags = KM_SLEEP; 569 570 /* 571 * taskq used for handling background tasks 572 */ 573 static taskq_t *daplka_taskq = NULL; 574 575 /* 576 * daplka_cm_delay is the length of time the active 577 * side needs to wait before timing out on the REP message. 
578 */ 579 static clock_t daplka_cm_delay = 60000000; 580 581 /* 582 * modunload will fail if pending_close is non-zero 583 */ 584 static uint32_t daplka_pending_close = 0; 585 586 static struct ibt_clnt_modinfo_s daplka_clnt_modinfo = { 587 IBTI_V_CURR, 588 IBT_USER, 589 daplka_async_handler, 590 NULL, 591 DAPLKA_DRV_NAME 592 }; 593 594 /* 595 * Module Installation 596 */ 597 int 598 _init(void) 599 { 600 int status; 601 602 status = ddi_soft_state_init(&daplka_state, sizeof (daplka_t), 1); 603 if (status != 0) { 604 return (status); 605 } 606 607 mutex_init(&daplka_dbglock, NULL, MUTEX_DRIVER, NULL); 608 bzero(daplka_dbgbuf, sizeof (daplka_dbgbuf)); 609 daplka_dbgnext = 0; 610 daplka_dbginit = 1; 611 612 daplka_resource_init(); 613 614 status = mod_install(&modlinkage); 615 if (status != DDI_SUCCESS) { 616 /* undo inits done before mod_install */ 617 daplka_resource_fini(); 618 mutex_destroy(&daplka_dbglock); 619 ddi_soft_state_fini(&daplka_state); 620 } 621 return (status); 622 } 623 624 /* 625 * Module Removal 626 */ 627 int 628 _fini(void) 629 { 630 int status; 631 632 /* 633 * mod_remove causes detach to be called 634 */ 635 if ((status = mod_remove(&modlinkage)) != 0) { 636 DERR("fini: mod_remove failed: 0x%x\n", status); 637 return (status); 638 } 639 640 daplka_resource_fini(); 641 mutex_destroy(&daplka_dbglock); 642 ddi_soft_state_fini(&daplka_state); 643 644 return (status); 645 } 646 647 /* 648 * Return Module Info. 
649 */ 650 int 651 _info(struct modinfo *modinfop) 652 { 653 return (mod_info(&modlinkage, modinfop)); 654 } 655 656 static void 657 daplka_enqueue_hca(daplka_t *dp, daplka_hca_t *hca) 658 { 659 daplka_hca_t *h; 660 661 ASSERT(mutex_owned(&dp->daplka_mutex)); 662 663 if (dp->daplka_hca_list_head == NULL) { 664 dp->daplka_hca_list_head = hca; 665 } else { 666 h = dp->daplka_hca_list_head; 667 while (h->hca_next != NULL) 668 h = h->hca_next; 669 670 h->hca_next = hca; 671 } 672 } 673 674 static void 675 daplka_dequeue_hca(daplka_t *dp, daplka_hca_t *hca) 676 { 677 daplka_hca_t *h; 678 679 ASSERT(mutex_owned(&dp->daplka_mutex)); 680 681 if (dp->daplka_hca_list_head == hca) 682 dp->daplka_hca_list_head = hca->hca_next; 683 else { 684 h = dp->daplka_hca_list_head; 685 while (h->hca_next != hca) 686 h = h->hca_next; 687 h->hca_next = hca->hca_next; 688 } 689 } 690 691 static int 692 daplka_init_hca(daplka_t *dp, ib_guid_t hca_guid) 693 { 694 daplka_hca_t *hca; 695 ibt_hca_portinfo_t *pinfop; 696 uint_t size; 697 int j; 698 ibt_status_t status; 699 700 hca = kmem_zalloc(sizeof (daplka_hca_t), KM_SLEEP); 701 702 hca->hca_guid = hca_guid; 703 704 /* 705 * open the HCA for use 706 */ 707 status = ibt_open_hca(dp->daplka_clnt_hdl, hca_guid, &hca->hca_hdl); 708 if (status != IBT_SUCCESS) { 709 if (status == IBT_HCA_IN_USE) { 710 DERR("ibt_open_hca() returned IBT_HCA_IN_USE\n"); 711 } else { 712 DERR("ibt_open_hca() returned %d\n", status); 713 } 714 kmem_free(hca, sizeof (daplka_hca_t)); 715 return (status); 716 } 717 718 /* 719 * query HCA to get its info 720 */ 721 status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr); 722 if (status != IBT_SUCCESS) { 723 DERR("ibt_query_hca returned %d (hca_guid 0x%llx)\n", 724 status, (longlong_t)hca_guid); 725 goto out; 726 } 727 728 /* 729 * query HCA to get info of all ports 730 */ 731 status = ibt_query_hca_ports(hca->hca_hdl, 732 0, &pinfop, &hca->hca_nports, &size); 733 if (status != IBT_SUCCESS) { 734 DERR("ibt_query_all_ports 
returned %d " 735 "(hca_guid 0x%llx)\n", status, 736 (longlong_t)hca_guid); 737 goto out; 738 } 739 hca->hca_ports = pinfop; 740 hca->hca_pinfosz = size; 741 742 DERR("hca guid 0x%llx, nports %d\n", 743 (longlong_t)hca_guid, hca->hca_nports); 744 for (j = 0; j < hca->hca_nports; j++) { 745 DERR("port %d: state %d prefix 0x%016llx " 746 "guid %016llx\n", 747 pinfop[j].p_port_num, pinfop[j].p_linkstate, 748 (longlong_t)pinfop[j].p_sgid_tbl[0].gid_prefix, 749 (longlong_t)pinfop[j].p_sgid_tbl[0].gid_guid); 750 } 751 752 mutex_enter(&dp->daplka_mutex); 753 daplka_enqueue_hca(dp, hca); 754 mutex_exit(&dp->daplka_mutex); 755 756 return (IBT_SUCCESS); 757 758 out: 759 (void) ibt_close_hca(hca->hca_hdl); 760 kmem_free(hca, sizeof (daplka_hca_t)); 761 return (status); 762 } 763 764 /* 765 * this function obtains the list of HCAs from IBTF. 766 * the HCAs are then opened and the returned handles 767 * and attributes are stored into the global daplka_dev 768 * structure. 769 */ 770 static int 771 daplka_init_hcas(daplka_t *dp) 772 { 773 int i; 774 ib_guid_t *hca_guids; 775 uint32_t hca_count; 776 777 /* 778 * get the num & list of HCAs present 779 */ 780 hca_count = ibt_get_hca_list(&hca_guids); 781 DERR("No. 
of HCAs present %d\n", hca_count); 782 783 if (hca_count != 0) { 784 /* 785 * get the info for each available HCA 786 */ 787 for (i = 0; i < hca_count; i++) 788 (void) daplka_init_hca(dp, hca_guids[i]); 789 790 ibt_free_hca_list(hca_guids, hca_count); 791 } 792 793 if (dp->daplka_hca_list_head != NULL) 794 return (IBT_SUCCESS); 795 else 796 return (IBT_FAILURE); 797 } 798 799 static int 800 daplka_fini_hca(daplka_t *dp, daplka_hca_t *hca) 801 { 802 ibt_status_t status; 803 804 if (hca->hca_hdl != NULL) { 805 status = ibt_close_hca(hca->hca_hdl); 806 if (status != IBT_SUCCESS) { 807 DERR("ibt_close_hca returned %d" 808 " (hca_guid 0x%llx)\n", status, 809 (longlong_t)hca->hca_guid); 810 811 mutex_enter(&dp->daplka_mutex); 812 daplka_enqueue_hca(dp, hca); 813 mutex_exit(&dp->daplka_mutex); 814 815 return (status); 816 } 817 } 818 819 if (hca->hca_ports != NULL) 820 ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz); 821 822 kmem_free(hca, sizeof (daplka_hca_t)); 823 return (IBT_SUCCESS); 824 } 825 826 /* 827 * closes all HCAs and frees up the HCA list 828 */ 829 static int 830 daplka_fini_hcas(daplka_t *dp) 831 { 832 ibt_status_t status; 833 daplka_hca_t *hca; 834 835 mutex_enter(&daplka_dev->daplka_mutex); 836 while ((hca = dp->daplka_hca_list_head) != NULL) { 837 if (DAPLKA_HCA_BUSY(hca)) { 838 mutex_exit(&daplka_dev->daplka_mutex); 839 return (IBT_HCA_RESOURCES_NOT_FREED); 840 } 841 daplka_dequeue_hca(daplka_dev, hca); 842 mutex_exit(&daplka_dev->daplka_mutex); 843 844 if ((status = daplka_fini_hca(dp, hca)) != IBT_SUCCESS) 845 return (status); 846 847 mutex_enter(&daplka_dev->daplka_mutex); 848 } 849 mutex_exit(&daplka_dev->daplka_mutex); 850 851 DERR("dapl kernel agent unloaded\n"); 852 return (IBT_SUCCESS); 853 } 854 855 856 /* 857 * Attach the device, create and fill in daplka_dev 858 */ 859 static int 860 daplka_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 861 { 862 daplka_t *dp; 863 int instance, retval, err; 864 boolean_t sp_htbl_allocated = B_FALSE; 865 
boolean_t timer_htbl_allocated = B_FALSE; 866 boolean_t shared_mr_tree_allocated = B_FALSE; 867 868 switch (cmd) { 869 case DDI_ATTACH: 870 break; 871 case DDI_RESUME: 872 return (DDI_SUCCESS); 873 default: 874 return (DDI_FAILURE); 875 } 876 877 /* 878 * Allocate soft data structure 879 */ 880 instance = ddi_get_instance(dip); 881 if (ddi_soft_state_zalloc(daplka_state, instance) != DDI_SUCCESS) { 882 DERR("attach: bad state zalloc\n"); 883 return (DDI_FAILURE); 884 } 885 886 dp = ddi_get_soft_state(daplka_state, instance); 887 if (dp == NULL) { 888 ddi_soft_state_free(daplka_state, instance); 889 DERR("attach: cannot get soft state\n"); 890 return (DDI_FAILURE); 891 } 892 /* 893 * Stuff private info into dip. 894 */ 895 dp->daplka_dip = dip; 896 ddi_set_driver_private(dip, dp); 897 daplka_dev = dp; 898 mutex_init(&dp->daplka_mutex, NULL, MUTEX_DRIVER, NULL); 899 900 /* 901 * Register driver with IBTF 902 */ 903 retval = ibt_attach(&daplka_clnt_modinfo, dip, dp, 904 &dp->daplka_clnt_hdl); 905 if (retval != IBT_SUCCESS) { 906 DERR("attach: ibt_attach failed: error = %d\n", retval); 907 retval = DDI_FAILURE; 908 goto error; 909 } 910 /* Register to receive SM events */ 911 ibt_register_subnet_notices(dp->daplka_clnt_hdl, 912 daplka_sm_notice_handler, NULL); 913 914 retval = daplka_init_hcas(dp); 915 if (retval != IBT_SUCCESS) { 916 DERR("attach: hca_init failed: error = %d\n", retval); 917 retval = DDI_FAILURE; 918 goto error; 919 } 920 /* 921 * this table is used by cr_handoff 922 */ 923 retval = daplka_hash_create(&daplka_global_sp_htbl, 924 DAPLKA_G_SP_HTBL_SZ, daplka_hash_sp_unref, 925 daplka_hash_generic_lookup); 926 if (retval != 0) { 927 DERR("attach: cannot create sp hash table\n"); 928 retval = DDI_FAILURE; 929 goto error; 930 } 931 sp_htbl_allocated = B_TRUE; 932 933 /* 934 * this table stores per EP timer information. 935 * timer_info_t objects are inserted into this table whenever 936 * a EP timer is set. 
timers get removed when they expire 937 * or when they get cancelled. 938 */ 939 retval = daplka_hash_create(&daplka_timer_info_htbl, 940 DAPLKA_TIMER_HTBL_SZ, daplka_hash_timer_free, NULL); 941 if (retval != 0) { 942 DERR("attach: cannot create timer hash table\n"); 943 retval = DDI_FAILURE; 944 goto error; 945 } 946 timer_htbl_allocated = B_TRUE; 947 948 /* 949 * this taskq is currently only used for processing timers. 950 * other processing may also use this taskq in the future. 951 */ 952 daplka_taskq = taskq_create(DAPLKA_DRV_NAME, DAPLKA_TQ_NTHREADS, 953 maxclsyspri, 1, DAPLKA_TQ_NTHREADS, TASKQ_DYNAMIC); 954 if (daplka_taskq == NULL) { 955 DERR("attach: cannot create daplka_taskq\n"); 956 retval = DDI_FAILURE; 957 goto error; 958 } 959 960 /* 961 * daplka_shared_mr_tree holds daplka_shared_mr_t objects that 962 * gets retrieved or created when daplka_mr_register_shared is 963 * called. 964 */ 965 mutex_init(&daplka_shared_mr_lock, NULL, MUTEX_DRIVER, NULL); 966 967 avl_create(&daplka_shared_mr_tree, daplka_shared_mr_cmp, 968 sizeof (daplka_shared_mr_t), 969 offsetof(daplka_shared_mr_t, smr_node)); 970 shared_mr_tree_allocated = B_TRUE; 971 972 /* 973 * Create the filesystem device node. 
974 */ 975 if (ddi_create_minor_node(dip, DAPLKA_MINOR_NAME, S_IFCHR, 976 0, DDI_PSEUDO, 0) != DDI_SUCCESS) { 977 DERR("attach: bad create_minor_node\n"); 978 retval = DDI_FAILURE; 979 goto error; 980 } 981 dp->daplka_status = DAPLKA_STATE_ATTACHED; 982 ddi_report_dev(dip); 983 return (DDI_SUCCESS); 984 985 error: 986 if (shared_mr_tree_allocated) { 987 avl_destroy(&daplka_shared_mr_tree); 988 mutex_destroy(&daplka_shared_mr_lock); 989 } 990 991 if (daplka_taskq) { 992 taskq_destroy(daplka_taskq); 993 daplka_taskq = NULL; 994 } 995 996 if (timer_htbl_allocated) { 997 daplka_hash_destroy(&daplka_timer_info_htbl); 998 } 999 1000 if (sp_htbl_allocated) { 1001 daplka_hash_destroy(&daplka_global_sp_htbl); 1002 } 1003 1004 err = daplka_fini_hcas(dp); 1005 if (err != IBT_SUCCESS) { 1006 DERR("attach: hca_fini returned %d\n", err); 1007 } 1008 1009 if (dp->daplka_clnt_hdl != NULL) { 1010 /* unregister SM event notification */ 1011 ibt_register_subnet_notices(dp->daplka_clnt_hdl, 1012 (ibt_sm_notice_handler_t)NULL, NULL); 1013 err = ibt_detach(dp->daplka_clnt_hdl); 1014 1015 if (err != IBT_SUCCESS) { 1016 DERR("attach: ibt_detach returned %d\n", err); 1017 } 1018 } 1019 mutex_destroy(&dp->daplka_mutex); 1020 1021 if (dp->daplka_status == DAPLKA_STATE_ATTACHED) { 1022 ddi_remove_minor_node(dip, NULL); 1023 } 1024 ddi_soft_state_free(daplka_state, instance); 1025 return (retval); 1026 } 1027 1028 /* 1029 * Detach - Free resources allocated in attach 1030 */ 1031 /* ARGSUSED */ 1032 static int 1033 daplka_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 1034 { 1035 int instance, err; 1036 void *cookie = NULL; 1037 daplka_t *dp; 1038 1039 if (cmd != DDI_DETACH) { 1040 return (DDI_FAILURE); 1041 } 1042 if (daplka_resource.daplka_rc_cnt > 0 || 1043 daplka_pending_close > 0) { 1044 DERR("detach: driver in use\n"); 1045 return (DDI_FAILURE); 1046 } 1047 1048 instance = ddi_get_instance(dip); 1049 dp = ddi_get_soft_state(daplka_state, instance); 1050 if (dp == NULL) { 1051 
DERR("detach: cannot get soft state\n"); 1052 return (DDI_FAILURE); 1053 } 1054 err = daplka_fini_hcas(dp); 1055 if (err != IBT_SUCCESS) { 1056 DERR("detach: hca_fini returned %d\n", err); 1057 return (DDI_FAILURE); 1058 } 1059 if (dp->daplka_clnt_hdl != NULL) { 1060 /* unregister SM event notification */ 1061 ibt_register_subnet_notices(dp->daplka_clnt_hdl, 1062 (ibt_sm_notice_handler_t)NULL, NULL); 1063 err = ibt_detach(dp->daplka_clnt_hdl); 1064 if (err != IBT_SUCCESS) { 1065 DERR("detach: ibt_detach returned %d\n", err); 1066 return (DDI_FAILURE); 1067 } 1068 dp->daplka_clnt_hdl = NULL; 1069 } 1070 mutex_destroy(&dp->daplka_mutex); 1071 if (dp->daplka_status == DAPLKA_STATE_ATTACHED) { 1072 ddi_remove_minor_node(dip, NULL); 1073 } 1074 dp->daplka_status = DAPLKA_STATE_DETACHED; 1075 ddi_soft_state_free(daplka_state, instance); 1076 daplka_dev = NULL; 1077 1078 /* 1079 * by the time we get here, all clients of dapl should 1080 * have exited and completed their cleanup properly. 1081 * we can assert that all global data structures are now 1082 * empty. 
1083 */ 1084 ASSERT(avl_destroy_nodes(&daplka_shared_mr_tree, &cookie) == NULL); 1085 avl_destroy(&daplka_shared_mr_tree); 1086 mutex_destroy(&daplka_shared_mr_lock); 1087 1088 ASSERT(daplka_hash_getsize(&daplka_timer_info_htbl) == 0); 1089 daplka_hash_destroy(&daplka_timer_info_htbl); 1090 1091 ASSERT(daplka_hash_getsize(&daplka_global_sp_htbl) == 0); 1092 daplka_hash_destroy(&daplka_global_sp_htbl); 1093 1094 taskq_destroy(daplka_taskq); 1095 1096 return (DDI_SUCCESS); 1097 } 1098 1099 /* ARGSUSED */ 1100 static int 1101 daplka_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1102 { 1103 switch (infocmd) { 1104 case DDI_INFO_DEVT2DEVINFO: 1105 if (daplka_dev != NULL) { 1106 *result = daplka_dev->daplka_dip; 1107 return (DDI_SUCCESS); 1108 } else { 1109 return (DDI_FAILURE); 1110 } 1111 1112 case DDI_INFO_DEVT2INSTANCE: 1113 *result = 0; 1114 return (DDI_SUCCESS); 1115 1116 default: 1117 return (DDI_FAILURE); 1118 } 1119 } 1120 1121 /* 1122 * creates a EP resource. 1123 * A EP resource contains a RC channel. A EP resource holds a 1124 * reference to a send_evd (for the send CQ), recv_evd (for the 1125 * recv CQ), a connection evd and a PD. These references ensure 1126 * that the referenced resources are not freed until the EP itself 1127 * gets freed. 
1128 */ 1129 /* ARGSUSED */ 1130 static int 1131 daplka_ep_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 1132 cred_t *cred, int *rvalp) 1133 { 1134 daplka_ep_resource_t *ep_rp; 1135 daplka_pd_resource_t *pd_rp; 1136 dapl_ep_create_t args; 1137 ibt_rc_chan_alloc_args_t chan_args; 1138 ibt_chan_alloc_flags_t achan_flags; 1139 ibt_chan_sizes_t chan_real_sizes; 1140 ibt_hca_attr_t *hca_attrp; 1141 uint64_t ep_hkey = 0; 1142 boolean_t inserted = B_FALSE; 1143 uint32_t old_state, new_state; 1144 int retval; 1145 ibt_status_t status; 1146 1147 D3("ep_create: enter\n"); 1148 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_create_t), 1149 mode); 1150 if (retval != 0) { 1151 DERR("ep_create: copyin error %d\n", retval); 1152 return (EFAULT); 1153 } 1154 ep_rp = kmem_zalloc(sizeof (daplka_ep_resource_t), daplka_km_flags); 1155 if (ep_rp == NULL) { 1156 DERR("ep_create: cannot allocate ep_rp\n"); 1157 return (ENOMEM); 1158 } 1159 DAPLKA_RS_INIT(ep_rp, DAPL_TYPE_EP, 1160 DAPLKA_RS_RNUM(ia_rp), daplka_ep_destroy); 1161 1162 mutex_init(&ep_rp->ep_lock, NULL, MUTEX_DRIVER, NULL); 1163 cv_init(&ep_rp->ep_cv, NULL, CV_DRIVER, NULL); 1164 ep_rp->ep_hca = ia_rp->ia_hca; 1165 ep_rp->ep_cookie = args.ep_cookie; 1166 ep_rp->ep_timer_hkey = 0; 1167 1168 /* 1169 * we don't have to use ep_get_state here because ep_rp is not in 1170 * ep_htbl yet. refer to the description of daplka_ep_set_state 1171 * for details about the EP state machine. 
1172 */ 1173 ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING; 1174 new_state = old_state = DAPLKA_EP_STATE_CLOSED; 1175 1176 /* get reference to send evd and get cq handle */ 1177 ep_rp->ep_snd_evd = (daplka_evd_resource_t *) 1178 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_snd_evd_hkey); 1179 if (ep_rp->ep_snd_evd == NULL) { 1180 DERR("ep_create: ep_snd_evd %llx not found\n", 1181 args.ep_snd_evd_hkey); 1182 retval = EINVAL; 1183 goto cleanup; 1184 } 1185 chan_args.rc_scq = ep_rp->ep_snd_evd->evd_cq_hdl; 1186 if (chan_args.rc_scq == NULL) { 1187 DERR("ep_create: ep_snd_evd cq invalid\n"); 1188 retval = EINVAL; 1189 goto cleanup; 1190 } 1191 1192 /* get reference to recv evd and get cq handle */ 1193 ep_rp->ep_rcv_evd = (daplka_evd_resource_t *) 1194 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_rcv_evd_hkey); 1195 if (ep_rp->ep_rcv_evd == NULL) { 1196 DERR("ep_create: ep_rcv_evd %llx not found\n", 1197 args.ep_rcv_evd_hkey); 1198 retval = EINVAL; 1199 goto cleanup; 1200 } 1201 chan_args.rc_rcq = ep_rp->ep_rcv_evd->evd_cq_hdl; 1202 if (chan_args.rc_rcq == NULL) { 1203 DERR("ep_create: ep_rcv_evd cq invalid\n"); 1204 retval = EINVAL; 1205 goto cleanup; 1206 } 1207 1208 /* get reference to conn evd */ 1209 ep_rp->ep_conn_evd = (daplka_evd_resource_t *) 1210 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_conn_evd_hkey); 1211 if (ep_rp->ep_conn_evd == NULL) { 1212 DERR("ep_create: ep_conn_evd %llx not found\n", 1213 args.ep_conn_evd_hkey); 1214 retval = EINVAL; 1215 goto cleanup; 1216 } 1217 1218 /* get reference to SRQ if needed */ 1219 if (args.ep_srq_attached) { 1220 ep_rp->ep_srq_res = (daplka_srq_resource_t *)daplka_hash_lookup( 1221 &ia_rp->ia_srq_htbl, args.ep_srq_hkey); 1222 if (ep_rp->ep_srq_res == NULL) { 1223 DERR("ep_create: ep_srq %llx not found\n", 1224 (longlong_t)args.ep_srq_hkey); 1225 retval = EINVAL; 1226 goto cleanup; 1227 } 1228 ASSERT(DAPLKA_RS_TYPE(ep_rp->ep_srq_res) == DAPL_TYPE_SRQ); 1229 D3("ep_create: ep_srq %p %llx\n", 
ep_rp->ep_srq_res, 1230 (longlong_t)args.ep_srq_hkey); 1231 } else { 1232 ep_rp->ep_srq_res = NULL; 1233 } 1234 1235 /* get pd handle */ 1236 pd_rp = (daplka_pd_resource_t *) 1237 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.ep_pd_hkey); 1238 if (pd_rp == NULL) { 1239 DERR("ep_create: cannot find pd resource\n"); 1240 retval = EINVAL; 1241 goto cleanup; 1242 } 1243 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD); 1244 ep_rp->ep_pd_res = pd_rp; 1245 chan_args.rc_pd = pd_rp->pd_hdl; 1246 1247 1248 /* 1249 * these checks ensure that the requested channel sizes 1250 * are within the limits supported by the chosen HCA. 1251 */ 1252 hca_attrp = &ia_rp->ia_hca->hca_attr; 1253 if (args.ep_ch_sizes.dcs_sq_sgl > hca_attrp->hca_max_sgl) { 1254 DERR("ep_create: invalid cs_sq_sgl %d\n", 1255 args.ep_ch_sizes.dcs_sq_sgl); 1256 retval = EINVAL; 1257 goto cleanup; 1258 } 1259 if (args.ep_ch_sizes.dcs_rq_sgl > hca_attrp->hca_max_sgl) { 1260 DERR("ep_create: invalid cs_rq_sgl %d\n", 1261 args.ep_ch_sizes.dcs_rq_sgl); 1262 retval = EINVAL; 1263 goto cleanup; 1264 } 1265 if (args.ep_ch_sizes.dcs_sq > hca_attrp->hca_max_chan_sz) { 1266 DERR("ep_create: invalid cs_sq %d\n", 1267 args.ep_ch_sizes.dcs_sq); 1268 retval = EINVAL; 1269 goto cleanup; 1270 } 1271 if (args.ep_ch_sizes.dcs_rq > hca_attrp->hca_max_chan_sz) { 1272 DERR("ep_create: invalid cs_rq %d\n", 1273 args.ep_ch_sizes.dcs_rq); 1274 retval = EINVAL; 1275 goto cleanup; 1276 } 1277 1278 chan_args.rc_sizes.cs_sq_sgl = args.ep_ch_sizes.dcs_sq_sgl; 1279 chan_args.rc_sizes.cs_rq_sgl = args.ep_ch_sizes.dcs_rq_sgl; 1280 chan_args.rc_sizes.cs_sq = args.ep_ch_sizes.dcs_sq; 1281 chan_args.rc_sizes.cs_rq = args.ep_ch_sizes.dcs_rq; 1282 chan_args.rc_flags = IBT_WR_SIGNALED; 1283 chan_args.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR; 1284 chan_args.rc_hca_port_num = ia_rp->ia_port_num; 1285 chan_args.rc_clone_chan = NULL; 1286 if (args.ep_srq_attached) { 1287 chan_args.rc_srq = ep_rp->ep_srq_res->srq_hdl; 1288 } else { 1289 chan_args.rc_srq 
= NULL; 1290 } 1291 1292 D3("ep_create: sq_sgl %d, rq_sgl %d, sq %d, rq %d, " 1293 "sig_type 0x%x, control 0x%x, portnum %d, clone_chan 0x%p\n", 1294 args.ep_ch_sizes.dcs_sq_sgl, args.ep_ch_sizes.dcs_rq_sgl, 1295 args.ep_ch_sizes.dcs_sq, args.ep_ch_sizes.dcs_rq, 1296 chan_args.rc_flags, chan_args.rc_control, 1297 chan_args.rc_hca_port_num, chan_args.rc_clone_chan); 1298 1299 if (args.ep_srq_attached) { 1300 achan_flags = IBT_ACHAN_USER_MAP | IBT_ACHAN_USES_SRQ; 1301 } else { 1302 achan_flags = IBT_ACHAN_USER_MAP; 1303 } 1304 /* create rc channel */ 1305 status = daplka_ibt_alloc_rc_channel(ep_rp, ia_rp->ia_hca_hdl, 1306 achan_flags, &chan_args, &ep_rp->ep_chan_hdl, 1307 &chan_real_sizes); 1308 if (status != IBT_SUCCESS) { 1309 DERR("ep_create: alloc_rc_channel returned %d\n", status); 1310 *rvalp = (int)status; 1311 retval = 0; 1312 goto cleanup; 1313 } 1314 1315 args.ep_ch_real_sizes.dcs_sq = chan_real_sizes.cs_sq; 1316 args.ep_ch_real_sizes.dcs_rq = chan_real_sizes.cs_rq; 1317 args.ep_ch_real_sizes.dcs_sq_sgl = chan_real_sizes.cs_sq_sgl; 1318 args.ep_ch_real_sizes.dcs_rq_sgl = chan_real_sizes.cs_rq_sgl; 1319 1320 /* 1321 * store ep ptr with chan_hdl. 
1322 * this ep_ptr is used by the CM handlers (both active and 1323 * passive) 1324 * mutex is only needed for race of "destroy" and "async" 1325 */ 1326 mutex_enter(&daplka_dev->daplka_mutex); 1327 ibt_set_chan_private(ep_rp->ep_chan_hdl, (void *)ep_rp); 1328 mutex_exit(&daplka_dev->daplka_mutex); 1329 1330 /* Get HCA-specific data_out info */ 1331 status = ibt_ci_data_out(ia_rp->ia_hca_hdl, 1332 IBT_CI_NO_FLAGS, IBT_HDL_CHANNEL, (void *)ep_rp->ep_chan_hdl, 1333 &args.ep_qp_data_out, sizeof (args.ep_qp_data_out)); 1334 1335 if (status != IBT_SUCCESS) { 1336 DERR("ep_create: ibt_ci_data_out error(%d)\n", 1337 status); 1338 *rvalp = (int)status; 1339 retval = 0; 1340 goto cleanup; 1341 } 1342 1343 /* insert into ep hash table */ 1344 retval = daplka_hash_insert(&ia_rp->ia_ep_htbl, 1345 &ep_hkey, (void *)ep_rp); 1346 if (retval != 0) { 1347 DERR("ep_create: cannot insert ep resource into ep_htbl\n"); 1348 goto cleanup; 1349 } 1350 inserted = B_TRUE; 1351 1352 /* 1353 * at this point, the ep_rp can be looked up by other threads 1354 * if they manage to guess the correct hkey. but they are not 1355 * permitted to operate on ep_rp until we transition to the 1356 * CLOSED state. 1357 */ 1358 1359 /* return hkey to library */ 1360 args.ep_hkey = ep_hkey; 1361 1362 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ep_create_t), 1363 mode); 1364 if (retval != 0) { 1365 DERR("ep_create: copyout error %d\n", retval); 1366 retval = EFAULT; 1367 goto cleanup; 1368 } 1369 1370 daplka_ep_set_state(ep_rp, old_state, new_state); 1371 D3("ep_create: exit\n"); 1372 return (0); 1373 1374 cleanup: 1375 if (inserted) { 1376 daplka_ep_resource_t *free_rp = NULL; 1377 1378 (void) daplka_hash_remove(&ia_rp->ia_ep_htbl, ep_hkey, 1379 (void **)&free_rp); 1380 if (free_rp != ep_rp) { 1381 /* 1382 * this case is impossible because ep_free will 1383 * wait until our state transition is complete. 
1384 */ 1385 DERR("ep_create: cannot remove ep from hash table\n"); 1386 ASSERT(B_FALSE); 1387 return (retval); 1388 } 1389 } 1390 new_state = DAPLKA_EP_STATE_FREED; 1391 daplka_ep_set_state(ep_rp, old_state, new_state); 1392 DAPLKA_RS_UNREF(ep_rp); 1393 return (retval); 1394 } 1395 1396 /* 1397 * daplka_ep_get_state retrieves the current state of the EP and 1398 * sets the state to TRANSITIONING. if the current state is already 1399 * TRANSITIONING, this function will wait until the state becomes one 1400 * of the other EP states. Most of the EP related ioctls follow the 1401 * call sequence: 1402 * 1403 * new_state = old_state = daplka_ep_get_state(ep_rp); 1404 * ... 1405 * ...some code that affects the EP 1406 * ... 1407 * new_state = <NEW_STATE>; 1408 * daplka_ep_set_state(ep_rp, old_state, new_state); 1409 * 1410 * this call sequence ensures that only one thread may access the EP 1411 * during the time ep_state is in TRANSITIONING. daplka_ep_set_state 1412 * transitions ep_state to new_state and wakes up any waiters blocking 1413 * on ep_cv. 
1414 * 1415 */ 1416 static uint32_t 1417 daplka_ep_get_state(daplka_ep_resource_t *ep_rp) 1418 { 1419 uint32_t old_state = 0; 1420 1421 mutex_enter(&ep_rp->ep_lock); 1422 while (ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING) { 1423 D2("get_state: wait for state transition to complete\n"); 1424 cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock); 1425 D2("get_state: done, curr state = %d\n", ep_rp->ep_state); 1426 } 1427 ASSERT(ep_rp->ep_state != DAPLKA_EP_STATE_TRANSITIONING); 1428 old_state = ep_rp->ep_state; 1429 1430 /* 1431 * an ep that is in the FREED state cannot transition 1432 * back to any of the regular states 1433 */ 1434 if (old_state != DAPLKA_EP_STATE_FREED) { 1435 ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING; 1436 } 1437 mutex_exit(&ep_rp->ep_lock); 1438 return (old_state); 1439 } 1440 1441 /* 1442 * EP state transition diagram 1443 * 1444 * CLOSED<------------------- 1445 * | | 1446 * | | 1447 * ------------------------ | 1448 * | | | 1449 * | | | 1450 * v v | 1451 * CONNECTING ACCEPTING | 1452 * | | | | | | 1453 * | | | | | | 1454 * | | | | | | 1455 * | | |_______|_______| | 1456 * | | | | | | 1457 * | |___________| | | | 1458 * | | | | | 1459 * | v | |---->DISCONNECTED 1460 * | CONNECTED | ^ 1461 * v | | | 1462 * ABORTING |---------|--------------| 1463 * | | | | 1464 * | | v | 1465 * | |-------->DISCONNECTING--| 1466 * | | 1467 * |---------------------------------| 1468 * 1469 * *not shown in this diagram: 1470 * -loopback transitions 1471 * -transitions to the FREED state 1472 */ 1473 static boolean_t 1474 daplka_ep_transition_is_valid(uint32_t old_state, uint32_t new_state) 1475 { 1476 boolean_t valid = B_FALSE; 1477 1478 /* 1479 * reseting to the same state is a no-op and is always 1480 * permitted. transitioning to the FREED state indicates 1481 * that the ep is about to be freed and no further operation 1482 * is allowed on it. to support abrupt close, the ep is 1483 * permitted to transition to the FREED state from any state. 
1484 */ 1485 if (old_state == new_state || 1486 new_state == DAPLKA_EP_STATE_FREED) { 1487 return (B_TRUE); 1488 } 1489 1490 switch (old_state) { 1491 case DAPLKA_EP_STATE_CLOSED: 1492 /* 1493 * this is the initial ep_state. 1494 * a transition to CONNECTING or ACCEPTING may occur 1495 * upon calling daplka_ep_connect or daplka_cr_accept, 1496 * respectively. 1497 */ 1498 if (new_state == DAPLKA_EP_STATE_CONNECTING || 1499 new_state == DAPLKA_EP_STATE_ACCEPTING) { 1500 valid = B_TRUE; 1501 } 1502 break; 1503 case DAPLKA_EP_STATE_CONNECTING: 1504 /* 1505 * we transition to this state if daplka_ep_connect 1506 * is successful. from this state, we can transition 1507 * to CONNECTED if daplka_cm_rc_conn_est gets called; 1508 * or to DISCONNECTED if daplka_cm_rc_conn_closed or 1509 * daplka_cm_rc_event_failure gets called. If the 1510 * client calls daplka_ep_disconnect, we transition 1511 * to DISCONNECTING. If a timer was set at ep_connect 1512 * time and if the timer expires prior to any of the 1513 * CM callbacks, we transition to ABORTING and then 1514 * to DISCONNECTED. 1515 */ 1516 if (new_state == DAPLKA_EP_STATE_CONNECTED || 1517 new_state == DAPLKA_EP_STATE_DISCONNECTING || 1518 new_state == DAPLKA_EP_STATE_DISCONNECTED || 1519 new_state == DAPLKA_EP_STATE_ABORTING) { 1520 valid = B_TRUE; 1521 } 1522 break; 1523 case DAPLKA_EP_STATE_ACCEPTING: 1524 /* 1525 * we transition to this state if daplka_cr_accept 1526 * is successful. from this state, we can transition 1527 * to CONNECTED if daplka_cm_service_conn_est gets called; 1528 * or to DISCONNECTED if daplka_cm_service_conn_closed or 1529 * daplka_cm_service_event_failure gets called. If the 1530 * client calls daplka_ep_disconnect, we transition to 1531 * DISCONNECTING. 
1532 */ 1533 if (new_state == DAPLKA_EP_STATE_CONNECTED || 1534 new_state == DAPLKA_EP_STATE_DISCONNECTING || 1535 new_state == DAPLKA_EP_STATE_DISCONNECTED) { 1536 valid = B_TRUE; 1537 } 1538 break; 1539 case DAPLKA_EP_STATE_CONNECTED: 1540 /* 1541 * we transition to this state if a active or passive 1542 * connection gets established. if the client calls 1543 * daplka_ep_disconnect, we transition to the 1544 * DISCONNECTING state. subsequent CM callbacks will 1545 * cause ep_state to be set to DISCONNECTED. If the 1546 * remote peer terminates the connection before we do, 1547 * it is possible for us to transition directly from 1548 * CONNECTED to DISCONNECTED. 1549 */ 1550 if (new_state == DAPLKA_EP_STATE_DISCONNECTING || 1551 new_state == DAPLKA_EP_STATE_DISCONNECTED) { 1552 valid = B_TRUE; 1553 } 1554 break; 1555 case DAPLKA_EP_STATE_DISCONNECTING: 1556 /* 1557 * we transition to this state if the client calls 1558 * daplka_ep_disconnect. 1559 */ 1560 if (new_state == DAPLKA_EP_STATE_DISCONNECTED) { 1561 valid = B_TRUE; 1562 } 1563 break; 1564 case DAPLKA_EP_STATE_ABORTING: 1565 /* 1566 * we transition to this state if the active side 1567 * EP timer has expired. this is only a transient 1568 * state that is set during timer processing. when 1569 * timer processing completes, ep_state will become 1570 * DISCONNECTED. 1571 */ 1572 if (new_state == DAPLKA_EP_STATE_DISCONNECTED) { 1573 valid = B_TRUE; 1574 } 1575 break; 1576 case DAPLKA_EP_STATE_DISCONNECTED: 1577 /* 1578 * we transition to this state if we get a closed 1579 * or event_failure CM callback. an expired timer 1580 * can also cause us to be in this state. this 1581 * is the only state in which we permit the 1582 * ep_reinit operation. 
1583 */ 1584 if (new_state == DAPLKA_EP_STATE_CLOSED) { 1585 valid = B_TRUE; 1586 } 1587 break; 1588 default: 1589 break; 1590 } 1591 1592 if (!valid) { 1593 DERR("ep_transition: invalid state change %d -> %d\n", 1594 old_state, new_state); 1595 } 1596 return (valid); 1597 } 1598 1599 /* 1600 * first check if the transition is valid. then set ep_state 1601 * to new_state and wake up all waiters. 1602 */ 1603 static void 1604 daplka_ep_set_state(daplka_ep_resource_t *ep_rp, uint32_t old_state, 1605 uint32_t new_state) 1606 { 1607 boolean_t valid; 1608 1609 ASSERT(new_state != DAPLKA_EP_STATE_TRANSITIONING); 1610 1611 valid = daplka_ep_transition_is_valid(old_state, new_state); 1612 mutex_enter(&ep_rp->ep_lock); 1613 if (ep_rp->ep_state != DAPLKA_EP_STATE_FREED) { 1614 if (valid) { 1615 ep_rp->ep_state = new_state; 1616 } else { 1617 /* 1618 * this case is impossible. 1619 * we have a serious problem if we get here. 1620 * instead of panicing, we reset the state to 1621 * old_state. doing this would at least prevent 1622 * threads from hanging due to ep_state being 1623 * stuck in TRANSITIONING. 1624 */ 1625 ep_rp->ep_state = old_state; 1626 ASSERT(B_FALSE); 1627 } 1628 } 1629 cv_broadcast(&ep_rp->ep_cv); 1630 mutex_exit(&ep_rp->ep_lock); 1631 } 1632 1633 /* 1634 * modifies RC channel attributes. 1635 * currently, only the rdma_in and rdma_out attributes may 1636 * be modified. the channel must be in quiescent state when 1637 * this function is called. 
1638 */ 1639 /* ARGSUSED */ 1640 static int 1641 daplka_ep_modify(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 1642 cred_t *cred, int *rvalp) 1643 { 1644 daplka_ep_resource_t *ep_rp = NULL; 1645 ibt_cep_modify_flags_t good_flags; 1646 ibt_rc_chan_modify_attr_t rcm_attr; 1647 ibt_hca_attr_t *hca_attrp; 1648 dapl_ep_modify_t args; 1649 ibt_status_t status; 1650 uint32_t old_state, new_state; 1651 int retval = 0; 1652 1653 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_modify_t), 1654 mode); 1655 if (retval != 0) { 1656 DERR("ep_modify: copyin error %d\n", retval); 1657 return (EFAULT); 1658 } 1659 ep_rp = (daplka_ep_resource_t *) 1660 daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epm_hkey); 1661 if (ep_rp == NULL) { 1662 DERR("ep_modify: cannot find ep resource\n"); 1663 return (EINVAL); 1664 } 1665 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP); 1666 new_state = old_state = daplka_ep_get_state(ep_rp); 1667 1668 if (old_state != DAPLKA_EP_STATE_CLOSED && 1669 old_state != DAPLKA_EP_STATE_DISCONNECTED) { 1670 DERR("ep_modify: invalid state %d\n", old_state); 1671 retval = EINVAL; 1672 goto cleanup; 1673 } 1674 1675 good_flags = IBT_CEP_SET_RDMARA_OUT | IBT_CEP_SET_RDMARA_IN; 1676 if ((args.epm_flags & ~good_flags) != 0) { 1677 DERR("ep_modify: invalid flags 0x%x\n", args.epm_flags); 1678 retval = EINVAL; 1679 goto cleanup; 1680 } 1681 1682 hca_attrp = &ia_rp->ia_hca->hca_attr; 1683 1684 bzero(&rcm_attr, sizeof (ibt_rc_chan_modify_attr_t)); 1685 if ((args.epm_flags & IBT_CEP_SET_RDMARA_OUT) != 0) { 1686 if (args.epm_rdma_ra_out > hca_attrp->hca_max_rdma_out_chan) { 1687 DERR("ep_modify: invalid epm_rdma_ra_out %d\n", 1688 args.epm_rdma_ra_out); 1689 retval = EINVAL; 1690 goto cleanup; 1691 } 1692 rcm_attr.rc_rdma_ra_out = args.epm_rdma_ra_out; 1693 } 1694 if ((args.epm_flags & IBT_CEP_SET_RDMARA_IN) != 0) { 1695 if (args.epm_rdma_ra_in > hca_attrp->hca_max_rdma_in_chan) { 1696 DERR("ep_modify: epm_rdma_ra_in %d\n", 1697 args.epm_rdma_ra_in); 1698 retval = 
EINVAL; 1699 goto cleanup; 1700 } 1701 rcm_attr.rc_rdma_ra_in = args.epm_rdma_ra_in; 1702 } 1703 status = ibt_modify_rc_channel(ep_rp->ep_chan_hdl, args.epm_flags, 1704 &rcm_attr, NULL); 1705 if (status != IBT_SUCCESS) { 1706 DERR("ep_modify: modify_rc_channel returned %d\n", status); 1707 *rvalp = (int)status; 1708 retval = 0; 1709 goto cleanup; 1710 } 1711 1712 /* 1713 * ep_modify does not change ep_state 1714 */ 1715 cleanup:; 1716 daplka_ep_set_state(ep_rp, old_state, new_state); 1717 DAPLKA_RS_UNREF(ep_rp); 1718 return (retval); 1719 } 1720 1721 /* 1722 * Frees a EP resource. 1723 * a EP may only be freed when it is in the CLOSED or 1724 * DISCONNECTED state. 1725 */ 1726 /* ARGSUSED */ 1727 static int 1728 daplka_ep_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 1729 cred_t *cred, int *rvalp) 1730 { 1731 daplka_ep_resource_t *ep_rp = NULL; 1732 dapl_ep_free_t args; 1733 uint32_t old_state, new_state; 1734 int retval; 1735 1736 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_free_t), mode); 1737 if (retval != 0) { 1738 DERR("ep_free: copyin error %d\n", retval); 1739 return (EFAULT); 1740 } 1741 ep_rp = (daplka_ep_resource_t *) 1742 daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epf_hkey); 1743 if (ep_rp == NULL) { 1744 DERR("ep_free: cannot find ep resource\n"); 1745 return (EINVAL); 1746 } 1747 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP); 1748 new_state = old_state = daplka_ep_get_state(ep_rp); 1749 1750 /* 1751 * ep cannot be freed if it is in an invalid state. 1752 */ 1753 if (old_state != DAPLKA_EP_STATE_CLOSED && 1754 old_state != DAPLKA_EP_STATE_DISCONNECTED) { 1755 DERR("ep_free: invalid state %d\n", old_state); 1756 retval = EINVAL; 1757 goto cleanup; 1758 } 1759 ep_rp = NULL; 1760 retval = daplka_hash_remove(&ia_rp->ia_ep_htbl, 1761 args.epf_hkey, (void **)&ep_rp); 1762 if (retval != 0 || ep_rp == NULL) { 1763 /* 1764 * this is only possible if we have two threads 1765 * calling ep_free in parallel. 
1766 */ 1767 DERR("ep_free: cannot find ep resource\n"); 1768 goto cleanup; 1769 } 1770 /* there should not be any outstanding timers */ 1771 ASSERT(ep_rp->ep_timer_hkey == 0); 1772 1773 new_state = DAPLKA_EP_STATE_FREED; 1774 daplka_ep_set_state(ep_rp, old_state, new_state); 1775 1776 /* remove reference obtained by lookup */ 1777 DAPLKA_RS_UNREF(ep_rp); 1778 1779 /* UNREF calls the actual free function when refcnt is zero */ 1780 DAPLKA_RS_UNREF(ep_rp); 1781 return (0); 1782 1783 cleanup:; 1784 daplka_ep_set_state(ep_rp, old_state, new_state); 1785 1786 /* remove reference obtained by lookup */ 1787 DAPLKA_RS_UNREF(ep_rp); 1788 return (retval); 1789 } 1790 1791 /* 1792 * The following routines supports the timeout feature of ep_connect. 1793 * Refer to the description of ep_connect for details. 1794 */ 1795 1796 /* 1797 * this is the timer processing thread. 1798 */ 1799 static void 1800 daplka_timer_thread(void *arg) 1801 { 1802 daplka_timer_info_t *timerp = (daplka_timer_info_t *)arg; 1803 daplka_ep_resource_t *ep_rp; 1804 daplka_evd_event_t *disc_ev = NULL; 1805 ibt_status_t status; 1806 int old_state, new_state; 1807 1808 ep_rp = timerp->ti_ep_res; 1809 ASSERT(ep_rp != NULL); 1810 ASSERT(timerp->ti_tmo_id != 0); 1811 timerp->ti_tmo_id = 0; 1812 1813 new_state = old_state = daplka_ep_get_state(ep_rp); 1814 if (old_state != DAPLKA_EP_STATE_CONNECTING) { 1815 /* unblock hash_ep_free */ 1816 mutex_enter(&ep_rp->ep_lock); 1817 ASSERT(ep_rp->ep_timer_hkey != 0); 1818 ep_rp->ep_timer_hkey = 0; 1819 cv_broadcast(&ep_rp->ep_cv); 1820 mutex_exit(&ep_rp->ep_lock); 1821 1822 /* reset state to original state */ 1823 daplka_ep_set_state(ep_rp, old_state, new_state); 1824 1825 /* this function will also unref ep_rp */ 1826 daplka_timer_info_free(timerp); 1827 return; 1828 } 1829 1830 ASSERT(ep_rp->ep_timer_hkey != 0); 1831 ep_rp->ep_timer_hkey = 0; 1832 1833 /* 1834 * we cannot keep ep_state in TRANSITIONING if we call 1835 * ibt_close_rc_channel in blocking mode. 
this would cause 1836 * a deadlock because the cm callbacks will be blocked and 1837 * will not be able to wake us up. 1838 */ 1839 new_state = DAPLKA_EP_STATE_ABORTING; 1840 daplka_ep_set_state(ep_rp, old_state, new_state); 1841 1842 /* 1843 * when we return from close_rc_channel, all callbacks should have 1844 * completed. we can also be certain that these callbacks did not 1845 * enqueue any events to conn_evd. 1846 */ 1847 status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING, 1848 NULL, 0, NULL, NULL, NULL); 1849 if (status != IBT_SUCCESS) { 1850 DERR("timer_thread: ibt_close_rc_channel returned %d\n", 1851 status); 1852 } 1853 old_state = daplka_ep_get_state(ep_rp); 1854 1855 /* 1856 * this is the only thread that can transition ep_state out 1857 * of ABORTING. all other ep operations would fail when 1858 * ep_state is in ABORTING. 1859 */ 1860 ASSERT(old_state == DAPLKA_EP_STATE_ABORTING); 1861 1862 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_SLEEP); 1863 ASSERT(disc_ev != NULL); 1864 1865 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT; 1866 disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie; 1867 disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE; 1868 disc_ev->ee_cmev.ec_cm_psep_cookie = 0; 1869 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL; 1870 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0; 1871 1872 D2("timer_thread: enqueue event(%p) evdp(%p)\n", 1873 disc_ev, ep_rp->ep_conn_evd); 1874 1875 new_state = DAPLKA_EP_STATE_DISCONNECTED; 1876 daplka_ep_set_state(ep_rp, old_state, new_state); 1877 1878 daplka_evd_wakeup(ep_rp->ep_conn_evd, 1879 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev); 1880 1881 /* this function will also unref ep_rp */ 1882 daplka_timer_info_free(timerp); 1883 } 1884 1885 /* 1886 * dispatches a thread to continue with timer processing. 1887 */ 1888 static void 1889 daplka_timer_dispatch(void *arg) 1890 { 1891 /* 1892 * keep rescheduling this function until 1893 * taskq_dispatch succeeds. 
1894 */ 1895 if (taskq_dispatch(daplka_taskq, 1896 daplka_timer_thread, arg, TQ_NOSLEEP) == TASKQID_INVALID) { 1897 DERR("timer_dispatch: taskq_dispatch failed, retrying...\n"); 1898 (void) timeout(daplka_timer_dispatch, arg, 10); 1899 } 1900 } 1901 1902 /* 1903 * this function is called by the kernel's callout thread. 1904 * we first attempt to remove the timer object from the 1905 * global timer table. if it is found, we dispatch a thread 1906 * to continue processing the timer object. if it is not 1907 * found, that means the timer has been cancelled by someone 1908 * else. 1909 */ 1910 static void 1911 daplka_timer_handler(void *arg) 1912 { 1913 uint64_t timer_hkey = (uintptr_t)arg; 1914 daplka_timer_info_t *timerp = NULL; 1915 1916 D2("timer_handler: timer_hkey 0x%llx\n", (longlong_t)timer_hkey); 1917 1918 (void) daplka_hash_remove(&daplka_timer_info_htbl, 1919 timer_hkey, (void **)&timerp); 1920 if (timerp == NULL) { 1921 D2("timer_handler: timer already cancelled\n"); 1922 return; 1923 } 1924 daplka_timer_dispatch((void *)timerp); 1925 } 1926 1927 /* 1928 * allocates a timer_info object. 1929 * a reference to a EP is held by this object. this ensures 1930 * that the EP stays valid when a timer is outstanding. 1931 */ 1932 static daplka_timer_info_t * 1933 daplka_timer_info_alloc(daplka_ep_resource_t *ep_rp) 1934 { 1935 daplka_timer_info_t *timerp; 1936 1937 timerp = kmem_zalloc(sizeof (*timerp), daplka_km_flags); 1938 if (timerp == NULL) { 1939 DERR("timer_info_alloc: cannot allocate timer info\n"); 1940 return (NULL); 1941 } 1942 timerp->ti_ep_res = ep_rp; 1943 timerp->ti_tmo_id = 0; 1944 1945 return (timerp); 1946 } 1947 1948 /* 1949 * Frees the timer_info object. 1950 * we release the EP reference before freeing the object. 
1951 */ 1952 static void 1953 daplka_timer_info_free(daplka_timer_info_t *timerp) 1954 { 1955 ASSERT(timerp->ti_ep_res != NULL); 1956 DAPLKA_RS_UNREF(timerp->ti_ep_res); 1957 timerp->ti_ep_res = NULL; 1958 ASSERT(timerp->ti_tmo_id == 0); 1959 kmem_free(timerp, sizeof (*timerp)); 1960 } 1961 1962 /* 1963 * cancels the timer set by ep_connect. 1964 * returns -1 if timer handling is in progress 1965 * and 0 otherwise. 1966 */ 1967 static int 1968 daplka_cancel_timer(daplka_ep_resource_t *ep_rp) 1969 { 1970 /* 1971 * this function can only be called when ep_state 1972 * is frozen. 1973 */ 1974 ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING); 1975 if (ep_rp->ep_timer_hkey != 0) { 1976 daplka_timer_info_t *timerp = NULL; 1977 1978 (void) daplka_hash_remove(&daplka_timer_info_htbl, 1979 ep_rp->ep_timer_hkey, (void **)&timerp); 1980 if (timerp == NULL) { 1981 /* 1982 * this is possible if the timer_handler has 1983 * removed the timerp but the taskq thread has 1984 * not transitioned the ep_state to DISCONNECTED. 1985 * we need to reset the ep_state to allow the 1986 * taskq thread to continue with its work. the 1987 * taskq thread will set the ep_timer_hkey to 0 1988 * so we don't have to do it here. 1989 */ 1990 DERR("cancel_timer: timer is being processed\n"); 1991 return (-1); 1992 } 1993 /* 1994 * we got the timer object. if the handler fires at 1995 * this point, it will not be able to find the object 1996 * and will return immediately. normally, ti_tmo_id gets 1997 * cleared when the handler fires. 1998 */ 1999 ASSERT(timerp->ti_tmo_id != 0); 2000 2001 /* 2002 * note that untimeout can possibly call the handler. 2003 * we are safe because the handler will be a no-op. 
2004 */ 2005 (void) untimeout(timerp->ti_tmo_id); 2006 timerp->ti_tmo_id = 0; 2007 daplka_timer_info_free(timerp); 2008 ep_rp->ep_timer_hkey = 0; 2009 } 2010 return (0); 2011 } 2012 2013 /* 2014 * this function is called by daplka_hash_destroy for 2015 * freeing timer_info objects 2016 */ 2017 static void 2018 daplka_hash_timer_free(void *obj) 2019 { 2020 daplka_timer_info_free((daplka_timer_info_t *)obj); 2021 } 2022 2023 /* ARGSUSED */ 2024 static uint16_t 2025 daplka_hellomsg_cksum(DAPL_PRIVATE *dp) 2026 { 2027 uint8_t *bp; 2028 int i; 2029 uint16_t cksum = 0; 2030 2031 bp = (uint8_t *)dp; 2032 for (i = 0; i < sizeof (DAPL_PRIVATE); i++) { 2033 cksum += bp[i]; 2034 } 2035 return (cksum); 2036 } 2037 2038 /* 2039 * ep_connect is called by the client to initiate a connection to a 2040 * remote service point. It is a non-blocking call. If a non-zero 2041 * timeout is specified by the client, a timer will be set just before 2042 * returning from ep_connect. Upon a successful return from ep_connect, 2043 * the client will call evd_wait to wait for the connection to complete. 2044 * If the connection is rejected or has failed due to an error, the 2045 * client will be notified with an event containing the appropriate error 2046 * code. If the connection is accepted, the client will be notified with 2047 * the CONN_ESTABLISHED event. If the timer expires before either of the 2048 * above events (error or established), a TIMED_OUT event will be delivered 2049 * to the client. 2050 * 2051 * the complicated part of the timer logic is the handling of race 2052 * conditions with CM callbacks. we need to ensure that either the CM or 2053 * the timer thread gets to deliver an event, but not both. when the 2054 * CM callback is about to deliver an event, it always tries to cancel 2055 * the outstanding timer. if cancel_timer indicates a that the timer is 2056 * already being processed, the CM callback will simply return without 2057 * delivering an event. 
 * when the timer thread executes, it tries to check
 * if the EP is still in CONNECTING state (timers only work on the active
 * side). if the EP is not in this state, the timer thread will return
 * without delivering an event.
 *
 * return value convention used below: failures detected by this driver
 * (bad copyin, unknown EP, wrong state, oversized private data, timer
 * setup failure) are returned as an errno. failures reported by IBTF
 * calls are stored in *rvalp and the function itself returns 0.
 */
/* ARGSUSED */
static int
daplka_ep_connect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t *ep_rp = NULL;
	dapl_ep_connect_t args;
	daplka_timer_info_t *timerp = NULL;
	uint32_t old_state, new_state;
	boolean_t timer_inserted = B_FALSE;
	uint64_t timer_hkey = 0;
	ibt_path_info_t path_info;
	ibt_path_attr_t path_attr;
	ibt_hca_attr_t *hca_attrp;
	ibt_chan_open_args_t chan_args;
	ibt_status_t status = IBT_SUCCESS;
	uint8_t num_paths;
	void *priv_data;
	DAPL_PRIVATE *dp;
	int retval = 0;
	ib_gid_t *sgid;
	ib_gid_t *dgid;
	uint64_t dgid_ored;
	ibt_ar_t ar_query_s;
	ibt_ar_t ar_result_s;
	ibt_path_flags_t pathflags;

	D3("ep_connect: enter\n");
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_connect_t),
	    mode);
	if (retval != 0) {
		DERR("ep_connect: copyin error %d\n", retval);
		return (EFAULT);
	}
	/* every exit past this point must go through cleanup to UNREF */
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epc_hkey);
	if (ep_rp == NULL) {
		DERR("ep_connect: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);

	/*
	 * new_state starts out equal to old_state so that every early
	 * exit through cleanup hands the state it fetched back to
	 * daplka_ep_set_state unchanged.
	 */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CLOSED) {
		DERR("ep_connect: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.epc_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
		DERR("ep_connect: private data len (%d) exceeded "
		    "max size %d\n", args.epc_priv_sz,
		    DAPL_MAX_PRIVATE_DATA_SIZE);
		retval = EINVAL;
		goto cleanup;
	}

	/*
	 * check for remote ipaddress to dgid resolution needs ATS
	 */
	dgid = &args.epc_dgid;
	/* an all-zero prefix and guid means the caller supplied no dgid */
	dgid_ored = dgid->gid_guid | dgid->gid_prefix;
#if defined(DAPLKA_DEBUG_FORCE_ATS)
	dgid_ored = 0ULL;
#endif /* DAPLKA_DEBUG_FORCE_ATS */
	/* check for unidentified dgid */
	if (dgid_ored == 0ULL) {
		/*
		 * setup for ibt_query_ar()
		 */
		sgid = &ia_rp->ia_hca_sgid;
		ar_query_s.ar_gid.gid_guid = 0ULL;
		ar_query_s.ar_gid.gid_prefix = 0ULL;
		ar_query_s.ar_pkey = 0;
		bcopy(args.epc_raddr_sadata.iad_sadata,
		    ar_query_s.ar_data, DAPL_ATS_NBYTES);
#define	UR(b) ar_query_s.ar_data[(b)]
		D3("daplka_ep_connect: SA[8] %d.%d.%d.%d\n",
		    UR(8), UR(9), UR(10), UR(11));
		D3("daplka_ep_connect: SA[12] %d.%d.%d.%d\n",
		    UR(12), UR(13), UR(14), UR(15));
		status = ibt_query_ar(sgid, &ar_query_s, &ar_result_s);
		if (status != IBT_SUCCESS) {
			DERR("ep_connect: ibt_query_ar returned %d\n", status);
			*rvalp = (int)status;
			retval = 0;
			goto cleanup;
		}
		/*
		 * dgid identified from SA record
		 */
		dgid = &ar_result_s.ar_gid;
		D2("daplka_ep_connect: ATS dgid=%llx:%llx\n",
		    (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
	}

	bzero(&path_info, sizeof (ibt_path_info_t));
	bzero(&path_attr, sizeof (ibt_path_attr_t));
	bzero(&chan_args, sizeof (ibt_chan_open_args_t));

	path_attr.pa_dgids = dgid;
	path_attr.pa_num_dgids = 1;
	/*
	 * don't set sid in path_attr saves 1 SA query
	 * Also makes server side not to write the service record
	 */
	path_attr.pa_sgid = ia_rp->ia_hca_sgid;
	path_attr.pa_pkey = ia_rp->ia_port_pkey;

	/* save the connection ep - struct copy */
	ep_rp->ep_sgid = ia_rp->ia_hca_sgid;
	ep_rp->ep_dgid = *dgid;

	num_paths = 0;
	pathflags = IBT_PATH_PKEY;
	/* enable APM on remote port but not on loopback case */
	if (daplka_apm && ((dgid->gid_prefix != path_attr.pa_sgid.gid_prefix) ||
	    (dgid->gid_guid != path_attr.pa_sgid.gid_guid))) {
		pathflags |= IBT_PATH_APM;
	}
	status = ibt_get_paths(daplka_dev->daplka_clnt_hdl,
	    pathflags, &path_attr, 1, &path_info, &num_paths);

	/* IBT_INSUFF_DATA is tolerated here; only other failures abort */
	if (status != IBT_SUCCESS && status != IBT_INSUFF_DATA) {
		DERR("ep_connect: ibt_get_paths returned %d paths %d\n",
		    status, num_paths);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	/* fill in the sid directly to path_info */
	path_info.pi_sid = args.epc_sid;
	hca_attrp = &ia_rp->ia_hca->hca_attr;

	/* fill in open channel args */
	chan_args.oc_path = &path_info;
	chan_args.oc_cm_handler = daplka_cm_rc_handler;
	chan_args.oc_cm_clnt_private = (void *)ep_rp;
	chan_args.oc_rdma_ra_out = hca_attrp->hca_max_rdma_out_chan;
	chan_args.oc_rdma_ra_in = hca_attrp->hca_max_rdma_in_chan;
	chan_args.oc_path_retry_cnt = 7; /* 3-bit field */
	chan_args.oc_path_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;

	/* the private data lives in the local copy of the ioctl args */
	ASSERT(args.epc_priv_sz > 0);
	priv_data = (void *)args.epc_priv;

	chan_args.oc_priv_data_len = args.epc_priv_sz;
	chan_args.oc_priv_data = priv_data;

	/*
	 * calculate checksum value of hello message and
	 * put hello message in networking byte order
	 */
	dp = (DAPL_PRIVATE *)priv_data;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dp))
	dp->hello_msg.hi_port = htons(dp->hello_msg.hi_port);
	/* the checksum field must be zero while the sum is computed */
	dp->hello_msg.hi_checksum = 0;
	dp->hello_msg.hi_checksum = htons(daplka_hellomsg_cksum(dp));
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*dp))

	if (args.epc_timeout > 0) {
		/*
		 * increment refcnt before passing reference to
		 * timer_info_alloc.
		 */
		DAPLKA_RS_REF(ep_rp);
		timerp = daplka_timer_info_alloc(ep_rp);
		if (timerp == NULL) {
			DERR("ep_connect: cannot allocate timer\n");
			/*
			 * we need to remove the reference if
			 * allocation failed.
			 */
			DAPLKA_RS_UNREF(ep_rp);
			retval = ENOMEM;
			goto cleanup;
		}
		/*
		 * We generate our own hkeys so that timer_hkey can fit
		 * into a pointer and passed as an arg to timeout()
		 */
		timer_hkey = (uint64_t)daplka_timer_hkey_gen();
		retval = daplka_hash_insert(&daplka_timer_info_htbl,
		    &timer_hkey, (void *)timerp);
		if (retval != 0) {
			DERR("ep_connect: cannot insert timer info\n");
			/* timer_inserted is still B_FALSE: cleanup only frees */
			goto cleanup;
		}
		ASSERT(ep_rp->ep_timer_hkey == 0);
		ep_rp->ep_timer_hkey = timer_hkey;
		timer_inserted = B_TRUE;
		D2("ep_connect: timer_hkey = 0x%llx\n",
		    (longlong_t)timer_hkey);
	}
	status = ibt_open_rc_channel(ep_rp->ep_chan_hdl, IBT_OCHAN_NO_FLAGS,
	    IBT_NONBLOCKING, &chan_args, NULL);

	if (status != IBT_SUCCESS) {
		DERR("ep_connect: ibt_open_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	/*
	 * if a cm callback gets called at this point, it'll have to wait until
	 * ep_state becomes connecting (or some other state if another thread
	 * manages to get ahead of the callback). this guarantees that the
	 * callback will not touch the timer until it gets set.
	 */
	if (timerp != NULL) {
		clock_t tmo;

		/* epc_timeout is passed to drv_usectohz, i.e. microseconds */
		tmo = drv_usectohz((clock_t)args.epc_timeout);
		/*
		 * We generate our own 32 bit timer_hkey so that it can fit
		 * into a pointer
		 */
		ASSERT(timer_hkey != 0);
		timerp->ti_tmo_id = timeout(daplka_timer_handler,
		    (void *)(uintptr_t)timer_hkey, tmo);
	}
	/* only a successfully initiated connect changes the EP state */
	new_state = DAPLKA_EP_STATE_CONNECTING;

cleanup:;
	/*
	 * undo the timer setup on any failure, whether reported through
	 * retval (driver error) or through status (IBTF error).
	 */
	if (timerp != NULL && (retval != 0 || status != IBT_SUCCESS)) {
		/*
		 * if ibt_open_rc_channel failed, the timerp must still
		 * be in daplka_timer_info_htbl because neither the cm
		 * callback nor the timer_handler will be called.
		 */
		if (timer_inserted) {
			daplka_timer_info_t *new_timerp = NULL;

			ASSERT(timer_hkey != 0);
			(void) daplka_hash_remove(&daplka_timer_info_htbl,
			    timer_hkey, (void **)&new_timerp);
			ASSERT(new_timerp == timerp);
			ep_rp->ep_timer_hkey = 0;
		}
		daplka_timer_info_free(timerp);
	}
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	D3("ep_connect: exit\n");
	return (retval);
}

/*
 * ep_disconnect closes a connection with a remote peer.
 * if a connection has not been established, ep_disconnect
 * will instead flush all recv bufs posted to this channel.
 * if the EP state is CONNECTED, CONNECTING or ACCEPTING upon
 * entry to ep_disconnect, the EP state will transition to
 * DISCONNECTING upon exit. the CM callbacks triggered by
 * ibt_close_rc_channel will cause EP state to become
 * DISCONNECTED. This function is a no-op if EP state is
 * DISCONNECTED.
2318 */ 2319 /* ARGSUSED */ 2320 static int 2321 daplka_ep_disconnect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 2322 cred_t *cred, int *rvalp) 2323 { 2324 daplka_ep_resource_t *ep_rp = NULL; 2325 dapl_ep_disconnect_t args; 2326 ibt_status_t status; 2327 uint32_t old_state, new_state; 2328 int retval = 0; 2329 2330 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_disconnect_t), 2331 mode); 2332 if (retval != 0) { 2333 DERR("ep_disconnect: copyin error %d\n", retval); 2334 return (EFAULT); 2335 } 2336 ep_rp = (daplka_ep_resource_t *) 2337 daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epd_hkey); 2338 if (ep_rp == NULL) { 2339 DERR("ep_disconnect: cannot find ep resource\n"); 2340 return (EINVAL); 2341 } 2342 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP); 2343 2344 new_state = old_state = daplka_ep_get_state(ep_rp); 2345 if (old_state != DAPLKA_EP_STATE_CONNECTED && 2346 old_state != DAPLKA_EP_STATE_CONNECTING && 2347 old_state != DAPLKA_EP_STATE_ACCEPTING && 2348 old_state != DAPLKA_EP_STATE_DISCONNECTED && 2349 old_state != DAPLKA_EP_STATE_DISCONNECTING && 2350 old_state != DAPLKA_EP_STATE_CLOSED) { 2351 DERR("ep_disconnect: invalid state %d\n", old_state); 2352 retval = EINVAL; 2353 goto cleanup; 2354 } 2355 2356 if ((old_state == DAPLKA_EP_STATE_DISCONNECTED) || 2357 (old_state == DAPLKA_EP_STATE_DISCONNECTING)) { 2358 D2("ep_disconnect: ep already disconnected\n"); 2359 retval = 0; 2360 /* we leave the state as DISCONNECTED */ 2361 goto cleanup; 2362 } 2363 if (old_state == DAPLKA_EP_STATE_CONNECTING || 2364 old_state == DAPLKA_EP_STATE_ACCEPTING) { 2365 D2("ep_disconnect: aborting, old_state = %d\n", old_state); 2366 } 2367 2368 /* 2369 * according to the udapl spec, ep_disconnect should 2370 * flush the channel if the channel is not CONNECTED. 
2371 */ 2372 if (old_state == DAPLKA_EP_STATE_CLOSED) { 2373 status = ibt_flush_channel(ep_rp->ep_chan_hdl); 2374 if (status != IBT_SUCCESS) { 2375 DERR("ep_disconnect: ibt_flush_channel failed %d\n", 2376 status); 2377 *rvalp = (int)status; 2378 } 2379 retval = 0; 2380 /* we leave the state as CLOSED */ 2381 goto cleanup; 2382 } 2383 2384 new_state = DAPLKA_EP_STATE_DISCONNECTING; 2385 daplka_ep_set_state(ep_rp, old_state, new_state); 2386 status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_NONBLOCKING, 2387 NULL, 0, NULL, NULL, NULL); 2388 2389 if (status == IBT_SUCCESS) { 2390 DAPLKA_RS_UNREF(ep_rp); 2391 return (retval); 2392 } else { 2393 DERR("ep_disconnect: ibt_close_rc_channel returned %d\n", 2394 status); 2395 *rvalp = (int)status; 2396 retval = 0; 2397 new_state = old_state; 2398 } 2399 2400 cleanup:; 2401 daplka_ep_set_state(ep_rp, old_state, new_state); 2402 DAPLKA_RS_UNREF(ep_rp); 2403 return (retval); 2404 } 2405 2406 /* 2407 * this function resets the EP to a usable state (ie. from 2408 * DISCONNECTED to CLOSED). this function is best implemented using 2409 * the ibt_recycle_channel interface. until that is available, we will 2410 * instead clone and tear down the existing channel and replace the 2411 * existing channel with the cloned one. 
2412 */ 2413 /* ARGSUSED */ 2414 static int 2415 daplka_ep_reinit(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 2416 cred_t *cred, int *rvalp) 2417 { 2418 daplka_ep_resource_t *ep_rp = NULL; 2419 dapl_ep_reinit_t args; 2420 ibt_status_t status; 2421 uint32_t old_state, new_state; 2422 int retval = 0; 2423 2424 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_reinit_t), 2425 mode); 2426 if (retval != 0) { 2427 DERR("reinit: copyin error %d\n", retval); 2428 return (EFAULT); 2429 } 2430 ep_rp = (daplka_ep_resource_t *) 2431 daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epri_hkey); 2432 if (ep_rp == NULL) { 2433 DERR("reinit: cannot find ep resource\n"); 2434 return (EINVAL); 2435 } 2436 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP); 2437 new_state = old_state = daplka_ep_get_state(ep_rp); 2438 if ((old_state != DAPLKA_EP_STATE_CLOSED) && 2439 (old_state != DAPLKA_EP_STATE_DISCONNECTED)) { 2440 DERR("reinit: invalid state %d\n", old_state); 2441 retval = EINVAL; 2442 goto cleanup; 2443 } 2444 2445 status = ibt_recycle_rc(ep_rp->ep_chan_hdl, 2446 IBT_CEP_RDMA_RD|IBT_CEP_RDMA_WR, 2447 ia_rp->ia_port_num, NULL, NULL); 2448 if (status != IBT_SUCCESS) { 2449 DERR("reinit: unable to clone channel\n"); 2450 *rvalp = (int)status; 2451 retval = 0; 2452 goto cleanup; 2453 } 2454 new_state = DAPLKA_EP_STATE_CLOSED; 2455 2456 cleanup:; 2457 daplka_ep_set_state(ep_rp, old_state, new_state); 2458 DAPLKA_RS_UNREF(ep_rp); 2459 return (retval); 2460 } 2461 2462 /* 2463 * destroys a EP resource. 2464 * called when refcnt drops to zero. 2465 */ 2466 static int 2467 daplka_ep_destroy(daplka_resource_t *gen_rp) 2468 { 2469 daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)gen_rp; 2470 ibt_status_t status; 2471 2472 ASSERT(DAPLKA_RS_REFCNT(ep_rp) == 0); 2473 ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_FREED); 2474 2475 /* 2476 * by the time we get here, we can be sure that 2477 * there is no outstanding timer. 
2478 */ 2479 ASSERT(ep_rp->ep_timer_hkey == 0); 2480 2481 D3("ep_destroy: entering, ep_rp 0x%p, rnum %d\n", 2482 ep_rp, DAPLKA_RS_RNUM(ep_rp)); 2483 /* 2484 * free rc channel 2485 */ 2486 if (ep_rp->ep_chan_hdl != NULL) { 2487 mutex_enter(&daplka_dev->daplka_mutex); 2488 ibt_set_chan_private(ep_rp->ep_chan_hdl, NULL); 2489 mutex_exit(&daplka_dev->daplka_mutex); 2490 status = daplka_ibt_free_channel(ep_rp, ep_rp->ep_chan_hdl); 2491 if (status != IBT_SUCCESS) { 2492 DERR("ep_free: ibt_free_channel returned %d\n", 2493 status); 2494 } 2495 ep_rp->ep_chan_hdl = NULL; 2496 D3("ep_destroy: qp freed, rnum %d\n", DAPLKA_RS_RNUM(ep_rp)); 2497 } 2498 /* 2499 * release all references 2500 */ 2501 if (ep_rp->ep_snd_evd != NULL) { 2502 DAPLKA_RS_UNREF(ep_rp->ep_snd_evd); 2503 ep_rp->ep_snd_evd = NULL; 2504 } 2505 if (ep_rp->ep_rcv_evd != NULL) { 2506 DAPLKA_RS_UNREF(ep_rp->ep_rcv_evd); 2507 ep_rp->ep_rcv_evd = NULL; 2508 } 2509 if (ep_rp->ep_conn_evd != NULL) { 2510 DAPLKA_RS_UNREF(ep_rp->ep_conn_evd); 2511 ep_rp->ep_conn_evd = NULL; 2512 } 2513 if (ep_rp->ep_srq_res != NULL) { 2514 DAPLKA_RS_UNREF(ep_rp->ep_srq_res); 2515 ep_rp->ep_srq_res = NULL; 2516 } 2517 if (ep_rp->ep_pd_res != NULL) { 2518 DAPLKA_RS_UNREF(ep_rp->ep_pd_res); 2519 ep_rp->ep_pd_res = NULL; 2520 } 2521 cv_destroy(&ep_rp->ep_cv); 2522 mutex_destroy(&ep_rp->ep_lock); 2523 2524 DAPLKA_RS_FINI(ep_rp); 2525 kmem_free(ep_rp, sizeof (daplka_ep_resource_t)); 2526 D3("ep_destroy: exiting, ep_rp 0x%p\n", ep_rp); 2527 return (0); 2528 } 2529 2530 /* 2531 * this function is called by daplka_hash_destroy for 2532 * freeing EP resource objects 2533 */ 2534 static void 2535 daplka_hash_ep_free(void *obj) 2536 { 2537 daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)obj; 2538 ibt_status_t status; 2539 uint32_t old_state, new_state; 2540 int retval; 2541 2542 old_state = daplka_ep_get_state(ep_rp); 2543 retval = daplka_cancel_timer(ep_rp); 2544 new_state = DAPLKA_EP_STATE_FREED; 2545 daplka_ep_set_state(ep_rp, old_state, 
new_state); 2546 2547 if (retval != 0) { 2548 D2("hash_ep_free: ep_rp 0x%p " 2549 "timer is still being processed\n", ep_rp); 2550 mutex_enter(&ep_rp->ep_lock); 2551 if (ep_rp->ep_timer_hkey != 0) { 2552 D2("hash_ep_free: ep_rp 0x%p " 2553 "waiting for timer_hkey to be 0\n", ep_rp); 2554 cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock); 2555 } 2556 mutex_exit(&ep_rp->ep_lock); 2557 } 2558 2559 /* call ibt_close_rc_channel regardless of what state we are in */ 2560 status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING, 2561 NULL, 0, NULL, NULL, NULL); 2562 if (status != IBT_SUCCESS) { 2563 if (old_state == DAPLKA_EP_STATE_CONNECTED || 2564 old_state == DAPLKA_EP_STATE_CONNECTING || 2565 old_state == DAPLKA_EP_STATE_ACCEPTING) { 2566 DERR("hash_ep_free: ep_rp 0x%p state %d " 2567 "unexpected error %d from close_rc_channel\n", 2568 ep_rp, old_state, status); 2569 } 2570 D2("hash_ep_free: close_rc_channel, status %d\n", status); 2571 } 2572 2573 DAPLKA_RS_UNREF(ep_rp); 2574 } 2575 2576 /* 2577 * creates a EVD resource. 2578 * a EVD is used by the client to wait for events from one 2579 * or more sources. 
2580 */ 2581 /* ARGSUSED */ 2582 static int 2583 daplka_evd_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 2584 cred_t *cred, int *rvalp) 2585 { 2586 daplka_evd_resource_t *evd_rp = NULL; 2587 daplka_async_evd_hkey_t *async_evd; 2588 ibt_hca_attr_t *hca_attrp; 2589 ibt_cq_attr_t cq_attr; 2590 dapl_evd_create_t args; 2591 uint64_t evd_hkey = 0; 2592 boolean_t inserted = B_FALSE; 2593 int retval = 0; 2594 ibt_status_t status; 2595 2596 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_create_t), 2597 mode); 2598 if (retval != 0) { 2599 DERR("evd_create: copyin error %d", retval); 2600 return (EFAULT); 2601 } 2602 if ((args.evd_flags & 2603 ~(DAT_EVD_DEFAULT_FLAG | DAT_EVD_SOFTWARE_FLAG)) != 0) { 2604 DERR("evd_create: invalid flags 0x%x\n", args.evd_flags); 2605 return (EINVAL); 2606 } 2607 2608 evd_rp = kmem_zalloc(sizeof (daplka_evd_resource_t), daplka_km_flags); 2609 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp)) 2610 DAPLKA_RS_INIT(evd_rp, DAPL_TYPE_EVD, 2611 DAPLKA_RS_RNUM(ia_rp), daplka_evd_destroy); 2612 2613 mutex_init(&evd_rp->evd_lock, NULL, MUTEX_DRIVER, NULL); 2614 cv_init(&evd_rp->evd_cv, NULL, CV_DRIVER, NULL); 2615 evd_rp->evd_hca = ia_rp->ia_hca; 2616 evd_rp->evd_flags = args.evd_flags; 2617 evd_rp->evd_hca_hdl = ia_rp->ia_hca_hdl; 2618 evd_rp->evd_cookie = args.evd_cookie; 2619 evd_rp->evd_cno_res = NULL; 2620 evd_rp->evd_cr_events.eel_event_type = DAPLKA_EVD_CM_EVENTS; 2621 evd_rp->evd_conn_events.eel_event_type = DAPLKA_EVD_CM_EVENTS; 2622 evd_rp->evd_async_events.eel_event_type = DAPLKA_EVD_ASYNC_EVENTS; 2623 2624 /* 2625 * if the client specified a non-zero cno_hkey, we 2626 * lookup the cno and save the reference for later use. 
2627 */ 2628 if (args.evd_cno_hkey > 0) { 2629 daplka_cno_resource_t *cno_rp; 2630 2631 cno_rp = (daplka_cno_resource_t *) 2632 daplka_hash_lookup(&ia_rp->ia_cno_htbl, 2633 args.evd_cno_hkey); 2634 if (cno_rp == NULL) { 2635 DERR("evd_create: cannot find cno resource\n"); 2636 goto cleanup; 2637 } 2638 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO); 2639 evd_rp->evd_cno_res = cno_rp; 2640 } 2641 hca_attrp = &ia_rp->ia_hca->hca_attr; 2642 if ((evd_rp->evd_flags & 2643 (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) != 0) { 2644 if (args.evd_cq_size > hca_attrp->hca_max_cq_sz) { 2645 DERR("evd_create: invalid cq size %d", 2646 args.evd_cq_size); 2647 retval = EINVAL; 2648 goto cleanup; 2649 } 2650 cq_attr.cq_size = args.evd_cq_size; 2651 cq_attr.cq_sched = NULL; 2652 cq_attr.cq_flags = IBT_CQ_USER_MAP; 2653 2654 status = daplka_ibt_alloc_cq(evd_rp, evd_rp->evd_hca_hdl, 2655 &cq_attr, &evd_rp->evd_cq_hdl, &evd_rp->evd_cq_real_size); 2656 2657 if (status != IBT_SUCCESS) { 2658 DERR("evd_create: ibt_alloc_cq returned %d", status); 2659 *rvalp = (int)status; 2660 retval = 0; 2661 goto cleanup; 2662 } 2663 2664 /* 2665 * store evd ptr with cq_hdl 2666 * mutex is only needed for race of "destroy" and "async" 2667 */ 2668 mutex_enter(&daplka_dev->daplka_mutex); 2669 ibt_set_cq_private(evd_rp->evd_cq_hdl, (void *)evd_rp); 2670 mutex_exit(&daplka_dev->daplka_mutex); 2671 2672 /* Get HCA-specific data_out info */ 2673 status = ibt_ci_data_out(evd_rp->evd_hca_hdl, 2674 IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl, 2675 &args.evd_cq_data_out, sizeof (args.evd_cq_data_out)); 2676 2677 if (status != IBT_SUCCESS) { 2678 DERR("evd_create: ibt_ci_data_out error(%d)", status); 2679 *rvalp = (int)status; 2680 retval = 0; 2681 goto cleanup; 2682 } 2683 2684 args.evd_cq_real_size = evd_rp->evd_cq_real_size; 2685 2686 ibt_set_cq_handler(evd_rp->evd_cq_hdl, daplka_cq_handler, 2687 (void *)evd_rp); 2688 } 2689 2690 retval = daplka_hash_insert(&ia_rp->ia_evd_htbl, 2691 &evd_hkey, 
(void *)evd_rp); 2692 if (retval != 0) { 2693 DERR("evd_ceate: cannot insert evd %d\n", retval); 2694 goto cleanup; 2695 } 2696 inserted = B_TRUE; 2697 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*evd_rp)) 2698 2699 /* 2700 * If this evd handles async events need to add to the IA resource 2701 * async evd list 2702 */ 2703 if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) { 2704 async_evd = kmem_zalloc(sizeof (daplka_async_evd_hkey_t), 2705 daplka_km_flags); 2706 /* add the evd to the head of the list */ 2707 mutex_enter(&ia_rp->ia_lock); 2708 async_evd->aeh_evd_hkey = evd_hkey; 2709 async_evd->aeh_next = ia_rp->ia_async_evd_hkeys; 2710 ia_rp->ia_async_evd_hkeys = async_evd; 2711 mutex_exit(&ia_rp->ia_lock); 2712 } 2713 2714 args.evd_hkey = evd_hkey; 2715 retval = copyout(&args, (void *)arg, sizeof (dapl_evd_create_t)); 2716 if (retval != 0) { 2717 DERR("evd_create: copyout error %d\n", retval); 2718 retval = EFAULT; 2719 goto cleanup; 2720 } 2721 return (0); 2722 2723 cleanup:; 2724 if (inserted) { 2725 daplka_evd_resource_t *free_rp = NULL; 2726 2727 (void) daplka_hash_remove(&ia_rp->ia_evd_htbl, evd_hkey, 2728 (void **)&free_rp); 2729 if (free_rp != evd_rp) { 2730 DERR("evd_create: cannot remove evd\n"); 2731 /* 2732 * we can only get here if another thread 2733 * has completed the cleanup in evd_free 2734 */ 2735 return (retval); 2736 } 2737 } 2738 DAPLKA_RS_UNREF(evd_rp); 2739 return (retval); 2740 } 2741 2742 /* 2743 * resizes CQ and returns new mapping info to library. 
2744 */ 2745 /* ARGSUSED */ 2746 static int 2747 daplka_cq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 2748 cred_t *cred, int *rvalp) 2749 { 2750 daplka_evd_resource_t *evd_rp = NULL; 2751 ibt_hca_attr_t *hca_attrp; 2752 dapl_cq_resize_t args; 2753 ibt_status_t status; 2754 int retval = 0; 2755 2756 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cq_resize_t), 2757 mode); 2758 if (retval != 0) { 2759 DERR("cq_resize: copyin error %d\n", retval); 2760 return (EFAULT); 2761 } 2762 2763 /* get evd resource */ 2764 evd_rp = (daplka_evd_resource_t *) 2765 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.cqr_evd_hkey); 2766 if (evd_rp == NULL) { 2767 DERR("cq_resize: cannot find evd resource\n"); 2768 return (EINVAL); 2769 } 2770 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD); 2771 2772 hca_attrp = &ia_rp->ia_hca->hca_attr; 2773 if (args.cqr_cq_new_size > hca_attrp->hca_max_cq_sz) { 2774 DERR("cq_resize: invalid cq size %d", args.cqr_cq_new_size); 2775 retval = EINVAL; 2776 goto cleanup; 2777 } 2778 /* 2779 * If ibt_resize_cq fails that it is primarily due to resource 2780 * shortage. Per IB spec resize will never loose events and 2781 * a resize error leaves the CQ intact. Therefore even if the 2782 * resize request fails we proceed and get the mapping data 2783 * from the CQ so that the library can mmap it. 
2784 */ 2785 status = ibt_resize_cq(evd_rp->evd_cq_hdl, args.cqr_cq_new_size, 2786 &args.cqr_cq_real_size); 2787 if (status != IBT_SUCCESS) { 2788 /* we return the size of the old CQ if resize fails */ 2789 args.cqr_cq_real_size = evd_rp->evd_cq_real_size; 2790 ASSERT(status != IBT_CQ_HDL_INVALID); 2791 DERR("cq_resize: ibt_resize_cq failed:%d\n", status); 2792 } else { 2793 mutex_enter(&evd_rp->evd_lock); 2794 evd_rp->evd_cq_real_size = args.cqr_cq_real_size; 2795 mutex_exit(&evd_rp->evd_lock); 2796 } 2797 2798 D2("cq_resize(%d): done new_sz(%u) real_sz(%u)\n", 2799 DAPLKA_RS_RNUM(evd_rp), 2800 args.cqr_cq_new_size, args.cqr_cq_real_size); 2801 2802 /* Get HCA-specific data_out info */ 2803 status = ibt_ci_data_out(evd_rp->evd_hca_hdl, 2804 IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl, 2805 &args.cqr_cq_data_out, sizeof (args.cqr_cq_data_out)); 2806 if (status != IBT_SUCCESS) { 2807 DERR("cq_resize: ibt_ci_data_out error(%d)\n", status); 2808 /* return ibt_ci_data_out status */ 2809 *rvalp = (int)status; 2810 retval = 0; 2811 goto cleanup; 2812 } 2813 2814 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cq_resize_t), 2815 mode); 2816 if (retval != 0) { 2817 DERR("cq_resize: copyout error %d\n", retval); 2818 retval = EFAULT; 2819 goto cleanup; 2820 } 2821 2822 cleanup:; 2823 if (evd_rp != NULL) { 2824 DAPLKA_RS_UNREF(evd_rp); 2825 } 2826 return (retval); 2827 } 2828 2829 /* 2830 * Routine to copyin the event poll message so that 32 bit libraries 2831 * can be safely supported 2832 */ 2833 int 2834 daplka_event_poll_copyin(intptr_t inarg, dapl_event_poll_t *outarg, int mode) 2835 { 2836 int retval; 2837 2838 #ifdef _MULTI_DATAMODEL 2839 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 2840 dapl_event_poll32_t args32; 2841 2842 retval = ddi_copyin((void *)inarg, &args32, 2843 sizeof (dapl_event_poll32_t), mode); 2844 if (retval != 0) { 2845 DERR("event_poll_copyin: 32bit error %d\n", retval); 2846 return (EFAULT); 2847 } 2848 2849 
outarg->evp_evd_hkey = args32.evp_evd_hkey; 2850 outarg->evp_threshold = args32.evp_threshold; 2851 outarg->evp_timeout = args32.evp_timeout; 2852 outarg->evp_ep = (dapl_ib_event_t *)(uintptr_t)args32.evp_ep; 2853 outarg->evp_num_ev = args32.evp_num_ev; 2854 outarg->evp_num_polled = args32.evp_num_polled; 2855 return (0); 2856 } 2857 #endif 2858 retval = ddi_copyin((void *)inarg, outarg, sizeof (dapl_event_poll_t), 2859 mode); 2860 if (retval != 0) { 2861 DERR("event_poll: copyin error %d\n", retval); 2862 return (EFAULT); 2863 } 2864 2865 return (0); 2866 } 2867 2868 /* 2869 * Routine to copyout the event poll message so that 32 bit libraries 2870 * can be safely supported 2871 */ 2872 int 2873 daplka_event_poll_copyout(dapl_event_poll_t *inarg, intptr_t outarg, int mode) 2874 { 2875 int retval; 2876 2877 #ifdef _MULTI_DATAMODEL 2878 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 2879 dapl_event_poll32_t args32; 2880 2881 args32.evp_evd_hkey = inarg->evp_evd_hkey; 2882 args32.evp_threshold = inarg->evp_threshold; 2883 args32.evp_timeout = inarg->evp_timeout; 2884 args32.evp_ep = (caddr32_t)(uintptr_t)inarg->evp_ep; 2885 args32.evp_num_ev = inarg->evp_num_ev; 2886 args32.evp_num_polled = inarg->evp_num_polled; 2887 2888 retval = ddi_copyout((void *)&args32, (void *)outarg, 2889 sizeof (dapl_event_poll32_t), mode); 2890 if (retval != 0) { 2891 DERR("event_poll_copyout: 32bit error %d\n", retval); 2892 return (EFAULT); 2893 } 2894 return (0); 2895 } 2896 #endif 2897 retval = ddi_copyout((void *)inarg, (void *)outarg, 2898 sizeof (dapl_event_poll_t), mode); 2899 if (retval != 0) { 2900 DERR("event_poll_copyout: error %d\n", retval); 2901 return (EFAULT); 2902 } 2903 2904 return (0); 2905 } 2906 2907 /* 2908 * fucntion to handle CM REQ RCV private data from Solaris or third parties 2909 */ 2910 /* ARGSUSED */ 2911 static void 2912 daplka_crevent_privdata_post(daplka_ia_resource_t *ia_rp, 2913 dapl_ib_event_t *evd_rp, daplka_evd_event_t *cr_ev) 2914 { 2915 
DAPL_PRIVATE *dp; 2916 ib_gid_t *lgid; 2917 ibt_ar_t ar_query_s; 2918 ibt_ar_t ar_result_s; 2919 DAPL_HELLO_MSG *hip; 2920 uint32_t ipaddr_ord; 2921 ibt_priv_data_len_t clen; 2922 ibt_priv_data_len_t olen; 2923 ibt_status_t status; 2924 uint16_t cksum; 2925 2926 /* 2927 * get private data and len 2928 */ 2929 dp = (DAPL_PRIVATE *)cr_ev->ee_cmev.ec_cm_ev_priv_data; 2930 clen = cr_ev->ee_cmev.ec_cm_ev_priv_data_len; 2931 #if defined(DAPLKA_DEBUG_FORCE_ATS) 2932 /* skip the DAPL_PRIVATE chekcsum check */ 2933 #else 2934 /* for remote connects */ 2935 /* look up hello message in the CM private data area */ 2936 if (clen >= sizeof (DAPL_PRIVATE) && 2937 (dp->hello_msg.hi_vers == DAPL_HELLO_MSG_VERS)) { 2938 cksum = ntohs(dp->hello_msg.hi_checksum); 2939 dp->hello_msg.hi_checksum = 0; 2940 if (daplka_hellomsg_cksum(dp) == cksum) { 2941 D2("daplka_crevent_privdata_post: Solaris msg\n"); 2942 evd_rp->ibe_ce.ibce_priv_data_size = clen; 2943 dp->hello_msg.hi_checksum = DAPL_CHECKSUM; 2944 dp->hello_msg.hi_port = ntohs(dp->hello_msg.hi_port); 2945 bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen); 2946 kmem_free(dp, clen); 2947 return; 2948 } 2949 } 2950 #endif /* DAPLKA_DEBUG_FORCE_ATS */ 2951 2952 D2("daplka_crevent_privdata_post: 3rd party msg\n"); 2953 /* transpose CM private data into hello message */ 2954 if (clen) { 2955 olen = clen; 2956 if (clen > DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE) { 2957 clen = DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE; 2958 } 2959 bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen); 2960 kmem_free(dp, olen); 2961 } else { 2962 bzero(evd_rp->ibe_ce.ibce_priv_data_ptr, 2963 DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE); 2964 } 2965 evd_rp->ibe_ce.ibce_priv_data_size = sizeof (DAPL_PRIVATE); 2966 dp = (DAPL_PRIVATE *)evd_rp->ibe_ce.ibce_priv_data_ptr; 2967 /* 2968 * fill in hello message 2969 */ 2970 hip = &dp->hello_msg; 2971 hip->hi_checksum = DAPL_CHECKSUM; 2972 hip->hi_clen = clen; 2973 hip->hi_mid = 0; 2974 hip->hi_vers = DAPL_HELLO_MSG_VERS; 2975 
hip->hi_port = 0; 2976 2977 /* assign sgid and dgid */ 2978 lgid = &ia_rp->ia_hca_sgid; 2979 ar_query_s.ar_gid.gid_prefix = 2980 cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix; 2981 ar_query_s.ar_gid.gid_guid = 2982 cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid; 2983 ar_query_s.ar_pkey = ia_rp->ia_port_pkey; 2984 bzero(ar_query_s.ar_data, DAPL_ATS_NBYTES); 2985 2986 /* reverse ip address lookup through ATS */ 2987 status = ibt_query_ar(lgid, &ar_query_s, &ar_result_s); 2988 if (status == IBT_SUCCESS) { 2989 bcopy(ar_result_s.ar_data, hip->hi_saaddr, DAPL_ATS_NBYTES); 2990 /* determine the address families */ 2991 ipaddr_ord = hip->hi_v4pad[0] | hip->hi_v4pad[1] | 2992 hip->hi_v4pad[2]; 2993 if (ipaddr_ord == 0) { 2994 hip->hi_ipv = AF_INET; 2995 } else { 2996 hip->hi_ipv = AF_INET6; 2997 } 2998 2999 #define UL(b) ar_result_s.ar_data[(b)] 3000 D3("daplka_privdata_post: family=%d :SA[8] %d.%d.%d.%d\n", 3001 hip->hi_ipv, UL(8), UL(9), UL(10), UL(11)); 3002 D3("daplka_privdata_post: SA[12] %d.%d.%d.%d\n", 3003 UL(12), UL(13), UL(14), UL(15)); 3004 } else { 3005 /* non-conformed third parties */ 3006 hip->hi_ipv = AF_UNSPEC; 3007 bzero(hip->hi_saaddr, DAPL_ATS_NBYTES); 3008 } 3009 } 3010 3011 /* 3012 * this function is called by evd_wait and evd_dequeue to wait for 3013 * connection events and CQ notifications. typically this function 3014 * is called when the userland CQ is empty and the client has 3015 * specified a non-zero timeout to evd_wait. if the client is 3016 * interested in CQ events, the CQ must be armed in userland prior 3017 * to calling this function. 
3018 */ 3019 /* ARGSUSED */ 3020 static int 3021 daplka_event_poll(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3022 cred_t *cred, int *rvalp) 3023 { 3024 daplka_evd_resource_t *evd_rp = NULL; 3025 dapl_event_poll_t args; 3026 daplka_evd_event_t *head; 3027 dapl_ib_event_t evp_arr[NUM_EVENTS_PER_POLL]; 3028 dapl_ib_event_t *evp; 3029 dapl_ib_event_t *evp_start; 3030 size_t evp_size; 3031 int threshold; 3032 clock_t timeout; 3033 uint32_t max_events; 3034 uint32_t num_events = 0; 3035 void *pd; 3036 ibt_priv_data_len_t n; 3037 int retval = 0; 3038 int rc; 3039 3040 retval = daplka_event_poll_copyin(arg, &args, mode); 3041 if (retval != 0) { 3042 return (EFAULT); 3043 } 3044 3045 if ((args.evp_num_ev > 0) && (args.evp_ep == NULL)) { 3046 DERR("event_poll: evp_ep cannot be NULL if num_wc=%d", 3047 args.evp_num_ev); 3048 return (EINVAL); 3049 } 3050 /* 3051 * Note: dequeue requests have a threshold = 0, timeout = 0 3052 */ 3053 threshold = args.evp_threshold; 3054 3055 max_events = args.evp_num_ev; 3056 /* ensure library is passing sensible values */ 3057 if (max_events < threshold) { 3058 DERR("event_poll: max_events(%d) < threshold(%d)\n", 3059 max_events, threshold); 3060 return (EINVAL); 3061 } 3062 /* Do a sanity check to avoid excessive memory allocation */ 3063 if (max_events > DAPL_EVD_MAX_EVENTS) { 3064 DERR("event_poll: max_events(%d) > %d", 3065 max_events, DAPL_EVD_MAX_EVENTS); 3066 return (EINVAL); 3067 } 3068 D4("event_poll: threshold(%d) timeout(0x%llx) max_events(%d)\n", 3069 threshold, (longlong_t)args.evp_timeout, max_events); 3070 3071 /* get evd resource */ 3072 evd_rp = (daplka_evd_resource_t *) 3073 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evp_evd_hkey); 3074 if (evd_rp == NULL) { 3075 DERR("event_poll: cannot find evd resource\n"); 3076 return (EINVAL); 3077 } 3078 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD); 3079 3080 /* 3081 * Use event array on the stack if possible 3082 */ 3083 if (max_events <= NUM_EVENTS_PER_POLL) { 3084 
evp_start = evp = &evp_arr[0]; 3085 } else { 3086 evp_size = max_events * sizeof (dapl_ib_event_t); 3087 evp_start = evp = kmem_zalloc(evp_size, daplka_km_flags); 3088 if (evp == NULL) { 3089 DERR("event_poll: kmem_zalloc failed, evp_size %d", 3090 evp_size); 3091 retval = ENOMEM; 3092 goto cleanup; 3093 } 3094 } 3095 3096 /* 3097 * The Event poll algorithm is as follows - 3098 * The library passes a buffer big enough to hold "max_events" 3099 * events. max_events is >= threshold. If at any stage we get 3100 * max_events no. of events we bail. The events are polled in 3101 * the following order - 3102 * 1) Check for CR events in the evd_cr_events list 3103 * 2) Check for Connection events in the evd_connection_events list 3104 * 3105 * If after the above 2 steps we don't have enough(>= threshold) events 3106 * we block for CQ notification and sleep. Upon being woken up we start 3107 * at step 1 again. 3108 */ 3109 3110 /* 3111 * Note: this could be 0 or INFINITE or anyother value in microsec 3112 */ 3113 if (args.evp_timeout > 0) { 3114 if (args.evp_timeout >= LONG_MAX) { 3115 timeout = LONG_MAX; 3116 } else { 3117 clock_t curr_time = ddi_get_lbolt(); 3118 3119 timeout = curr_time + 3120 drv_usectohz((clock_t)args.evp_timeout); 3121 /* 3122 * use the max value if we wrapped around 3123 */ 3124 if (timeout <= curr_time) { 3125 timeout = LONG_MAX; 3126 } 3127 } 3128 } else { 3129 timeout = 0; 3130 } 3131 3132 mutex_enter(&evd_rp->evd_lock); 3133 for (;;) { 3134 /* 3135 * If this evd is waiting for CM events check that now. 
3136 */ 3137 if ((evd_rp->evd_flags & DAT_EVD_CR_FLAG) && 3138 (evd_rp->evd_cr_events.eel_num_elements > 0)) { 3139 /* dequeue events from evd_cr_events list */ 3140 while (head = daplka_evd_event_dequeue( 3141 &evd_rp->evd_cr_events)) { 3142 /* 3143 * populate the evp array 3144 */ 3145 evp[num_events].ibe_ev_family = DAPL_CR_EVENTS; 3146 evp[num_events].ibe_ce.ibce_event = 3147 head->ee_cmev.ec_cm_ev_type; 3148 evp[num_events].ibe_ce.ibce_cookie = 3149 (uint64_t)head->ee_cmev.ec_cm_cookie; 3150 evp[num_events].ibe_ce.ibce_psep_cookie = 3151 head->ee_cmev.ec_cm_psep_cookie; 3152 daplka_crevent_privdata_post(ia_rp, 3153 &evp[num_events], head); 3154 kmem_free(head, sizeof (daplka_evd_event_t)); 3155 3156 if (++num_events == max_events) { 3157 mutex_exit(&evd_rp->evd_lock); 3158 goto maxevent_reached; 3159 } 3160 } 3161 } 3162 3163 if ((evd_rp->evd_flags & DAT_EVD_CONNECTION_FLAG) && 3164 (evd_rp->evd_conn_events.eel_num_elements > 0)) { 3165 /* dequeue events from evd_connection_events list */ 3166 while ((head = daplka_evd_event_dequeue 3167 (&evd_rp->evd_conn_events))) { 3168 /* 3169 * populate the evp array - 3170 * 3171 */ 3172 if (head->ee_cmev.ec_cm_is_passive) { 3173 evp[num_events].ibe_ev_family = 3174 DAPL_PASSIVE_CONNECTION_EVENTS; 3175 } else { 3176 evp[num_events].ibe_ev_family = 3177 DAPL_ACTIVE_CONNECTION_EVENTS; 3178 } 3179 evp[num_events].ibe_ce.ibce_event = 3180 head->ee_cmev.ec_cm_ev_type; 3181 evp[num_events].ibe_ce.ibce_cookie = 3182 (uint64_t)head->ee_cmev.ec_cm_cookie; 3183 evp[num_events].ibe_ce.ibce_psep_cookie = 3184 head->ee_cmev.ec_cm_psep_cookie; 3185 3186 if (head->ee_cmev.ec_cm_ev_priv_data_len > 0) { 3187 pd = head->ee_cmev.ec_cm_ev_priv_data; 3188 n = head-> 3189 ee_cmev.ec_cm_ev_priv_data_len; 3190 bcopy(pd, (void *)evp[num_events]. 3191 ibe_ce.ibce_priv_data_ptr, n); 3192 evp[num_events].ibe_ce. 
3193 ibce_priv_data_size = n; 3194 kmem_free(pd, n); 3195 } 3196 3197 kmem_free(head, sizeof (daplka_evd_event_t)); 3198 3199 if (++num_events == max_events) { 3200 mutex_exit(&evd_rp->evd_lock); 3201 goto maxevent_reached; 3202 } 3203 } 3204 } 3205 3206 if ((evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) && 3207 (evd_rp->evd_async_events.eel_num_elements > 0)) { 3208 /* dequeue events from evd_async_events list */ 3209 while (head = daplka_evd_event_dequeue( 3210 &evd_rp->evd_async_events)) { 3211 /* 3212 * populate the evp array 3213 */ 3214 evp[num_events].ibe_ev_family = 3215 DAPL_ASYNC_EVENTS; 3216 evp[num_events].ibe_async.ibae_type = 3217 head->ee_aev.ibae_type; 3218 evp[num_events].ibe_async.ibae_hca_guid = 3219 head->ee_aev.ibae_hca_guid; 3220 evp[num_events].ibe_async.ibae_cookie = 3221 head->ee_aev.ibae_cookie; 3222 evp[num_events].ibe_async.ibae_port = 3223 head->ee_aev.ibae_port; 3224 3225 kmem_free(head, sizeof (daplka_evd_event_t)); 3226 3227 if (++num_events == max_events) { 3228 break; 3229 } 3230 } 3231 } 3232 3233 /* 3234 * We have sufficient events for this call so no need to wait 3235 */ 3236 if ((threshold > 0) && (num_events >= threshold)) { 3237 mutex_exit(&evd_rp->evd_lock); 3238 break; 3239 } 3240 3241 evd_rp->evd_waiters++; 3242 /* 3243 * There are no new events and a timeout was specified. 3244 * Note: for CQ events threshold is 0 but timeout is 3245 * not necessarily 0. 3246 */ 3247 while ((evd_rp->evd_newevents == DAPLKA_EVD_NO_EVENTS) && 3248 timeout) { 3249 retval = DAPLKA_EVD_WAIT(&evd_rp->evd_cv, 3250 &evd_rp->evd_lock, timeout); 3251 if (retval == 0) { 3252 retval = EINTR; 3253 break; 3254 } else if (retval == -1) { 3255 retval = ETIME; 3256 break; 3257 } else { 3258 retval = 0; 3259 continue; 3260 } 3261 } 3262 evd_rp->evd_waiters--; 3263 if (evd_rp->evd_newevents != DAPLKA_EVD_NO_EVENTS) { 3264 /* 3265 * If we got woken up by the CQ handler due to events 3266 * in the CQ. Need to go to userland to check for 3267 * CQ events. 
Or if we were woken up due to S/W events 3268 */ 3269 3270 /* check for userland events only */ 3271 if (!(evd_rp->evd_newevents & 3272 ~DAPLKA_EVD_ULAND_EVENTS)) { 3273 evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS; 3274 mutex_exit(&evd_rp->evd_lock); 3275 break; 3276 } 3277 /* 3278 * Clear newevents since we are going to loopback 3279 * back and check for both CM and CQ events 3280 */ 3281 evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS; 3282 } else { /* error */ 3283 mutex_exit(&evd_rp->evd_lock); 3284 break; 3285 } 3286 } 3287 3288 maxevent_reached: 3289 args.evp_num_polled = num_events; 3290 3291 /* 3292 * At this point retval might have a value that we want to return 3293 * back to the user. So the copyouts shouldn't tamper retval. 3294 */ 3295 if (args.evp_num_polled > 0) { /* copyout the events */ 3296 rc = ddi_copyout(evp, args.evp_ep, args.evp_num_polled * 3297 sizeof (dapl_ib_event_t), mode); 3298 if (rc != 0) { /* XXX: we are losing events here */ 3299 DERR("event_poll: event array copyout error %d", rc); 3300 retval = EFAULT; 3301 goto cleanup; 3302 } 3303 rc = daplka_event_poll_copyout(&args, arg, mode); 3304 if (rc != 0) { /* XXX: we are losing events here */ 3305 DERR("event_poll: copyout error %d\n", rc); 3306 retval = EFAULT; 3307 goto cleanup; 3308 } 3309 } 3310 3311 cleanup:; 3312 if ((max_events > NUM_EVENTS_PER_POLL) && (evp_start != NULL)) { 3313 kmem_free(evp_start, evp_size); 3314 } 3315 3316 if (evd_rp != NULL) { 3317 DAPLKA_RS_UNREF(evd_rp); 3318 } 3319 return (retval); 3320 } 3321 3322 /* ARGSUSED */ 3323 static int 3324 daplka_event_wakeup(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3325 cred_t *cred, int *rvalp) 3326 { 3327 dapl_event_wakeup_t args; 3328 daplka_evd_resource_t *evd_rp; 3329 int retval; 3330 3331 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_event_wakeup_t), 3332 mode); 3333 if (retval != 0) { 3334 DERR("event_wakeup: copyin error %d\n", retval); 3335 return (EFAULT); 3336 } 3337 3338 /* get evd resource */ 
3339 evd_rp = (daplka_evd_resource_t *) 3340 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evw_hkey); 3341 if (evd_rp == NULL) { 3342 DERR("event_wakeup: cannot find evd resource\n"); 3343 return (EINVAL); 3344 } 3345 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD); 3346 3347 daplka_evd_wakeup(evd_rp, NULL, NULL); 3348 3349 DAPLKA_RS_UNREF(evd_rp); 3350 3351 return (retval); 3352 } 3353 3354 /* ARGSUSED */ 3355 static int 3356 daplka_evd_modify_cno(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3357 cred_t *cred, int *rvalp) 3358 { 3359 dapl_evd_modify_cno_t args; 3360 daplka_evd_resource_t *evd_rp; 3361 daplka_cno_resource_t *cno_rp; 3362 daplka_cno_resource_t *old_cno_rp; 3363 int retval; 3364 3365 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_modify_cno_t), 3366 mode); 3367 if (retval != 0) { 3368 DERR("evd_modify_cno: copyin error %d\n", retval); 3369 return (EFAULT); 3370 } 3371 3372 /* get evd resource */ 3373 evd_rp = (daplka_evd_resource_t *) 3374 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evmc_hkey); 3375 if (evd_rp == NULL) { 3376 DERR("evd_modify_cno: cannot find evd resource\n"); 3377 retval = EINVAL; 3378 goto cleanup; 3379 } 3380 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD); 3381 3382 if (args.evmc_cno_hkey > 0) { 3383 /* get cno resource corresponding to the new CNO */ 3384 cno_rp = (daplka_cno_resource_t *) 3385 daplka_hash_lookup(&ia_rp->ia_cno_htbl, 3386 args.evmc_cno_hkey); 3387 if (cno_rp == NULL) { 3388 DERR("evd_modify_cno: cannot find CNO resource\n"); 3389 retval = EINVAL; 3390 goto cleanup; 3391 } 3392 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO); 3393 } else { 3394 cno_rp = NULL; 3395 } 3396 3397 mutex_enter(&evd_rp->evd_lock); 3398 old_cno_rp = evd_rp->evd_cno_res; 3399 evd_rp->evd_cno_res = cno_rp; 3400 mutex_exit(&evd_rp->evd_lock); 3401 3402 /* 3403 * drop the refcnt on the old CNO, the refcnt on the new CNO is 3404 * retained since the evd holds a reference to it. 
3405 */ 3406 if (old_cno_rp) { 3407 DAPLKA_RS_UNREF(old_cno_rp); 3408 } 3409 3410 cleanup: 3411 if (evd_rp) { 3412 DAPLKA_RS_UNREF(evd_rp); 3413 } 3414 3415 return (retval); 3416 } 3417 3418 /* 3419 * Frees the EVD and associated resources. 3420 * If there are other threads still using this EVD, the destruction 3421 * will defer until the EVD's refcnt drops to zero. 3422 */ 3423 /* ARGSUSED */ 3424 static int 3425 daplka_evd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3426 cred_t *cred, int *rvalp) 3427 { 3428 daplka_evd_resource_t *evd_rp = NULL; 3429 daplka_async_evd_hkey_t *curr; 3430 daplka_async_evd_hkey_t *prev; 3431 dapl_evd_free_t args; 3432 int retval = 0; 3433 3434 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_free_t), mode); 3435 if (retval != 0) { 3436 DERR("evd_free: copyin error %d\n", retval); 3437 return (EFAULT); 3438 } 3439 retval = daplka_hash_remove(&ia_rp->ia_evd_htbl, args.evf_hkey, 3440 (void **)&evd_rp); 3441 if (retval != 0 || evd_rp == NULL) { 3442 DERR("evd_free: cannot find evd resource\n"); 3443 return (EINVAL); 3444 } 3445 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD); 3446 3447 /* If this is an async evd remove it from the IA's async evd list */ 3448 if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) { 3449 mutex_enter(&ia_rp->ia_lock); 3450 curr = prev = ia_rp->ia_async_evd_hkeys; 3451 while (curr != NULL) { 3452 if (curr->aeh_evd_hkey == args.evf_hkey) { 3453 /* unlink curr from the list */ 3454 if (curr == prev) { 3455 /* 3456 * if first element in the list update 3457 * the list head 3458 */ 3459 ia_rp->ia_async_evd_hkeys = 3460 curr->aeh_next; 3461 } else { 3462 prev->aeh_next = curr->aeh_next; 3463 } 3464 break; 3465 } 3466 prev = curr; 3467 curr = curr->aeh_next; 3468 } 3469 mutex_exit(&ia_rp->ia_lock); 3470 /* free the curr entry */ 3471 kmem_free(curr, sizeof (daplka_async_evd_hkey_t)); 3472 } 3473 3474 /* UNREF calls the actual free function when refcnt is zero */ 3475 DAPLKA_RS_UNREF(evd_rp); 3476 return 
(0); 3477 } 3478 3479 /* 3480 * destroys EVD resource. 3481 * called when refcnt drops to zero. 3482 */ 3483 static int 3484 daplka_evd_destroy(daplka_resource_t *gen_rp) 3485 { 3486 daplka_evd_resource_t *evd_rp = (daplka_evd_resource_t *)gen_rp; 3487 ibt_status_t status; 3488 daplka_evd_event_t *evt; 3489 ibt_priv_data_len_t len; 3490 3491 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp)) 3492 D3("evd_destroy: entering, evd_rp 0x%p, rnum %d\n", 3493 evd_rp, DAPLKA_RS_RNUM(evd_rp)); 3494 /* 3495 * free CQ 3496 */ 3497 if (evd_rp->evd_cq_hdl) { 3498 ibt_set_cq_handler(evd_rp->evd_cq_hdl, NULL, NULL); 3499 mutex_enter(&daplka_dev->daplka_mutex); 3500 ibt_set_cq_private(evd_rp->evd_cq_hdl, NULL); 3501 mutex_exit(&daplka_dev->daplka_mutex); 3502 3503 status = daplka_ibt_free_cq(evd_rp, evd_rp->evd_cq_hdl); 3504 if (status != IBT_SUCCESS) { 3505 DERR("evd_destroy: ibt_free_cq returned %d\n", status); 3506 } 3507 evd_rp->evd_cq_hdl = NULL; 3508 D2("evd_destroy: cq freed, rnum %d\n", DAPLKA_RS_RNUM(evd_rp)); 3509 } 3510 3511 /* 3512 * release reference on CNO 3513 */ 3514 if (evd_rp->evd_cno_res != NULL) { 3515 mutex_enter(&evd_rp->evd_cno_res->cno_lock); 3516 if (evd_rp->evd_cno_res->cno_evd_cookie == 3517 evd_rp->evd_cookie) { 3518 evd_rp->evd_cno_res->cno_evd_cookie = 0; 3519 } 3520 mutex_exit(&evd_rp->evd_cno_res->cno_lock); 3521 DAPLKA_RS_UNREF(evd_rp->evd_cno_res); 3522 evd_rp->evd_cno_res = NULL; 3523 } 3524 3525 /* 3526 * discard all remaining events 3527 */ 3528 mutex_enter(&evd_rp->evd_lock); 3529 while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_cr_events))) { 3530 D2("evd_destroy: discarding CR event: %d\n", 3531 evt->ee_cmev.ec_cm_ev_type); 3532 len = evt->ee_cmev.ec_cm_ev_priv_data_len; 3533 if (len > 0) { 3534 kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len); 3535 evt->ee_cmev.ec_cm_ev_priv_data = NULL; 3536 evt->ee_cmev.ec_cm_ev_priv_data_len = 0; 3537 } 3538 kmem_free(evt, sizeof (*evt)); 3539 } 3540 ASSERT(evd_rp->evd_cr_events.eel_num_elements == 0); 
3541 3542 while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_conn_events))) { 3543 D2("evd_destroy: discarding CONN event: %d\n", 3544 evt->ee_cmev.ec_cm_ev_type); 3545 len = evt->ee_cmev.ec_cm_ev_priv_data_len; 3546 if (len > 0) { 3547 kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len); 3548 evt->ee_cmev.ec_cm_ev_priv_data = NULL; 3549 evt->ee_cmev.ec_cm_ev_priv_data_len = 0; 3550 } 3551 kmem_free(evt, sizeof (*evt)); 3552 } 3553 ASSERT(evd_rp->evd_conn_events.eel_num_elements == 0); 3554 3555 while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_async_events))) { 3556 DERR("evd_destroy: discarding ASYNC event: %d\n", 3557 evt->ee_aev.ibae_type); 3558 kmem_free(evt, sizeof (*evt)); 3559 } 3560 ASSERT(evd_rp->evd_async_events.eel_num_elements == 0); 3561 mutex_exit(&evd_rp->evd_lock); 3562 3563 mutex_destroy(&evd_rp->evd_lock); 3564 DAPLKA_RS_FINI(evd_rp); 3565 kmem_free(evd_rp, sizeof (daplka_evd_resource_t)); 3566 D3("evd_destroy: exiting, evd_rp 0x%p\n", evd_rp); 3567 return (0); 3568 } 3569 3570 static void 3571 daplka_hash_evd_free(void *obj) 3572 { 3573 daplka_evd_resource_t *evd_rp = (daplka_evd_resource_t *)obj; 3574 3575 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD); 3576 DAPLKA_RS_UNREF(evd_rp); 3577 } 3578 3579 /* 3580 * this handler fires when new completions arrive. 3581 */ 3582 /* ARGSUSED */ 3583 static void 3584 daplka_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg) 3585 { 3586 D3("cq_handler: fired setting evd_newevents\n"); 3587 daplka_evd_wakeup((daplka_evd_resource_t *)arg, NULL, NULL); 3588 } 3589 3590 /* 3591 * this routine wakes up a client from evd_wait. if evtq and evt 3592 * are non-null, the event evt will be enqueued prior to waking 3593 * up the client. if the evd is associated with a CNO and if there 3594 * are no waiters on the evd, the CNO will be notified. 
3595 */ 3596 static void 3597 daplka_evd_wakeup(daplka_evd_resource_t *evd_rp, daplka_evd_event_list_t *evtq, 3598 daplka_evd_event_t *evt) 3599 { 3600 uint32_t waiters = 0; 3601 3602 mutex_enter(&evd_rp->evd_lock); 3603 if (evtq != NULL && evt != NULL) { 3604 ASSERT(evtq == &evd_rp->evd_cr_events || 3605 evtq == &evd_rp->evd_conn_events || 3606 evtq == &evd_rp->evd_async_events); 3607 daplka_evd_event_enqueue(evtq, evt); 3608 ASSERT((evtq->eel_event_type == DAPLKA_EVD_CM_EVENTS) || 3609 (evtq->eel_event_type == DAPLKA_EVD_ASYNC_EVENTS)); 3610 evd_rp->evd_newevents |= evtq->eel_event_type; 3611 } else { 3612 evd_rp->evd_newevents |= DAPLKA_EVD_ULAND_EVENTS; 3613 } 3614 waiters = evd_rp->evd_waiters; 3615 cv_broadcast(&evd_rp->evd_cv); 3616 mutex_exit(&evd_rp->evd_lock); 3617 3618 /* 3619 * only wakeup the CNO if there are no waiters on this evd. 3620 */ 3621 if (evd_rp->evd_cno_res != NULL && waiters == 0) { 3622 mutex_enter(&evd_rp->evd_cno_res->cno_lock); 3623 evd_rp->evd_cno_res->cno_evd_cookie = evd_rp->evd_cookie; 3624 cv_broadcast(&evd_rp->evd_cno_res->cno_cv); 3625 mutex_exit(&evd_rp->evd_cno_res->cno_lock); 3626 } 3627 } 3628 3629 /* 3630 * daplka_evd_event_enqueue adds elem to the end of the event list 3631 * The caller is expected to acquire appropriate locks before 3632 * calling enqueue 3633 */ 3634 static void 3635 daplka_evd_event_enqueue(daplka_evd_event_list_t *evlist, 3636 daplka_evd_event_t *elem) 3637 { 3638 if (evlist->eel_tail) { 3639 evlist->eel_tail->ee_next = elem; 3640 evlist->eel_tail = elem; 3641 } else { 3642 /* list is empty */ 3643 ASSERT(evlist->eel_head == NULL); 3644 evlist->eel_head = elem; 3645 evlist->eel_tail = elem; 3646 } 3647 evlist->eel_num_elements++; 3648 } 3649 3650 /* 3651 * daplka_evd_event_dequeue removes and returns the first element of event 3652 * list. NULL is returned if the list is empty. The caller is expected to 3653 * acquire appropriate locks before calling enqueue. 
3654 */ 3655 static daplka_evd_event_t * 3656 daplka_evd_event_dequeue(daplka_evd_event_list_t *evlist) 3657 { 3658 daplka_evd_event_t *head; 3659 3660 head = evlist->eel_head; 3661 if (head == NULL) { 3662 return (NULL); 3663 } 3664 3665 evlist->eel_head = head->ee_next; 3666 evlist->eel_num_elements--; 3667 /* if it was the last element update the tail pointer too */ 3668 if (evlist->eel_head == NULL) { 3669 ASSERT(evlist->eel_num_elements == 0); 3670 evlist->eel_tail = NULL; 3671 } 3672 return (head); 3673 } 3674 3675 /* 3676 * A CNO allows the client to wait for notifications from multiple EVDs. 3677 * To use a CNO, the client needs to follow the procedure below: 3678 * 1. allocate a CNO. this returns a cno_hkey that identifies the CNO. 3679 * 2. create one or more EVDs using the returned cno_hkey. 3680 * 3. call cno_wait. when one of the associated EVDs get notified, the 3681 * CNO will also get notified. cno_wait will then return with a 3682 * evd_cookie identifying the EVD that triggered the event. 3683 * 3684 * A note about cno_wait: 3685 * -unlike a EVD, a CNO does not maintain a queue of notifications. For 3686 * example, suppose multiple EVDs triggered a CNO before the client calls 3687 * cno_wait; when the client calls cno_wait, it will return with the 3688 * evd_cookie that identifies the *last* EVD that triggered the CNO. It 3689 * is the responsibility of the client, upon returning from cno_wait, to 3690 * check on all EVDs that can potentially trigger the CNO. the returned 3691 * evd_cookie is only meant to be a hint. there is no guarantee that the 3692 * EVD identified by the evd_cookie still contains an event or still 3693 * exists by the time cno_wait returns. 3694 */ 3695 3696 /* 3697 * allocates a CNO. 3698 * the returned cno_hkey may subsequently be used in evd_create. 
3699 */ 3700 /* ARGSUSED */ 3701 static int 3702 daplka_cno_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3703 cred_t *cred, int *rvalp) 3704 { 3705 dapl_cno_alloc_t args; 3706 daplka_cno_resource_t *cno_rp = NULL; 3707 uint64_t cno_hkey = 0; 3708 boolean_t inserted = B_FALSE; 3709 int retval = 0; 3710 3711 cno_rp = kmem_zalloc(sizeof (*cno_rp), daplka_km_flags); 3712 if (cno_rp == NULL) { 3713 DERR("cno_alloc: cannot allocate cno resource\n"); 3714 return (ENOMEM); 3715 } 3716 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cno_rp)) 3717 DAPLKA_RS_INIT(cno_rp, DAPL_TYPE_CNO, 3718 DAPLKA_RS_RNUM(ia_rp), daplka_cno_destroy); 3719 3720 mutex_init(&cno_rp->cno_lock, NULL, MUTEX_DRIVER, NULL); 3721 cv_init(&cno_rp->cno_cv, NULL, CV_DRIVER, NULL); 3722 cno_rp->cno_evd_cookie = 0; 3723 3724 /* insert into cno hash table */ 3725 retval = daplka_hash_insert(&ia_rp->ia_cno_htbl, 3726 &cno_hkey, (void *)cno_rp); 3727 if (retval != 0) { 3728 DERR("cno_alloc: cannot insert cno resource\n"); 3729 goto cleanup; 3730 } 3731 inserted = B_TRUE; 3732 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cno_rp)) 3733 3734 /* return hkey to library */ 3735 args.cno_hkey = cno_hkey; 3736 3737 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cno_alloc_t), 3738 mode); 3739 if (retval != 0) { 3740 DERR("cno_alloc: copyout error %d\n", retval); 3741 retval = EFAULT; 3742 goto cleanup; 3743 } 3744 return (0); 3745 3746 cleanup:; 3747 if (inserted) { 3748 daplka_cno_resource_t *free_rp = NULL; 3749 3750 (void) daplka_hash_remove(&ia_rp->ia_cno_htbl, cno_hkey, 3751 (void **)&free_rp); 3752 if (free_rp != cno_rp) { 3753 DERR("cno_alloc: cannot remove cno\n"); 3754 /* 3755 * we can only get here if another thread 3756 * has completed the cleanup in cno_free 3757 */ 3758 return (retval); 3759 } 3760 } 3761 DAPLKA_RS_UNREF(cno_rp); 3762 return (retval); 3763 } 3764 3765 /* 3766 * destroys a CNO. 3767 * this gets called when a CNO resource's refcnt drops to zero. 
3768 */ 3769 static int 3770 daplka_cno_destroy(daplka_resource_t *gen_rp) 3771 { 3772 daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)gen_rp; 3773 3774 ASSERT(DAPLKA_RS_REFCNT(cno_rp) == 0); 3775 D2("cno_destroy: entering, cno_rp %p, rnum %d\n", 3776 cno_rp, DAPLKA_RS_RNUM(cno_rp)); 3777 3778 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO); 3779 cv_destroy(&cno_rp->cno_cv); 3780 mutex_destroy(&cno_rp->cno_lock); 3781 3782 DAPLKA_RS_FINI(cno_rp); 3783 kmem_free(cno_rp, sizeof (daplka_cno_resource_t)); 3784 D2("cno_destroy: exiting, cno_rp %p\n", cno_rp); 3785 return (0); 3786 } 3787 3788 static void 3789 daplka_hash_cno_free(void *obj) 3790 { 3791 daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)obj; 3792 3793 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO); 3794 DAPLKA_RS_UNREF(cno_rp); 3795 } 3796 3797 /* 3798 * removes the CNO from the cno hash table and frees the CNO 3799 * if there are no references to it. if there are references to 3800 * it, the CNO will be destroyed when the last of the references 3801 * is released. once the CNO is removed from the cno hash table, 3802 * the client will no longer be able to call cno_wait on the CNO. 
3803 */ 3804 /* ARGSUSED */ 3805 static int 3806 daplka_cno_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3807 cred_t *cred, int *rvalp) 3808 { 3809 daplka_cno_resource_t *cno_rp = NULL; 3810 dapl_cno_free_t args; 3811 int retval = 0; 3812 3813 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_free_t), mode); 3814 if (retval != 0) { 3815 DERR("cno_free: copyin error %d\n", retval); 3816 return (EINVAL); 3817 } 3818 3819 retval = daplka_hash_remove(&ia_rp->ia_cno_htbl, 3820 args.cnf_hkey, (void **)&cno_rp); 3821 if (retval != 0 || cno_rp == NULL) { 3822 DERR("cno_free: cannot find cno resource\n"); 3823 return (EINVAL); 3824 } 3825 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO); 3826 3827 /* UNREF calls the actual free function when refcnt is zero */ 3828 DAPLKA_RS_UNREF(cno_rp); 3829 return (0); 3830 } 3831 3832 /* 3833 * wait for a notification from one of the associated EVDs. 3834 */ 3835 /* ARGSUSED */ 3836 static int 3837 daplka_cno_wait(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3838 cred_t *cred, int *rvalp) 3839 { 3840 daplka_cno_resource_t *cno_rp = NULL; 3841 dapl_cno_wait_t args; 3842 int retval = 0; 3843 uint64_t evd_cookie = 0; 3844 clock_t timeout, curr_time; 3845 3846 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_wait_t), mode); 3847 if (retval != 0) { 3848 DERR("cno_wait: copyin error %d\n", retval); 3849 return (EINVAL); 3850 } 3851 /* get cno resource */ 3852 cno_rp = (daplka_cno_resource_t *) 3853 daplka_hash_lookup(&ia_rp->ia_cno_htbl, args.cnw_hkey); 3854 if (cno_rp == NULL) { 3855 DERR("cno_wait: cannot find cno resource\n"); 3856 return (EINVAL); 3857 } 3858 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO); 3859 3860 curr_time = ddi_get_lbolt(); 3861 timeout = curr_time + drv_usectohz(args.cnw_timeout); 3862 3863 /* 3864 * use the max value if we wrapped around 3865 */ 3866 if (args.cnw_timeout > 0 && timeout <= curr_time) { 3867 /* 3868 * clock_t (size long) changes between 32 and 64-bit kernels 3869 */ 
3870 timeout = LONG_MAX >> 4; 3871 } 3872 mutex_enter(&cno_rp->cno_lock); 3873 while (cno_rp->cno_evd_cookie == 0) { 3874 int rval = 0; 3875 3876 rval = cv_timedwait_sig(&cno_rp->cno_cv, 3877 &cno_rp->cno_lock, timeout); 3878 if (rval == 0) { 3879 DERR("cno_wait: interrupted\n"); 3880 mutex_exit(&cno_rp->cno_lock); 3881 retval = EINTR; 3882 goto cleanup; 3883 } else if (rval == -1) { 3884 DERR("cno_wait: timed out\n"); 3885 mutex_exit(&cno_rp->cno_lock); 3886 retval = ETIME; 3887 goto cleanup; 3888 } 3889 } 3890 evd_cookie = cno_rp->cno_evd_cookie; 3891 cno_rp->cno_evd_cookie = 0; 3892 mutex_exit(&cno_rp->cno_lock); 3893 3894 ASSERT(evd_cookie != 0); 3895 D2("cno_wait: returning evd_cookie 0x%p\n", 3896 (void *)(uintptr_t)evd_cookie); 3897 args.cnw_evd_cookie = evd_cookie; 3898 retval = ddi_copyout((void *)&args, (void *)arg, 3899 sizeof (dapl_cno_wait_t), mode); 3900 if (retval != 0) { 3901 DERR("cno_wait: copyout error %d\n", retval); 3902 retval = EFAULT; 3903 goto cleanup; 3904 } 3905 3906 cleanup:; 3907 if (cno_rp != NULL) { 3908 DAPLKA_RS_UNREF(cno_rp); 3909 } 3910 return (retval); 3911 } 3912 3913 /* 3914 * this function is called by the client when it decides to 3915 * accept a connection request. a connection request is generated 3916 * when the active side generates REQ MAD to a service point on 3917 * the destination node. this causes the CM service handler 3918 * (daplka_cm_service_req) on the passive side to be callee. This 3919 * handler will then enqueue this connection request to the backlog 3920 * array of the service point. A connection event containing the 3921 * backlog array index and connection request private data is passed 3922 * to the client's service point EVD (sp_evd_res). once the event 3923 * is passed up to the userland, the client may examine the request 3924 * to decide whether to call daplka_cr_accept or dapka_cr_reject. 
3925 */ 3926 /* ARGSUSED */ 3927 static int 3928 daplka_cr_accept(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3929 cred_t *cred, int *rvalp) 3930 { 3931 daplka_ep_resource_t *ep_rp = NULL; 3932 daplka_sp_resource_t *sp_rp = NULL; 3933 dapl_cr_accept_t args; 3934 daplka_sp_conn_pend_t *conn; 3935 ibt_cm_proceed_reply_t proc_reply; 3936 ibt_status_t status; 3937 uint16_t bkl_index; 3938 uint32_t old_state, new_state; 3939 int retval = 0; 3940 void *priv_data = NULL, *sid; 3941 3942 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_accept_t), 3943 mode); 3944 if (retval != 0) { 3945 DERR("cr_accept: copyin error %d\n", retval); 3946 return (EFAULT); 3947 } 3948 if (args.cra_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) { 3949 DERR("cr_accept: private data len (%d) exceeded " 3950 "max size %d\n", args.cra_priv_sz, 3951 DAPL_MAX_PRIVATE_DATA_SIZE); 3952 return (EINVAL); 3953 } 3954 priv_data = (args.cra_priv_sz > 0) ? (void *)args.cra_priv : NULL; 3955 3956 D2("cr_accept: priv(0x%p) priv_len(%u) psep(0x%llx)\n", priv_data, 3957 args.cra_priv_sz, (longlong_t)args.cra_bkl_cookie); 3958 3959 /* get sp resource */ 3960 sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl, 3961 args.cra_sp_hkey); 3962 if (sp_rp == NULL) { 3963 DERR("cr_accept: cannot find sp resource\n"); 3964 return (EINVAL); 3965 } 3966 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP); 3967 3968 /* get ep resource */ 3969 ep_rp = (daplka_ep_resource_t *)daplka_hash_lookup(&ia_rp->ia_ep_htbl, 3970 args.cra_ep_hkey); 3971 if (ep_rp == NULL) { 3972 DERR("cr_accept: cannot find ep resource\n"); 3973 retval = EINVAL; 3974 goto cleanup; 3975 } 3976 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP); 3977 3978 /* 3979 * accept is only allowed if ep_state is CLOSED. 3980 * note that after this point, the ep_state is frozen 3981 * (i.e. TRANSITIONING) until we transition ep_state 3982 * to ACCEPTING or back to CLOSED if we get an error. 
3983 */ 3984 new_state = old_state = daplka_ep_get_state(ep_rp); 3985 if (old_state != DAPLKA_EP_STATE_CLOSED) { 3986 DERR("cr_accept: invalid ep state %d\n", old_state); 3987 retval = EINVAL; 3988 goto cleanup; 3989 } 3990 3991 mutex_enter(&sp_rp->sp_lock); 3992 bkl_index = DAPLKA_GET_PSEP_INDEX(args.cra_bkl_cookie); 3993 /* 3994 * make sure the backlog index is not bogus. 3995 */ 3996 if (bkl_index >= sp_rp->sp_backlog_size) { 3997 DERR("cr_accept: invalid backlog index 0x%llx %d\n", 3998 (longlong_t)args.cra_bkl_cookie, bkl_index); 3999 mutex_exit(&sp_rp->sp_lock); 4000 retval = EINVAL; 4001 goto cleanup; 4002 } 4003 /* 4004 * make sure the backlog index indeed refers 4005 * to a pending connection. 4006 */ 4007 conn = &sp_rp->sp_backlog[bkl_index]; 4008 if (conn->spcp_state != DAPLKA_SPCP_PENDING) { 4009 DERR("cr_accept: invalid conn state %d\n", 4010 conn->spcp_state); 4011 mutex_exit(&sp_rp->sp_lock); 4012 retval = EINVAL; 4013 goto cleanup; 4014 } 4015 if (conn->spcp_sid == NULL) { 4016 DERR("cr_accept: sid == NULL\n"); 4017 mutex_exit(&sp_rp->sp_lock); 4018 retval = EINVAL; 4019 goto cleanup; 4020 } 4021 if (ep_rp->ep_chan_hdl == NULL) { 4022 /* 4023 * a ep_rp with a NULL chan_hdl is impossible. 4024 */ 4025 DERR("cr_accept: ep_chan_hdl == NULL\n"); 4026 mutex_exit(&sp_rp->sp_lock); 4027 ASSERT(B_FALSE); 4028 retval = EINVAL; 4029 goto cleanup; 4030 } 4031 proc_reply.rep.cm_channel = ep_rp->ep_chan_hdl; 4032 proc_reply.rep.cm_rdma_ra_out = conn->spcp_rdma_ra_out; 4033 proc_reply.rep.cm_rdma_ra_in = conn->spcp_rdma_ra_in; 4034 proc_reply.rep.cm_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY; 4035 sid = conn->spcp_sid; 4036 4037 /* 4038 * this clears our slot in the backlog array. 4039 * this slot may now be used by other pending connections. 
4040 */ 4041 conn->spcp_sid = NULL; 4042 conn->spcp_state = DAPLKA_SPCP_INIT; 4043 conn->spcp_req_len = 0; 4044 mutex_exit(&sp_rp->sp_lock); 4045 4046 /* 4047 * Set the unique cookie corresponding to the CR to this EP 4048 * so that is can be used in passive side CM callbacks 4049 */ 4050 ep_rp->ep_psep_cookie = args.cra_bkl_cookie; 4051 4052 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, IBT_CM_ACCEPT, 4053 &proc_reply, priv_data, (ibt_priv_data_len_t)args.cra_priv_sz); 4054 4055 if (status != IBT_SUCCESS) { 4056 DERR("cr_accept: ibt_cm_proceed returned %d\n", status); 4057 *rvalp = (int)status; 4058 retval = 0; 4059 } 4060 /* 4061 * note that the CM handler may actually be called at this 4062 * point. but since ep_state is still in TRANSITIONING, the 4063 * handler will wait until we transition to ACCEPTING. this 4064 * prevents the case where we set ep_state to ACCEPTING after 4065 * daplka_service_conn_est sets ep_state to CONNECTED. 4066 */ 4067 new_state = DAPLKA_EP_STATE_ACCEPTING; 4068 4069 cleanup:; 4070 if (sp_rp != NULL) { 4071 DAPLKA_RS_UNREF(sp_rp); 4072 } 4073 if (ep_rp != NULL) { 4074 daplka_ep_set_state(ep_rp, old_state, new_state); 4075 DAPLKA_RS_UNREF(ep_rp); 4076 } 4077 return (retval); 4078 } 4079 4080 /* 4081 * this function is called by the client to reject a 4082 * connection request. 
4083 */ 4084 /* ARGSUSED */ 4085 static int 4086 daplka_cr_reject(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 4087 cred_t *cred, int *rvalp) 4088 { 4089 dapl_cr_reject_t args; 4090 daplka_sp_resource_t *sp_rp = NULL; 4091 daplka_sp_conn_pend_t *conn; 4092 ibt_cm_proceed_reply_t proc_reply; 4093 ibt_cm_status_t proc_status; 4094 ibt_status_t status; 4095 uint16_t bkl_index; 4096 int retval = 0; 4097 void *sid; 4098 4099 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_reject_t), 4100 mode); 4101 if (retval != 0) { 4102 DERR("cr_reject: copyin error %d\n", retval); 4103 return (EFAULT); 4104 } 4105 /* get sp resource */ 4106 sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl, 4107 args.crr_sp_hkey); 4108 if (sp_rp == NULL) { 4109 DERR("cr_reject: cannot find sp resource\n"); 4110 return (EINVAL); 4111 } 4112 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP); 4113 4114 D2("cr_reject: psep(0x%llx)\n", (longlong_t)args.crr_bkl_cookie); 4115 4116 mutex_enter(&sp_rp->sp_lock); 4117 bkl_index = DAPLKA_GET_PSEP_INDEX(args.crr_bkl_cookie); 4118 /* 4119 * make sure the backlog index is not bogus. 4120 */ 4121 if (bkl_index >= sp_rp->sp_backlog_size) { 4122 DERR("cr_reject: invalid backlog index 0x%llx %d\n", 4123 (longlong_t)args.crr_bkl_cookie, bkl_index); 4124 mutex_exit(&sp_rp->sp_lock); 4125 retval = EINVAL; 4126 goto cleanup; 4127 } 4128 /* 4129 * make sure the backlog index indeed refers 4130 * to a pending connection. 
4131 */ 4132 conn = &sp_rp->sp_backlog[bkl_index]; 4133 if (conn->spcp_state != DAPLKA_SPCP_PENDING) { 4134 DERR("cr_reject: invalid conn state %d\n", 4135 conn->spcp_state); 4136 mutex_exit(&sp_rp->sp_lock); 4137 retval = EINVAL; 4138 goto cleanup; 4139 } 4140 if (conn->spcp_sid == NULL) { 4141 DERR("cr_reject: sid == NULL\n"); 4142 mutex_exit(&sp_rp->sp_lock); 4143 retval = EINVAL; 4144 goto cleanup; 4145 } 4146 bzero(&proc_reply, sizeof (proc_reply)); 4147 sid = conn->spcp_sid; 4148 4149 /* 4150 * this clears our slot in the backlog array. 4151 * this slot may now be used by other pending connections. 4152 */ 4153 conn->spcp_sid = NULL; 4154 conn->spcp_state = DAPLKA_SPCP_INIT; 4155 conn->spcp_req_len = 0; 4156 4157 switch (args.crr_reason) { 4158 case DAPL_IB_CM_REJ_REASON_CONSUMER_REJ: 4159 /* results in IBT_CM_CONSUMER as the reason for reject */ 4160 proc_status = IBT_CM_REJECT; 4161 break; 4162 case DAPL_IB_CME_LOCAL_FAILURE: 4163 /*FALLTHRU*/ 4164 case DAPL_IB_CME_DESTINATION_UNREACHABLE: 4165 /* results in IBT_CM_NO_RESC as the reason for reject */ 4166 proc_status = IBT_CM_NO_RESOURCE; 4167 break; 4168 default: 4169 /* unexpect reason code */ 4170 ASSERT(!"unexpected reject reason code"); 4171 proc_status = IBT_CM_NO_RESOURCE; 4172 break; 4173 } 4174 4175 mutex_exit(&sp_rp->sp_lock); 4176 4177 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, proc_status, 4178 &proc_reply, NULL, 0); 4179 4180 if (status != IBT_SUCCESS) { 4181 DERR("cr_reject: ibt_cm_proceed returned %d\n", status); 4182 *rvalp = (int)status; 4183 retval = 0; 4184 } 4185 4186 cleanup:; 4187 if (sp_rp != NULL) { 4188 DAPLKA_RS_UNREF(sp_rp); 4189 } 4190 return (retval); 4191 } 4192 4193 4194 /* 4195 * daplka_sp_match is used by daplka_hash_walk for finding SPs 4196 */ 4197 typedef struct daplka_sp_match_s { 4198 uint64_t spm_conn_qual; 4199 daplka_sp_resource_t *spm_sp_rp; 4200 } daplka_sp_match_t; 4201 _NOTE(SCHEME_PROTECTS_DATA("daplka", daplka_sp_match_s::spm_sp_rp)) 4202 4203 static 
int 4204 daplka_sp_match(void *objp, void *arg) 4205 { 4206 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)objp; 4207 4208 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP); 4209 if (sp_rp->sp_conn_qual == 4210 ((daplka_sp_match_t *)arg)->spm_conn_qual) { 4211 ((daplka_sp_match_t *)arg)->spm_sp_rp = sp_rp; 4212 D2("daplka_sp_match: found sp, conn_qual %016llu\n", 4213 (longlong_t)((daplka_sp_match_t *)arg)->spm_conn_qual); 4214 DAPLKA_RS_REF(sp_rp); 4215 return (1); 4216 } 4217 return (0); 4218 } 4219 4220 /* 4221 * cr_handoff allows the client to handoff a connection request from 4222 * one service point to another. 4223 */ 4224 /* ARGSUSED */ 4225 static int 4226 daplka_cr_handoff(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 4227 cred_t *cred, int *rvalp) 4228 { 4229 dapl_cr_handoff_t args; 4230 daplka_sp_resource_t *sp_rp = NULL, *new_sp_rp = NULL; 4231 daplka_sp_conn_pend_t *conn; 4232 daplka_sp_match_t sp_match; 4233 ibt_cm_event_t fake_event; 4234 ibt_cm_status_t cm_status; 4235 ibt_status_t status; 4236 uint16_t bkl_index; 4237 void *sid, *priv = NULL; 4238 int retval = 0, priv_len = 0; 4239 4240 D3("cr_handoff: entering\n"); 4241 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_handoff_t), 4242 mode); 4243 if (retval != 0) { 4244 DERR("cr_handoff: copyin error %d\n", retval); 4245 return (EFAULT); 4246 } 4247 /* get sp resource */ 4248 sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl, 4249 args.crh_sp_hkey); 4250 if (sp_rp == NULL) { 4251 DERR("cr_handoff: cannot find sp resource\n"); 4252 return (EINVAL); 4253 } 4254 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP); 4255 4256 /* 4257 * find the destination service point. 
4258 */ 4259 sp_match.spm_conn_qual = args.crh_conn_qual; 4260 sp_match.spm_sp_rp = NULL; 4261 daplka_hash_walk(&daplka_global_sp_htbl, daplka_sp_match, 4262 (void *)&sp_match, RW_READER); 4263 4264 /* 4265 * return if we cannot find the service point 4266 */ 4267 if (sp_match.spm_sp_rp == NULL) { 4268 DERR("cr_handoff: new sp not found, conn qual = %llu\n", 4269 (longlong_t)args.crh_conn_qual); 4270 retval = EINVAL; 4271 goto cleanup; 4272 } 4273 new_sp_rp = sp_match.spm_sp_rp; 4274 4275 /* 4276 * the spec does not discuss the security implications of this 4277 * function. to be safe, we currently only allow processes 4278 * owned by the same user to handoff connection requests 4279 * to each other. 4280 */ 4281 if (crgetruid(cred) != new_sp_rp->sp_ruid) { 4282 DERR("cr_handoff: permission denied\n"); 4283 retval = EPERM; 4284 goto cleanup; 4285 } 4286 4287 D2("cr_handoff: psep(0x%llx)\n", (longlong_t)args.crh_bkl_cookie); 4288 4289 mutex_enter(&sp_rp->sp_lock); 4290 bkl_index = DAPLKA_GET_PSEP_INDEX(args.crh_bkl_cookie); 4291 /* 4292 * make sure the backlog index is not bogus. 4293 */ 4294 if (bkl_index >= sp_rp->sp_backlog_size) { 4295 DERR("cr_handoff: invalid backlog index 0x%llx %d\n", 4296 (longlong_t)args.crh_bkl_cookie, bkl_index); 4297 mutex_exit(&sp_rp->sp_lock); 4298 retval = EINVAL; 4299 goto cleanup; 4300 } 4301 /* 4302 * make sure the backlog index indeed refers 4303 * to a pending connection. 
4304 */ 4305 conn = &sp_rp->sp_backlog[bkl_index]; 4306 if (conn->spcp_state != DAPLKA_SPCP_PENDING) { 4307 DERR("cr_handoff: invalid conn state %d\n", 4308 conn->spcp_state); 4309 mutex_exit(&sp_rp->sp_lock); 4310 retval = EINVAL; 4311 goto cleanup; 4312 } 4313 if (conn->spcp_sid == NULL) { 4314 DERR("cr_handoff: sid == NULL\n"); 4315 mutex_exit(&sp_rp->sp_lock); 4316 retval = EINVAL; 4317 goto cleanup; 4318 } 4319 sid = conn->spcp_sid; 4320 priv = NULL; 4321 priv_len = conn->spcp_req_len; 4322 if (priv_len > 0) { 4323 priv = kmem_zalloc(priv_len, daplka_km_flags); 4324 if (priv == NULL) { 4325 mutex_exit(&sp_rp->sp_lock); 4326 retval = ENOMEM; 4327 goto cleanup; 4328 } 4329 bcopy(conn->spcp_req_data, priv, priv_len); 4330 } 4331 /* 4332 * this clears our slot in the backlog array. 4333 * this slot may now be used by other pending connections. 4334 */ 4335 conn->spcp_sid = NULL; 4336 conn->spcp_state = DAPLKA_SPCP_INIT; 4337 conn->spcp_req_len = 0; 4338 mutex_exit(&sp_rp->sp_lock); 4339 4340 /* fill fake_event and call service_req handler */ 4341 bzero(&fake_event, sizeof (fake_event)); 4342 fake_event.cm_type = IBT_CM_EVENT_REQ_RCV; 4343 fake_event.cm_session_id = sid; 4344 fake_event.cm_priv_data_len = priv_len; 4345 fake_event.cm_priv_data = priv; 4346 4347 cm_status = daplka_cm_service_req(new_sp_rp, 4348 &fake_event, NULL, priv, (ibt_priv_data_len_t)priv_len); 4349 if (cm_status != IBT_CM_DEFER) { 4350 ibt_cm_proceed_reply_t proc_reply; 4351 4352 DERR("cr_handoff: service_req returned %d\n", cm_status); 4353 /* 4354 * if for some reason cm_service_req failed, we 4355 * reject the connection. 
4356 */ 4357 bzero(&proc_reply, sizeof (proc_reply)); 4358 4359 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, 4360 IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0); 4361 if (status != IBT_SUCCESS) { 4362 DERR("cr_handoff: ibt_cm_proceed returned %d\n", 4363 status); 4364 } 4365 *rvalp = (int)status; 4366 retval = 0; 4367 } 4368 4369 cleanup:; 4370 if (priv_len > 0 && priv != NULL) { 4371 kmem_free(priv, priv_len); 4372 } 4373 if (new_sp_rp != NULL) { 4374 DAPLKA_RS_UNREF(new_sp_rp); 4375 } 4376 if (sp_rp != NULL) { 4377 DAPLKA_RS_UNREF(sp_rp); 4378 } 4379 D3("cr_handoff: exiting\n"); 4380 return (retval); 4381 } 4382 4383 /* 4384 * returns a list of hca attributes 4385 */ 4386 /* ARGSUSED */ 4387 static int 4388 daplka_ia_query(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 4389 cred_t *cred, int *rvalp) 4390 { 4391 dapl_ia_query_t args; 4392 int retval; 4393 ibt_hca_attr_t *hcap; 4394 4395 hcap = &ia_rp->ia_hca->hca_attr; 4396 4397 /* 4398 * Take the ibt_hca_attr_t and stuff them into dapl_hca_attr_t 4399 */ 4400 args.hca_attr.dhca_vendor_id = hcap->hca_vendor_id; 4401 args.hca_attr.dhca_device_id = hcap->hca_device_id; 4402 args.hca_attr.dhca_version_id = hcap->hca_version_id; 4403 args.hca_attr.dhca_max_chans = hcap->hca_max_chans; 4404 args.hca_attr.dhca_max_chan_sz = hcap->hca_max_chan_sz; 4405 args.hca_attr.dhca_max_sgl = hcap->hca_max_sgl; 4406 args.hca_attr.dhca_max_cq = hcap->hca_max_cq; 4407 args.hca_attr.dhca_max_cq_sz = hcap->hca_max_cq_sz; 4408 args.hca_attr.dhca_max_memr = hcap->hca_max_memr; 4409 args.hca_attr.dhca_max_memr_len = hcap->hca_max_memr_len; 4410 args.hca_attr.dhca_max_mem_win = hcap->hca_max_mem_win; 4411 args.hca_attr.dhca_max_rdma_in_chan = hcap->hca_max_rdma_in_chan; 4412 args.hca_attr.dhca_max_rdma_out_chan = hcap->hca_max_rdma_out_chan; 4413 args.hca_attr.dhca_max_partitions = hcap->hca_max_partitions; 4414 args.hca_attr.dhca_nports = hcap->hca_nports; 4415 args.hca_attr.dhca_node_guid = hcap->hca_node_guid; 4416 
args.hca_attr.dhca_max_pd = hcap->hca_max_pd; 4417 args.hca_attr.dhca_max_srqs = hcap->hca_max_srqs; 4418 args.hca_attr.dhca_max_srqs_sz = hcap->hca_max_srqs_sz; 4419 args.hca_attr.dhca_max_srq_sgl = hcap->hca_max_srq_sgl; 4420 4421 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ia_query_t), 4422 mode); 4423 if (retval != 0) { 4424 DERR("ia_query: copyout error %d\n", retval); 4425 return (EFAULT); 4426 } 4427 return (0); 4428 } 4429 4430 /* 4431 * This routine is passed to hash walk in the daplka_pre_mr_cleanup_callback, 4432 * it frees the mw embedded in the mw resource object. 4433 */ 4434 4435 /* ARGSUSED */ 4436 static int 4437 daplka_mr_cb_freemw(void *objp, void *arg) 4438 { 4439 daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)objp; 4440 ibt_mw_hdl_t mw_hdl; 4441 ibt_status_t status; 4442 4443 D3("mr_cb_freemw: entering, mw_rp 0x%p\n", mw_rp); 4444 DAPLKA_RS_REF(mw_rp); 4445 4446 mutex_enter(&mw_rp->mw_lock); 4447 mw_hdl = mw_rp->mw_hdl; 4448 /* 4449 * we set mw_hdl to NULL so it won't get freed again 4450 */ 4451 mw_rp->mw_hdl = NULL; 4452 mutex_exit(&mw_rp->mw_lock); 4453 4454 if (mw_hdl != NULL) { 4455 status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl, mw_hdl); 4456 if (status != IBT_SUCCESS) { 4457 DERR("mr_cb_freemw: ibt_free_mw returned %d\n", status); 4458 } 4459 D3("mr_cb_freemw: mw freed\n"); 4460 } 4461 4462 DAPLKA_RS_UNREF(mw_rp); 4463 return (0); 4464 } 4465 4466 /* 4467 * This routine is called from HCA driver's umem lock undo callback 4468 * when the memory associated with an MR is being unmapped. In this callback 4469 * we free all the MW associated with the IA and post an unaffiliated 4470 * async event to tell the app that there was a catastrophic event. 4471 * This allows the HCA to deregister the MR in its callback processing. 
4472 */ 4473 static void 4474 daplka_pre_mr_cleanup_callback(void *arg1, void *arg2 /*ARGSUSED*/) 4475 { 4476 daplka_mr_resource_t *mr_rp; 4477 daplka_ia_resource_t *ia_rp; 4478 #ifdef _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB 4479 ibt_async_event_t event; 4480 ibt_hca_attr_t *hca_attrp; 4481 #endif 4482 minor_t rnum; 4483 4484 mr_rp = (daplka_mr_resource_t *)arg1; 4485 rnum = DAPLKA_RS_RNUM(mr_rp); 4486 daplka_shared_mr_free(mr_rp); 4487 4488 ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum); 4489 if (ia_rp == NULL) { 4490 DERR("daplka_mr_unlock_callback: resource not found, rnum %d\n", 4491 rnum); 4492 return; 4493 } 4494 4495 DERR("daplka_mr_unlock_callback: resource(%p) rnum(%d)\n", ia_rp, rnum); 4496 4497 mutex_enter(&ia_rp->ia_lock); 4498 /* 4499 * MW is being alloced OR MW freeze has already begun. In 4500 * both these cases we wait for that to complete before 4501 * continuing. 4502 */ 4503 while ((ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS) || 4504 (ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS)) { 4505 cv_wait(&ia_rp->ia_cv, &ia_rp->ia_lock); 4506 } 4507 4508 switch (ia_rp->ia_state) { 4509 case DAPLKA_IA_INIT: 4510 ia_rp->ia_state = DAPLKA_IA_MW_FREEZE_IN_PROGRESS; 4511 mutex_exit(&ia_rp->ia_lock); 4512 break; 4513 case DAPLKA_IA_MW_FROZEN: 4514 /* the mw on this ia have been freed */ 4515 D2("daplka_mr_unlock_callback: ia_state %d nothing to do\n", 4516 ia_rp->ia_state); 4517 mutex_exit(&ia_rp->ia_lock); 4518 goto cleanup; 4519 default: 4520 ASSERT(!"daplka_mr_unlock_callback: IA state invalid"); 4521 DERR("daplka_mr_unlock_callback: invalid ia_state %d\n", 4522 ia_rp->ia_state); 4523 mutex_exit(&ia_rp->ia_lock); 4524 goto cleanup; 4525 } 4526 4527 /* 4528 * Walk the mw hash table and free the mws. Acquire a writer 4529 * lock since we don't want anyone else traversing this tree 4530 * while we are freeing the MW. 
4531 */ 4532 daplka_hash_walk(&ia_rp->ia_mw_htbl, daplka_mr_cb_freemw, NULL, 4533 RW_WRITER); 4534 4535 mutex_enter(&ia_rp->ia_lock); 4536 ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS); 4537 ia_rp->ia_state = DAPLKA_IA_MW_FROZEN; 4538 cv_broadcast(&ia_rp->ia_cv); 4539 mutex_exit(&ia_rp->ia_lock); 4540 4541 /* 4542 * Currently commented out because Oracle skgxp is incapable 4543 * of handling async events correctly. 4544 */ 4545 #ifdef _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB 4546 /* 4547 * Enqueue an unaffiliated async error event to indicate this 4548 * IA has encountered a problem that caused the MW to freed up 4549 */ 4550 4551 /* Create a fake event, only relevant field is the hca_guid */ 4552 bzero(&event, sizeof (ibt_async_event_t)); 4553 hca_attrp = &ia_rp->ia_hca->hca_attr; 4554 event.ev_hca_guid = hca_attrp->hca_node_guid; 4555 4556 daplka_async_event_create(IBT_ERROR_LOCAL_CATASTROPHIC, &event, 0, 4557 ia_rp); 4558 #endif /* _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB */ 4559 4560 cleanup:; 4561 D2("daplka_mr_unlock_callback: resource(%p) done\n", ia_rp); 4562 DAPLKA_RS_UNREF(ia_rp); 4563 } 4564 4565 /* 4566 * registers a memory region. 4567 * memory locking will be done by the HCA driver. 
4568 */ 4569 /* ARGSUSED */ 4570 static int 4571 daplka_mr_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 4572 cred_t *cred, int *rvalp) 4573 { 4574 boolean_t inserted = B_FALSE; 4575 daplka_mr_resource_t *mr_rp; 4576 daplka_pd_resource_t *pd_rp; 4577 dapl_mr_register_t args; 4578 ibt_mr_data_in_t mr_cb_data_in; 4579 uint64_t mr_hkey = 0; 4580 ibt_status_t status; 4581 int retval; 4582 4583 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_register_t), 4584 mode); 4585 if (retval != 0) { 4586 DERR("mr_register: copyin error %d\n", retval); 4587 return (EINVAL); 4588 } 4589 mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags); 4590 if (mr_rp == NULL) { 4591 DERR("mr_register: cannot allocate mr resource\n"); 4592 return (ENOMEM); 4593 } 4594 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp)) 4595 DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR, 4596 DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy); 4597 4598 mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL); 4599 mr_rp->mr_hca = ia_rp->ia_hca; 4600 mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl; 4601 mr_rp->mr_next = NULL; 4602 mr_rp->mr_shared_mr = NULL; 4603 4604 /* get pd handle */ 4605 pd_rp = (daplka_pd_resource_t *) 4606 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mr_pd_hkey); 4607 if (pd_rp == NULL) { 4608 DERR("mr_register: cannot find pd resource\n"); 4609 retval = EINVAL; 4610 goto cleanup; 4611 } 4612 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD); 4613 mr_rp->mr_pd_res = pd_rp; 4614 4615 mr_rp->mr_attr.mr_vaddr = args.mr_vaddr; 4616 mr_rp->mr_attr.mr_len = args.mr_len; 4617 mr_rp->mr_attr.mr_as = curproc->p_as; 4618 mr_rp->mr_attr.mr_flags = args.mr_flags | IBT_MR_NOSLEEP; 4619 4620 D3("mr_register: mr_vaddr %p, mr_len %llu, mr_flags 0x%x\n", 4621 (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr, 4622 (longlong_t)mr_rp->mr_attr.mr_len, 4623 mr_rp->mr_attr.mr_flags); 4624 4625 status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl, 4626 mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr, &mr_rp->mr_hdl, 4627 
&mr_rp->mr_desc); 4628 4629 if (status != IBT_SUCCESS) { 4630 DERR("mr_register: ibt_register_mr error %d\n", status); 4631 *rvalp = (int)status; 4632 retval = 0; 4633 goto cleanup; 4634 } 4635 4636 mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION; 4637 mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback; 4638 mr_cb_data_in.mr_arg1 = (void *)mr_rp; 4639 mr_cb_data_in.mr_arg2 = NULL; 4640 4641 /* Pass the service driver mr cleanup handler to the hca driver */ 4642 status = ibt_ci_data_in(ia_rp->ia_hca_hdl, 4643 IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl, 4644 &mr_cb_data_in, sizeof (mr_cb_data_in)); 4645 4646 if (status != IBT_SUCCESS) { 4647 DERR("mr_register: ibt_ci_data_in error(%d) ver(%d)", 4648 status, mr_cb_data_in.mr_rev); 4649 *rvalp = (int)status; 4650 retval = 0; 4651 goto cleanup; 4652 } 4653 4654 /* insert into mr hash table */ 4655 retval = daplka_hash_insert(&ia_rp->ia_mr_htbl, 4656 &mr_hkey, (void *)mr_rp); 4657 if (retval != 0) { 4658 DERR("mr_register: cannot insert mr resource into mr_htbl\n"); 4659 goto cleanup; 4660 } 4661 inserted = B_TRUE; 4662 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp)) 4663 4664 args.mr_lkey = mr_rp->mr_desc.md_lkey; 4665 args.mr_rkey = mr_rp->mr_desc.md_rkey; 4666 args.mr_hkey = mr_hkey; 4667 4668 retval = ddi_copyout((void *)&args, (void *)arg, 4669 sizeof (dapl_mr_register_t), mode); 4670 if (retval != 0) { 4671 DERR("mr_register: copyout error %d\n", retval); 4672 retval = EFAULT; 4673 goto cleanup; 4674 } 4675 return (0); 4676 4677 cleanup:; 4678 if (inserted) { 4679 daplka_mr_resource_t *free_rp = NULL; 4680 4681 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey, 4682 (void **)&free_rp); 4683 if (free_rp != mr_rp) { 4684 DERR("mr_register: cannot remove mr from hash table\n"); 4685 /* 4686 * we can only get here if another thread 4687 * has completed the cleanup in mr_deregister 4688 */ 4689 return (retval); 4690 } 4691 } 4692 DAPLKA_RS_UNREF(mr_rp); 4693 return (retval); 4694 } 4695 4696 /* 4697 * 
registers a shared memory region. 4698 * the client calls this function with the intention to share the memory 4699 * region with other clients. it is assumed that, prior to calling this 4700 * function, the client(s) are already sharing parts of their address 4701 * space using a mechanism such as SYSV shared memory. the first client 4702 * that calls this function will create and insert a daplka_shared_mr_t 4703 * object into the global daplka_shared_mr_tree. this shared mr object 4704 * will be identified by a unique 40-byte key and will maintain a list 4705 * of mr resources. every time this function gets called with the same 4706 * 40-byte key, a new mr resource (containing a new mr handle generated 4707 * by ibt_register_mr or ibt_register_shared_mr) is created and inserted 4708 * into this list. similarly, every time a shared mr gets deregistered 4709 * or invalidated by a callback, the mr resource gets removed from this 4710 * list. the shared mr object has a reference count. when it drops to 4711 * zero, the shared mr object will be removed from the global avl tree 4712 * and be freed. 4713 */ 4714 /* ARGSUSED */ 4715 static int 4716 daplka_mr_register_shared(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 4717 cred_t *cred, int *rvalp) 4718 { 4719 dapl_mr_register_shared_t args; 4720 daplka_shared_mr_t *smrp = NULL; 4721 daplka_shared_mr_t tmp_smr; 4722 ibt_mr_data_in_t mr_cb_data_in; 4723 avl_index_t where; 4724 boolean_t inserted = B_FALSE; 4725 daplka_mr_resource_t *mr_rp = NULL; 4726 daplka_pd_resource_t *pd_rp; 4727 uint64_t mr_hkey = 0; 4728 ibt_status_t status; 4729 int retval; 4730 4731 retval = ddi_copyin((void *)arg, &args, 4732 sizeof (dapl_mr_register_shared_t), mode); 4733 if (retval != 0) { 4734 DERR("mr_register_shared: copyin error %d\n", retval); 4735 return (EINVAL); 4736 } 4737 4738 mutex_enter(&daplka_shared_mr_lock); 4739 /* 4740 * find smrp from the global avl tree. 4741 * the 40-byte key is used as the lookup key. 
4742 */ 4743 tmp_smr.smr_cookie = args.mrs_shm_cookie; 4744 smrp = (daplka_shared_mr_t *) 4745 avl_find(&daplka_shared_mr_tree, &tmp_smr, &where); 4746 if (smrp != NULL) { 4747 D2("mr_register_shared: smrp 0x%p, found cookie:\n" 4748 "0x%016llx%016llx%016llx%016llx%016llx\n", smrp, 4749 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4], 4750 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3], 4751 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2], 4752 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1], 4753 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]); 4754 4755 /* 4756 * if the smrp exists, other threads could still be 4757 * accessing it. we wait until they are done before 4758 * we continue. 4759 */ 4760 smrp->smr_refcnt++; 4761 while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) { 4762 D2("mr_register_shared: smrp 0x%p, " 4763 "waiting in transitioning state, refcnt %d\n", 4764 smrp, smrp->smr_refcnt); 4765 cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock); 4766 } 4767 ASSERT(smrp->smr_state == DAPLKA_SMR_READY); 4768 D2("mr_register_shared: smrp 0x%p, refcnt %d, ready\n", 4769 smrp, smrp->smr_refcnt); 4770 4771 /* 4772 * we set smr_state to TRANSITIONING to temporarily 4773 * prevent other threads from trying to access smrp. 
4774 */ 4775 smrp->smr_state = DAPLKA_SMR_TRANSITIONING; 4776 } else { 4777 D2("mr_register_shared: cannot find cookie:\n" 4778 "0x%016llx%016llx%016llx%016llx%016llx\n", 4779 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4], 4780 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3], 4781 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2], 4782 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1], 4783 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]); 4784 4785 /* 4786 * if we cannot find smrp, we need to create and 4787 * insert one into daplka_shared_mr_tree 4788 */ 4789 smrp = kmem_zalloc(sizeof (daplka_shared_mr_t), 4790 daplka_km_flags); 4791 if (smrp == NULL) { 4792 retval = ENOMEM; 4793 mutex_exit(&daplka_shared_mr_lock); 4794 goto cleanup; 4795 } 4796 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp)) 4797 smrp->smr_refcnt = 1; 4798 smrp->smr_cookie = args.mrs_shm_cookie; 4799 smrp->smr_state = DAPLKA_SMR_TRANSITIONING; 4800 smrp->smr_mr_list = NULL; 4801 cv_init(&smrp->smr_cv, NULL, CV_DRIVER, NULL); 4802 avl_insert(&daplka_shared_mr_tree, smrp, where); 4803 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*smrp)) 4804 } 4805 mutex_exit(&daplka_shared_mr_lock); 4806 4807 mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags); 4808 if (mr_rp == NULL) { 4809 DERR("mr_register_shared: cannot allocate mr resource\n"); 4810 goto cleanup; 4811 } 4812 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp)) 4813 DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR, 4814 DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy); 4815 4816 mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL); 4817 mr_rp->mr_hca = ia_rp->ia_hca; 4818 mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl; 4819 mr_rp->mr_next = NULL; 4820 mr_rp->mr_shared_mr = NULL; 4821 4822 /* get pd handle */ 4823 pd_rp = (daplka_pd_resource_t *) 4824 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mrs_pd_hkey); 4825 if (pd_rp == NULL) { 4826 DERR("mr_register_shared: cannot find pd resource\n"); 4827 retval = EINVAL; 4828 goto cleanup; 4829 } 4830 ASSERT(DAPLKA_RS_TYPE(pd_rp) == 
DAPL_TYPE_PD); 4831 mr_rp->mr_pd_res = pd_rp; 4832 4833 mr_rp->mr_attr.mr_vaddr = args.mrs_vaddr; 4834 mr_rp->mr_attr.mr_len = args.mrs_len; 4835 mr_rp->mr_attr.mr_flags = args.mrs_flags | IBT_MR_NOSLEEP; 4836 mr_rp->mr_attr.mr_as = curproc->p_as; 4837 4838 D2("mr_register_shared: mr_vaddr 0x%p, mr_len %llu, " 4839 "mr_flags 0x%x, mr_as 0x%p, mr_exists %d, smrp 0x%p\n", 4840 (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr, 4841 (longlong_t)mr_rp->mr_attr.mr_len, 4842 mr_rp->mr_attr.mr_flags, mr_rp->mr_attr.mr_as, 4843 (int)(smrp->smr_mr_list != NULL), smrp); 4844 4845 /* 4846 * since we are in TRANSITIONING state, we are guaranteed 4847 * that we have exclusive access to smr_mr_list. 4848 */ 4849 if (smrp->smr_mr_list != NULL) { 4850 ibt_smr_attr_t mem_sattr; 4851 4852 /* 4853 * a non-null smr_mr_list indicates that someone 4854 * else has already inserted an mr_resource into 4855 * smr_mr_list. we use the mr_handle from the first 4856 * element as an arg to ibt_register_shared_mr. 4857 */ 4858 mem_sattr.mr_vaddr = smrp->smr_mr_list->mr_desc.md_vaddr; 4859 mem_sattr.mr_flags = mr_rp->mr_attr.mr_flags; 4860 4861 D2("mr_register_shared: mem_sattr vaddr 0x%p flags 0x%x\n", 4862 (void *)(uintptr_t)mem_sattr.mr_vaddr, mem_sattr.mr_flags); 4863 status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl, 4864 smrp->smr_mr_list->mr_hdl, mr_rp->mr_pd_res->pd_hdl, 4865 &mem_sattr, &mr_rp->mr_hdl, &mr_rp->mr_desc); 4866 4867 if (status != IBT_SUCCESS) { 4868 DERR("mr_register_shared: " 4869 "ibt_register_shared_mr error %d\n", status); 4870 *rvalp = (int)status; 4871 retval = 0; 4872 goto cleanup; 4873 } 4874 } else { 4875 /* 4876 * an mr does not exist yet. we need to create one 4877 * using ibt_register_mr. 
4878 */ 4879 status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl, 4880 mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr, 4881 &mr_rp->mr_hdl, &mr_rp->mr_desc); 4882 4883 if (status != IBT_SUCCESS) { 4884 DERR("mr_register_shared: " 4885 "ibt_register_mr error %d\n", status); 4886 *rvalp = (int)status; 4887 retval = 0; 4888 goto cleanup; 4889 } 4890 } 4891 4892 mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION; 4893 mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback; 4894 mr_cb_data_in.mr_arg1 = (void *)mr_rp; 4895 mr_cb_data_in.mr_arg2 = NULL; 4896 4897 /* Pass the service driver mr cleanup handler to the hca driver */ 4898 status = ibt_ci_data_in(ia_rp->ia_hca_hdl, 4899 IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl, 4900 &mr_cb_data_in, sizeof (mr_cb_data_in)); 4901 4902 if (status != IBT_SUCCESS) { 4903 DERR("mr_register_shared: ibt_ci_data_in error(%d) ver(%d)", 4904 status, mr_cb_data_in.mr_rev); 4905 *rvalp = (int)status; 4906 retval = 0; 4907 goto cleanup; 4908 } 4909 4910 /* 4911 * we bump reference of mr_rp and enqueue it onto smrp. 4912 */ 4913 DAPLKA_RS_REF(mr_rp); 4914 mr_rp->mr_next = smrp->smr_mr_list; 4915 smrp->smr_mr_list = mr_rp; 4916 mr_rp->mr_shared_mr = smrp; 4917 4918 /* insert into mr hash table */ 4919 retval = daplka_hash_insert(&ia_rp->ia_mr_htbl, 4920 &mr_hkey, (void *)mr_rp); 4921 if (retval != 0) { 4922 DERR("mr_register_shared: cannot insert mr resource\n"); 4923 goto cleanup; 4924 } 4925 inserted = B_TRUE; 4926 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp)) 4927 4928 /* 4929 * at this point, there are two references to our mr resource. 4930 * one is kept in ia_mr_htbl. the other is kept in the list 4931 * within this shared mr object (smrp). when we deregister this 4932 * mr or when a callback invalidates this mr, the reference kept 4933 * by this shared mr object will be removed. 
4934 */ 4935 4936 args.mrs_lkey = mr_rp->mr_desc.md_lkey; 4937 args.mrs_rkey = mr_rp->mr_desc.md_rkey; 4938 args.mrs_hkey = mr_hkey; 4939 4940 retval = ddi_copyout((void *)&args, (void *)arg, 4941 sizeof (dapl_mr_register_shared_t), mode); 4942 if (retval != 0) { 4943 DERR("mr_register_shared: copyout error %d\n", retval); 4944 retval = EFAULT; 4945 goto cleanup; 4946 } 4947 4948 /* 4949 * set the state to READY to allow others to continue 4950 */ 4951 mutex_enter(&daplka_shared_mr_lock); 4952 smrp->smr_state = DAPLKA_SMR_READY; 4953 cv_broadcast(&smrp->smr_cv); 4954 mutex_exit(&daplka_shared_mr_lock); 4955 return (0); 4956 4957 cleanup:; 4958 if (inserted) { 4959 daplka_mr_resource_t *free_rp = NULL; 4960 4961 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey, 4962 (void **)&free_rp); 4963 if (free_rp != mr_rp) { 4964 DERR("mr_register_shared: " 4965 "cannot remove mr from hash table\n"); 4966 /* 4967 * we can only get here if another thread 4968 * has completed the cleanup in mr_deregister 4969 */ 4970 return (retval); 4971 } 4972 } 4973 if (smrp != NULL) { 4974 mutex_enter(&daplka_shared_mr_lock); 4975 ASSERT(smrp->smr_refcnt > 0); 4976 smrp->smr_refcnt--; 4977 4978 if (smrp->smr_refcnt == 0) { 4979 DERR("mr_register_shared: freeing smrp 0x%p\n", smrp); 4980 avl_remove(&daplka_shared_mr_tree, smrp); 4981 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp)) 4982 if (smrp->smr_mr_list != NULL) { 4983 /* 4984 * the refcnt is 0. if there is anything 4985 * left on the list, it must be ours. 
4986 */ 4987 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp)) 4988 ASSERT(smrp->smr_mr_list == mr_rp); 4989 DAPLKA_RS_UNREF(mr_rp); 4990 smrp->smr_mr_list = NULL; 4991 ASSERT(mr_rp->mr_shared_mr == smrp); 4992 mr_rp->mr_shared_mr = NULL; 4993 ASSERT(mr_rp->mr_next == NULL); 4994 } 4995 smrp->smr_state = DAPLKA_SMR_FREED; 4996 cv_destroy(&smrp->smr_cv); 4997 kmem_free(smrp, sizeof (daplka_shared_mr_t)); 4998 } else { 4999 DERR("mr_register_shared: resetting smr_state " 5000 "smrp 0x%p, %d waiters remain\n", smrp, 5001 smrp->smr_refcnt); 5002 ASSERT(smrp->smr_state == DAPLKA_SMR_TRANSITIONING); 5003 if (smrp->smr_mr_list != NULL && mr_rp != NULL) { 5004 daplka_mr_resource_t **mpp; 5005 5006 /* 5007 * search and remove mr_rp from smr_mr_list 5008 */ 5009 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp)) 5010 mpp = &smrp->smr_mr_list; 5011 while (*mpp != NULL) { 5012 if (*mpp == mr_rp) { 5013 *mpp = (*mpp)->mr_next; 5014 DAPLKA_RS_UNREF(mr_rp); 5015 ASSERT(mr_rp->mr_shared_mr == 5016 smrp); 5017 mr_rp->mr_shared_mr = NULL; 5018 mr_rp->mr_next = NULL; 5019 break; 5020 } 5021 mpp = &(*mpp)->mr_next; 5022 } 5023 } 5024 /* 5025 * note that smr_state == READY does not necessarily 5026 * mean that smr_mr_list is non empty. for this case, 5027 * we are doing cleanup because of a failure. we set 5028 * the state to READY to allow other threads to 5029 * continue. 5030 */ 5031 smrp->smr_state = DAPLKA_SMR_READY; 5032 cv_broadcast(&smrp->smr_cv); 5033 } 5034 mutex_exit(&daplka_shared_mr_lock); 5035 } 5036 if (mr_rp != NULL) { 5037 DAPLKA_RS_UNREF(mr_rp); 5038 } 5039 return (retval); 5040 } 5041 5042 /* 5043 * registers a memory region using the attributes of an 5044 * existing region. 
5045 */ 5046 /* ARGSUSED */ 5047 static int 5048 daplka_mr_register_lmr(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 5049 cred_t *cred, int *rvalp) 5050 { 5051 boolean_t inserted = B_FALSE; 5052 dapl_mr_register_lmr_t args; 5053 ibt_mr_data_in_t mr_cb_data_in; 5054 daplka_mr_resource_t *orig_mr_rp = NULL; 5055 daplka_mr_resource_t *mr_rp; 5056 ibt_smr_attr_t mem_sattr; 5057 uint64_t mr_hkey = 0; 5058 ibt_status_t status; 5059 int retval; 5060 5061 retval = ddi_copyin((void *)arg, &args, 5062 sizeof (dapl_mr_register_lmr_t), mode); 5063 if (retval != 0) { 5064 DERR("mr_register_lmr: copyin error %d\n", retval); 5065 return (EINVAL); 5066 } 5067 orig_mr_rp = (daplka_mr_resource_t *) 5068 daplka_hash_lookup(&ia_rp->ia_mr_htbl, args.mrl_orig_hkey); 5069 if (orig_mr_rp == NULL) { 5070 DERR("mr_register_lmr: cannot find mr resource\n"); 5071 return (EINVAL); 5072 } 5073 ASSERT(DAPLKA_RS_TYPE(orig_mr_rp) == DAPL_TYPE_MR); 5074 5075 mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags); 5076 if (mr_rp == NULL) { 5077 DERR("mr_register_lmr: cannot allocate mr resource\n"); 5078 retval = ENOMEM; 5079 goto cleanup; 5080 } 5081 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp)) 5082 DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR, 5083 DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy); 5084 5085 mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL); 5086 mr_rp->mr_hca = ia_rp->ia_hca; 5087 mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl; 5088 mr_rp->mr_next = NULL; 5089 mr_rp->mr_shared_mr = NULL; 5090 5091 DAPLKA_RS_REF(orig_mr_rp->mr_pd_res); 5092 mr_rp->mr_pd_res = orig_mr_rp->mr_pd_res; 5093 mr_rp->mr_attr = orig_mr_rp->mr_attr; 5094 5095 /* Pass the IO addr that was returned while allocating the orig MR */ 5096 mem_sattr.mr_vaddr = orig_mr_rp->mr_desc.md_vaddr; 5097 mem_sattr.mr_flags = args.mrl_flags | IBT_MR_NOSLEEP; 5098 5099 status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl, 5100 orig_mr_rp->mr_hdl, mr_rp->mr_pd_res->pd_hdl, &mem_sattr, 5101 &mr_rp->mr_hdl, 
&mr_rp->mr_desc); 5102 5103 if (status != IBT_SUCCESS) { 5104 DERR("mr_register_lmr: ibt_register_shared_mr error %d\n", 5105 status); 5106 *rvalp = (int)status; 5107 retval = 0; 5108 goto cleanup; 5109 } 5110 5111 mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION; 5112 mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback; 5113 mr_cb_data_in.mr_arg1 = (void *)mr_rp; 5114 mr_cb_data_in.mr_arg2 = NULL; 5115 5116 /* Pass the service driver mr cleanup handler to the hca driver */ 5117 status = ibt_ci_data_in(ia_rp->ia_hca_hdl, 5118 IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl, 5119 &mr_cb_data_in, sizeof (mr_cb_data_in)); 5120 5121 if (status != IBT_SUCCESS) { 5122 DERR("mr_register_lmr: ibt_ci_data_in error(%d) ver(%d)", 5123 status, mr_cb_data_in.mr_rev); 5124 *rvalp = (int)status; 5125 retval = 0; 5126 goto cleanup; 5127 } 5128 mr_rp->mr_attr.mr_len = orig_mr_rp->mr_attr.mr_len; 5129 mr_rp->mr_attr.mr_flags = mem_sattr.mr_flags; 5130 5131 /* insert into mr hash table */ 5132 retval = daplka_hash_insert(&ia_rp->ia_mr_htbl, &mr_hkey, 5133 (void *)mr_rp); 5134 if (retval != 0) { 5135 DERR("mr_register: cannot insert mr resource into mr_htbl\n"); 5136 goto cleanup; 5137 } 5138 inserted = B_TRUE; 5139 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp)) 5140 5141 args.mrl_lkey = mr_rp->mr_desc.md_lkey; 5142 args.mrl_rkey = mr_rp->mr_desc.md_rkey; 5143 args.mrl_hkey = mr_hkey; 5144 5145 retval = ddi_copyout((void *)&args, (void *)arg, 5146 sizeof (dapl_mr_register_lmr_t), mode); 5147 if (retval != 0) { 5148 DERR("mr_register_lmr: copyout error %d\n", retval); 5149 retval = EFAULT; 5150 goto cleanup; 5151 } 5152 if (orig_mr_rp != NULL) { 5153 DAPLKA_RS_UNREF(orig_mr_rp); 5154 } 5155 return (0); 5156 5157 cleanup:; 5158 if (inserted) { 5159 daplka_mr_resource_t *free_rp = NULL; 5160 5161 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey, 5162 (void **)&free_rp); 5163 if (free_rp != mr_rp) { 5164 DERR("mr_register: cannot remove mr from hash table\n"); 5165 /* 5166 * we 
can only get here if another thread 5167 * has completed the cleanup in mr_deregister 5168 */ 5169 return (retval); 5170 } 5171 } 5172 if (orig_mr_rp != NULL) { 5173 DAPLKA_RS_UNREF(orig_mr_rp); 5174 } 5175 if (mr_rp != NULL) { 5176 DAPLKA_RS_UNREF(mr_rp); 5177 } 5178 return (retval); 5179 } 5180 5181 /* 5182 * this function is called by mr_deregister and mr_cleanup_callback to 5183 * remove a mr resource from the shared mr object mr_rp->mr_shared_mr. 5184 * if mr_shared_mr is already NULL, that means the region being 5185 * deregistered or invalidated is not a shared mr region and we can 5186 * return immediately. 5187 */ 5188 static void 5189 daplka_shared_mr_free(daplka_mr_resource_t *mr_rp) 5190 { 5191 daplka_shared_mr_t *smrp; 5192 5193 /* 5194 * we need a lock because mr_callback also checks this field. 5195 * for the rare case that mr_deregister and mr_cleanup_callback 5196 * gets called simultaneously, we are guaranteed that smrp won't 5197 * be dereferenced twice because either function will find 5198 * mr_shared_mr to be NULL. 5199 */ 5200 mutex_enter(&mr_rp->mr_lock); 5201 smrp = mr_rp->mr_shared_mr; 5202 mr_rp->mr_shared_mr = NULL; 5203 mutex_exit(&mr_rp->mr_lock); 5204 5205 if (smrp != NULL) { 5206 daplka_mr_resource_t **mpp; 5207 boolean_t mr_found = B_FALSE; 5208 5209 mutex_enter(&daplka_shared_mr_lock); 5210 ASSERT(smrp->smr_refcnt > 0); 5211 while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) { 5212 cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock); 5213 } 5214 ASSERT(smrp->smr_state == DAPLKA_SMR_READY); 5215 smrp->smr_state = DAPLKA_SMR_TRANSITIONING; 5216 smrp->smr_refcnt--; 5217 5218 /* 5219 * search and remove mr_rp from smr_mr_list. 5220 * also UNREF mr_rp because it is no longer 5221 * on the list. 
5222 */ 5223 mpp = &smrp->smr_mr_list; 5224 while (*mpp != NULL) { 5225 if (*mpp == mr_rp) { 5226 *mpp = (*mpp)->mr_next; 5227 DAPLKA_RS_UNREF(mr_rp); 5228 mr_rp->mr_next = NULL; 5229 mr_found = B_TRUE; 5230 break; 5231 } 5232 mpp = &(*mpp)->mr_next; 5233 } 5234 /* 5235 * since mr_clean_callback may not touch smr_mr_list 5236 * at this time (due to smr_state), we can be sure 5237 * that we can find and remove mr_rp from smr_mr_list 5238 */ 5239 ASSERT(mr_found); 5240 if (smrp->smr_refcnt == 0) { 5241 D3("shared_mr_free: freeing smrp 0x%p\n", smrp); 5242 avl_remove(&daplka_shared_mr_tree, smrp); 5243 ASSERT(smrp->smr_mr_list == NULL); 5244 smrp->smr_state = DAPLKA_SMR_FREED; 5245 cv_destroy(&smrp->smr_cv); 5246 kmem_free(smrp, sizeof (daplka_shared_mr_t)); 5247 } else { 5248 D3("shared_mr_free: smrp 0x%p, refcnt %d\n", 5249 smrp, smrp->smr_refcnt); 5250 smrp->smr_state = DAPLKA_SMR_READY; 5251 cv_broadcast(&smrp->smr_cv); 5252 } 5253 mutex_exit(&daplka_shared_mr_lock); 5254 } 5255 } 5256 5257 /* 5258 * deregisters a memory region. 5259 * if mr is shared, remove reference from global shared mr object. 5260 * release the initial reference to the mr. if the mr's refcnt is 5261 * zero, call mr_destroy to free mr. 
5262 */ 5263 /* ARGSUSED */ 5264 static int 5265 daplka_mr_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 5266 cred_t *cred, int *rvalp) 5267 { 5268 daplka_mr_resource_t *mr_rp; 5269 dapl_mr_deregister_t args; 5270 int retval; 5271 5272 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_deregister_t), 5273 mode); 5274 if (retval != 0) { 5275 DERR("mr_deregister: copyin error %d\n", retval); 5276 return (EINVAL); 5277 } 5278 retval = daplka_hash_remove(&ia_rp->ia_mr_htbl, 5279 args.mrd_hkey, (void **)&mr_rp); 5280 if (retval != 0 || mr_rp == NULL) { 5281 DERR("mr_deregister: cannot find mr resource\n"); 5282 return (EINVAL); 5283 } 5284 ASSERT(DAPLKA_RS_TYPE(mr_rp) == DAPL_TYPE_MR); 5285 5286 daplka_shared_mr_free(mr_rp); 5287 DAPLKA_RS_UNREF(mr_rp); 5288 return (0); 5289 } 5290 5291 /* 5292 * sync local memory regions on RDMA read or write. 5293 */ 5294 /* ARGSUSED */ 5295 static int 5296 daplka_mr_sync(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 5297 cred_t *cred, int *rvalp) 5298 { 5299 dapl_mr_sync_t args; 5300 daplka_mr_resource_t *mr_rp[DAPL_MR_PER_SYNC]; 5301 ibt_mr_sync_t mrs[DAPL_MR_PER_SYNC]; 5302 uint32_t sync_direction_flags; 5303 ibt_status_t status; 5304 int i, j; 5305 int retval; 5306 5307 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_sync_t), mode); 5308 if (retval != 0) { 5309 DERR("mr_sync: copyin error %d\n", retval); 5310 return (EFAULT); 5311 } 5312 5313 /* number of segments bound check */ 5314 if (args.mrs_numseg > DAPL_MR_PER_SYNC) { 5315 DERR("mr_sync: number of segments too large\n"); 5316 return (EINVAL); 5317 } 5318 5319 /* translate MR sync direction flag */ 5320 if (args.mrs_flags == DAPL_MR_SYNC_RDMA_RD) { 5321 sync_direction_flags = IBT_SYNC_READ; 5322 } else if (args.mrs_flags == DAPL_MR_SYNC_RDMA_WR) { 5323 sync_direction_flags = IBT_SYNC_WRITE; 5324 } else { 5325 DERR("mr_sync: unknown flags\n"); 5326 return (EINVAL); 5327 } 5328 5329 /* 5330 * all the segments are going to be sync'd by ibtl 
together 5331 */ 5332 for (i = 0; i < args.mrs_numseg; i++) { 5333 mr_rp[i] = (daplka_mr_resource_t *)daplka_hash_lookup( 5334 &ia_rp->ia_mr_htbl, args.mrs_vec[i].mrsv_hkey); 5335 if (mr_rp[i] == NULL) { 5336 for (j = 0; j < i; j++) { 5337 DAPLKA_RS_UNREF(mr_rp[j]); 5338 } 5339 DERR("mr_sync: lookup error\n"); 5340 return (EINVAL); 5341 } 5342 ASSERT(DAPLKA_RS_TYPE(mr_rp[i]) == DAPL_TYPE_MR); 5343 mrs[i].ms_handle = mr_rp[i]->mr_hdl; 5344 mrs[i].ms_vaddr = args.mrs_vec[i].mrsv_va; 5345 mrs[i].ms_len = args.mrs_vec[i].mrsv_len; 5346 mrs[i].ms_flags = sync_direction_flags; 5347 } 5348 5349 status = ibt_sync_mr(ia_rp->ia_hca_hdl, mrs, args.mrs_numseg); 5350 if (status != IBT_SUCCESS) { 5351 DERR("mr_sync: ibt_sync_mr error %d\n", status); 5352 *rvalp = (int)status; 5353 } 5354 for (i = 0; i < args.mrs_numseg; i++) { 5355 DAPLKA_RS_UNREF(mr_rp[i]); 5356 } 5357 return (0); 5358 } 5359 5360 /* 5361 * destroys a memory region. 5362 * called when refcnt drops to zero. 5363 */ 5364 static int 5365 daplka_mr_destroy(daplka_resource_t *gen_rp) 5366 { 5367 daplka_mr_resource_t *mr_rp = (daplka_mr_resource_t *)gen_rp; 5368 ibt_status_t status; 5369 5370 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp)) 5371 ASSERT(DAPLKA_RS_REFCNT(mr_rp) == 0); 5372 ASSERT(mr_rp->mr_shared_mr == NULL); 5373 D3("mr_destroy: entering, mr_rp 0x%p, rnum %d\n", 5374 mr_rp, DAPLKA_RS_RNUM(mr_rp)); 5375 5376 /* 5377 * deregister mr 5378 */ 5379 if (mr_rp->mr_hdl) { 5380 status = daplka_ibt_deregister_mr(mr_rp, mr_rp->mr_hca_hdl, 5381 mr_rp->mr_hdl); 5382 if (status != IBT_SUCCESS) { 5383 DERR("mr_destroy: ibt_deregister_mr returned %d\n", 5384 status); 5385 } 5386 mr_rp->mr_hdl = NULL; 5387 D3("mr_destroy: mr deregistered\n"); 5388 } 5389 mr_rp->mr_attr.mr_vaddr = 0; 5390 5391 /* 5392 * release reference on PD 5393 */ 5394 if (mr_rp->mr_pd_res != NULL) { 5395 DAPLKA_RS_UNREF(mr_rp->mr_pd_res); 5396 mr_rp->mr_pd_res = NULL; 5397 } 5398 mutex_destroy(&mr_rp->mr_lock); 5399 DAPLKA_RS_FINI(mr_rp); 5400 
kmem_free(mr_rp, sizeof (daplka_mr_resource_t)); 5401 D3("mr_destroy: exiting, mr_rp 0x%p\n", mr_rp); 5402 return (0); 5403 } 5404 5405 /* 5406 * this function is called by daplka_hash_destroy for 5407 * freeing MR resource objects 5408 */ 5409 static void 5410 daplka_hash_mr_free(void *obj) 5411 { 5412 daplka_mr_resource_t *mr_rp = (daplka_mr_resource_t *)obj; 5413 5414 daplka_shared_mr_free(mr_rp); 5415 DAPLKA_RS_UNREF(mr_rp); 5416 } 5417 5418 /* 5419 * comparison function used for finding a shared mr object 5420 * from the global shared mr avl tree. 5421 */ 5422 static int 5423 daplka_shared_mr_cmp(const void *smr1, const void *smr2) 5424 { 5425 daplka_shared_mr_t *s1 = (daplka_shared_mr_t *)smr1; 5426 daplka_shared_mr_t *s2 = (daplka_shared_mr_t *)smr2; 5427 int i; 5428 5429 for (i = 4; i >= 0; i--) { 5430 if (s1->smr_cookie.mc_uint_arr[i] < 5431 s2->smr_cookie.mc_uint_arr[i]) { 5432 return (-1); 5433 } 5434 if (s1->smr_cookie.mc_uint_arr[i] > 5435 s2->smr_cookie.mc_uint_arr[i]) { 5436 return (1); 5437 } 5438 } 5439 return (0); 5440 } 5441 5442 /* 5443 * allocates a protection domain. 
5444 */ 5445 /* ARGSUSED */ 5446 static int 5447 daplka_pd_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 5448 cred_t *cred, int *rvalp) 5449 { 5450 dapl_pd_alloc_t args; 5451 daplka_pd_resource_t *pd_rp; 5452 ibt_status_t status; 5453 uint64_t pd_hkey = 0; 5454 boolean_t inserted = B_FALSE; 5455 int retval; 5456 5457 pd_rp = kmem_zalloc(sizeof (*pd_rp), daplka_km_flags); 5458 if (pd_rp == NULL) { 5459 DERR("pd_alloc: cannot allocate pd resource\n"); 5460 return (ENOMEM); 5461 } 5462 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd_rp)) 5463 DAPLKA_RS_INIT(pd_rp, DAPL_TYPE_PD, 5464 DAPLKA_RS_RNUM(ia_rp), daplka_pd_destroy); 5465 5466 pd_rp->pd_hca = ia_rp->ia_hca; 5467 pd_rp->pd_hca_hdl = ia_rp->ia_hca_hdl; 5468 status = daplka_ibt_alloc_pd(pd_rp, pd_rp->pd_hca_hdl, 5469 IBT_PD_NO_FLAGS, &pd_rp->pd_hdl); 5470 if (status != IBT_SUCCESS) { 5471 DERR("pd_alloc: ibt_alloc_pd returned %d\n", status); 5472 *rvalp = (int)status; 5473 retval = 0; 5474 goto cleanup; 5475 } 5476 5477 /* insert into pd hash table */ 5478 retval = daplka_hash_insert(&ia_rp->ia_pd_htbl, 5479 &pd_hkey, (void *)pd_rp); 5480 if (retval != 0) { 5481 DERR("pd_alloc: cannot insert pd resource into pd_htbl\n"); 5482 goto cleanup; 5483 } 5484 inserted = B_TRUE; 5485 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pd_rp)) 5486 5487 /* return hkey to library */ 5488 args.pda_hkey = pd_hkey; 5489 5490 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_pd_alloc_t), 5491 mode); 5492 if (retval != 0) { 5493 DERR("pd_alloc: copyout error %d\n", retval); 5494 retval = EFAULT; 5495 goto cleanup; 5496 } 5497 return (0); 5498 5499 cleanup:; 5500 if (inserted) { 5501 daplka_pd_resource_t *free_rp = NULL; 5502 5503 (void) daplka_hash_remove(&ia_rp->ia_pd_htbl, pd_hkey, 5504 (void **)&free_rp); 5505 if (free_rp != pd_rp) { 5506 DERR("pd_alloc: cannot remove pd from hash table\n"); 5507 /* 5508 * we can only get here if another thread 5509 * has completed the cleanup in pd_free 5510 */ 5511 return (retval); 5512 } 5513 } 
5514 DAPLKA_RS_UNREF(pd_rp); 5515 return (retval); 5516 } 5517 5518 /* 5519 * destroys a protection domain. 5520 * called when refcnt drops to zero. 5521 */ 5522 static int 5523 daplka_pd_destroy(daplka_resource_t *gen_rp) 5524 { 5525 daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)gen_rp; 5526 ibt_status_t status; 5527 5528 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd_rp)) 5529 ASSERT(DAPLKA_RS_REFCNT(pd_rp) == 0); 5530 D3("pd_destroy: entering, pd_rp %p, rnum %d\n", 5531 pd_rp, DAPLKA_RS_RNUM(pd_rp)); 5532 5533 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD); 5534 if (pd_rp->pd_hdl != NULL) { 5535 status = daplka_ibt_free_pd(pd_rp, pd_rp->pd_hca_hdl, 5536 pd_rp->pd_hdl); 5537 if (status != IBT_SUCCESS) { 5538 DERR("pd_destroy: ibt_free_pd returned %d\n", status); 5539 } 5540 } 5541 DAPLKA_RS_FINI(pd_rp); 5542 kmem_free(pd_rp, sizeof (daplka_pd_resource_t)); 5543 D3("pd_destroy: exiting, pd_rp %p\n", pd_rp); 5544 return (0); 5545 } 5546 5547 static void 5548 daplka_hash_pd_free(void *obj) 5549 { 5550 daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)obj; 5551 5552 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD); 5553 DAPLKA_RS_UNREF(pd_rp); 5554 } 5555 5556 /* 5557 * removes the pd reference from ia_pd_htbl and releases the 5558 * initial reference to the pd. also destroys the pd if the refcnt 5559 * is zero. 
5560 */ 5561 /* ARGSUSED */ 5562 static int 5563 daplka_pd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 5564 cred_t *cred, int *rvalp) 5565 { 5566 daplka_pd_resource_t *pd_rp; 5567 dapl_pd_free_t args; 5568 int retval; 5569 5570 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_pd_free_t), mode); 5571 if (retval != 0) { 5572 DERR("pd_free: copyin error %d\n", retval); 5573 return (EINVAL); 5574 } 5575 5576 retval = daplka_hash_remove(&ia_rp->ia_pd_htbl, 5577 args.pdf_hkey, (void **)&pd_rp); 5578 if (retval != 0 || pd_rp == NULL) { 5579 DERR("pd_free: cannot find pd resource\n"); 5580 return (EINVAL); 5581 } 5582 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD); 5583 5584 /* UNREF calls the actual free function when refcnt is zero */ 5585 DAPLKA_RS_UNREF(pd_rp); 5586 return (0); 5587 } 5588 5589 /* 5590 * allocates a memory window 5591 */ 5592 /* ARGSUSED */ 5593 static int 5594 daplka_mw_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 5595 cred_t *cred, int *rvalp) 5596 { 5597 daplka_pd_resource_t *pd_rp; 5598 daplka_mw_resource_t *mw_rp; 5599 dapl_mw_alloc_t args; 5600 ibt_status_t status; 5601 boolean_t inserted = B_FALSE; 5602 uint64_t mw_hkey; 5603 ibt_rkey_t mw_rkey; 5604 int retval; 5605 5606 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_alloc_t), mode); 5607 if (retval != 0) { 5608 DERR("mw_alloc: copyin error %d\n", retval); 5609 return (EFAULT); 5610 } 5611 5612 /* 5613 * Allocate and initialize a MW resource 5614 */ 5615 mw_rp = kmem_zalloc(sizeof (daplka_mw_resource_t), daplka_km_flags); 5616 if (mw_rp == NULL) { 5617 DERR("mw_alloc: cannot allocate mw resource\n"); 5618 return (ENOMEM); 5619 } 5620 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw_rp)) 5621 DAPLKA_RS_INIT(mw_rp, DAPL_TYPE_MW, 5622 DAPLKA_RS_RNUM(ia_rp), daplka_mw_destroy); 5623 5624 mutex_init(&mw_rp->mw_lock, NULL, MUTEX_DRIVER, NULL); 5625 mw_rp->mw_hca = ia_rp->ia_hca; 5626 mw_rp->mw_hca_hdl = ia_rp->ia_hca_hdl; 5627 5628 /* get pd handle */ 5629 pd_rp = 
(daplka_pd_resource_t *) 5630 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mw_pd_hkey); 5631 if (pd_rp == NULL) { 5632 DERR("mw_alloc: cannot find pd resource\n"); 5633 goto cleanup; 5634 } 5635 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD); 5636 5637 mw_rp->mw_pd_res = pd_rp; 5638 5639 status = daplka_ibt_alloc_mw(mw_rp, mw_rp->mw_hca_hdl, 5640 pd_rp->pd_hdl, IBT_MW_NOSLEEP, &mw_rp->mw_hdl, &mw_rkey); 5641 5642 if (status != IBT_SUCCESS) { 5643 DERR("mw_alloc: ibt_alloc_mw returned %d\n", status); 5644 *rvalp = (int)status; 5645 retval = 0; 5646 goto cleanup; 5647 } 5648 5649 mutex_enter(&ia_rp->ia_lock); 5650 switch (ia_rp->ia_state) { 5651 case DAPLKA_IA_INIT: 5652 ia_rp->ia_state = DAPLKA_IA_MW_ALLOC_IN_PROGRESS; 5653 ia_rp->ia_mw_alloccnt++; 5654 retval = 0; 5655 break; 5656 case DAPLKA_IA_MW_ALLOC_IN_PROGRESS: 5657 /* another mw_alloc is already in progress increase cnt */ 5658 ia_rp->ia_mw_alloccnt++; 5659 retval = 0; 5660 break; 5661 case DAPLKA_IA_MW_FREEZE_IN_PROGRESS: 5662 /* FALLTHRU */ 5663 case DAPLKA_IA_MW_FROZEN: 5664 /* 5665 * IA is being or already frozen don't allow more MWs to be 5666 * allocated. 
5667 */ 5668 DERR("mw_alloc: IA is freezing MWs (state=%d)\n", 5669 ia_rp->ia_state); 5670 retval = EINVAL; 5671 break; 5672 default: 5673 ASSERT(!"Invalid IA state in mw_alloc"); 5674 DERR("mw_alloc: IA state=%d invalid\n", ia_rp->ia_state); 5675 retval = EINVAL; 5676 break; 5677 } 5678 mutex_exit(&ia_rp->ia_lock); 5679 /* retval is 0 when ia_mw_alloccnt is incremented */ 5680 if (retval != 0) { 5681 goto cleanup; 5682 } 5683 5684 /* insert into mw hash table */ 5685 mw_hkey = 0; 5686 retval = daplka_hash_insert(&ia_rp->ia_mw_htbl, &mw_hkey, 5687 (void *)mw_rp); 5688 if (retval != 0) { 5689 DERR("mw_alloc: cannot insert mw resource into mw_htbl\n"); 5690 mutex_enter(&ia_rp->ia_lock); 5691 ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS); 5692 ia_rp->ia_mw_alloccnt--; 5693 if (ia_rp->ia_mw_alloccnt == 0) { 5694 ia_rp->ia_state = DAPLKA_IA_INIT; 5695 cv_broadcast(&ia_rp->ia_cv); 5696 } 5697 mutex_exit(&ia_rp->ia_lock); 5698 goto cleanup; 5699 } 5700 inserted = B_TRUE; 5701 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mw_rp)) 5702 5703 D3("mw_alloc: ibt_alloc_mw mw_hdl(%p) mw_rkey(0x%llx)\n", 5704 mw_rp->mw_hdl, (longlong_t)mw_rkey); 5705 5706 mutex_enter(&ia_rp->ia_lock); 5707 /* 5708 * We are done with mw_alloc if this was the last mw_alloc 5709 * change state back to DAPLKA_IA_INIT and wake up waiters 5710 * specifically the unlock callback. 
5711 */ 5712 ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS); 5713 ia_rp->ia_mw_alloccnt--; 5714 if (ia_rp->ia_mw_alloccnt == 0) { 5715 ia_rp->ia_state = DAPLKA_IA_INIT; 5716 cv_broadcast(&ia_rp->ia_cv); 5717 } 5718 mutex_exit(&ia_rp->ia_lock); 5719 5720 args.mw_hkey = mw_hkey; 5721 args.mw_rkey = mw_rkey; 5722 5723 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_mw_alloc_t), 5724 mode); 5725 if (retval != 0) { 5726 DERR("mw_alloc: copyout error %d\n", retval); 5727 retval = EFAULT; 5728 goto cleanup; 5729 } 5730 return (0); 5731 5732 cleanup:; 5733 if (inserted) { 5734 daplka_mw_resource_t *free_rp = NULL; 5735 5736 (void) daplka_hash_remove(&ia_rp->ia_mw_htbl, mw_hkey, 5737 (void **)&free_rp); 5738 if (free_rp != mw_rp) { 5739 DERR("mw_alloc: cannot remove mw from hash table\n"); 5740 /* 5741 * we can only get here if another thread 5742 * has completed the cleanup in mw_free 5743 */ 5744 return (retval); 5745 } 5746 } 5747 DAPLKA_RS_UNREF(mw_rp); 5748 return (retval); 5749 } 5750 5751 /* 5752 * removes the mw reference from ia_mw_htbl and releases the 5753 * initial reference to the mw. also destroys the mw if the refcnt 5754 * is zero. 
5755 */ 5756 /* ARGSUSED */ 5757 static int 5758 daplka_mw_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 5759 cred_t *cred, int *rvalp) 5760 { 5761 daplka_mw_resource_t *mw_rp = NULL; 5762 dapl_mw_free_t args; 5763 int retval = 0; 5764 5765 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_free_t), mode); 5766 if (retval != 0) { 5767 DERR("mw_free: copyin error %d\n", retval); 5768 return (EFAULT); 5769 } 5770 5771 retval = daplka_hash_remove(&ia_rp->ia_mw_htbl, args.mw_hkey, 5772 (void **)&mw_rp); 5773 if (retval != 0 || mw_rp == NULL) { 5774 DERR("mw_free: cannot find mw resrc (0x%llx)\n", 5775 (longlong_t)args.mw_hkey); 5776 return (EINVAL); 5777 } 5778 5779 ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW); 5780 5781 /* UNREF calls the actual free function when refcnt is zero */ 5782 DAPLKA_RS_UNREF(mw_rp); 5783 return (retval); 5784 } 5785 5786 /* 5787 * destroys the memory window. 5788 * called when refcnt drops to zero. 5789 */ 5790 static int 5791 daplka_mw_destroy(daplka_resource_t *gen_rp) 5792 { 5793 daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)gen_rp; 5794 ibt_status_t status; 5795 5796 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw_rp)) 5797 ASSERT(DAPLKA_RS_REFCNT(mw_rp) == 0); 5798 D3("mw_destroy: entering, mw_rp 0x%p, rnum %d\n", 5799 mw_rp, DAPLKA_RS_RNUM(mw_rp)); 5800 5801 /* 5802 * free memory window 5803 */ 5804 if (mw_rp->mw_hdl) { 5805 status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl, 5806 mw_rp->mw_hdl); 5807 if (status != IBT_SUCCESS) { 5808 DERR("mw_destroy: ibt_free_mw returned %d\n", status); 5809 } 5810 mw_rp->mw_hdl = NULL; 5811 D3("mw_destroy: mw freed\n"); 5812 } 5813 5814 /* 5815 * release reference on PD 5816 */ 5817 if (mw_rp->mw_pd_res != NULL) { 5818 DAPLKA_RS_UNREF(mw_rp->mw_pd_res); 5819 mw_rp->mw_pd_res = NULL; 5820 } 5821 mutex_destroy(&mw_rp->mw_lock); 5822 DAPLKA_RS_FINI(mw_rp); 5823 kmem_free(mw_rp, sizeof (daplka_mw_resource_t)); 5824 D3("mw_destroy: exiting, mw_rp 0x%p\n", mw_rp); 5825 return (0); 
5826 } 5827 5828 static void 5829 daplka_hash_mw_free(void *obj) 5830 { 5831 daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)obj; 5832 5833 ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW); 5834 DAPLKA_RS_UNREF(mw_rp); 5835 } 5836 5837 /* 5838 * SRQ ioctls and supporting functions 5839 */ 5840 /* ARGSUSED */ 5841 static int 5842 daplka_srq_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 5843 cred_t *cred, int *rvalp) 5844 { 5845 daplka_srq_resource_t *srq_rp; 5846 daplka_pd_resource_t *pd_rp; 5847 dapl_srq_create_t args; 5848 ibt_srq_sizes_t srq_sizes; 5849 ibt_srq_sizes_t srq_real_sizes; 5850 ibt_hca_attr_t *hca_attrp; 5851 uint64_t srq_hkey = 0; 5852 boolean_t inserted = B_FALSE; 5853 int retval; 5854 ibt_status_t status; 5855 5856 D3("srq_create: enter\n"); 5857 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_create_t), 5858 mode); 5859 if (retval != 0) { 5860 DERR("srq_create: copyin error %d\n", retval); 5861 return (EFAULT); 5862 } 5863 srq_rp = kmem_zalloc(sizeof (daplka_srq_resource_t), daplka_km_flags); 5864 if (srq_rp == NULL) { 5865 DERR("srq_create: cannot allocate ep_rp\n"); 5866 return (ENOMEM); 5867 } 5868 DAPLKA_RS_INIT(srq_rp, DAPL_TYPE_SRQ, 5869 DAPLKA_RS_RNUM(ia_rp), daplka_srq_destroy); 5870 5871 srq_rp->srq_hca = ia_rp->ia_hca; 5872 srq_rp->srq_hca_hdl = ia_rp->ia_hca_hdl; 5873 mutex_init(&srq_rp->srq_lock, NULL, MUTEX_DRIVER, NULL); 5874 5875 /* get pd handle */ 5876 pd_rp = (daplka_pd_resource_t *) 5877 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.srqc_pd_hkey); 5878 if (pd_rp == NULL) { 5879 DERR("srq_create: cannot find pd resource\n"); 5880 retval = EINVAL; 5881 goto cleanup; 5882 } 5883 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD); 5884 srq_rp->srq_pd_res = pd_rp; 5885 5886 /* 5887 * these checks ensure that the requested SRQ sizes 5888 * are within the limits supported by the chosen HCA. 
5889 */ 5890 hca_attrp = &ia_rp->ia_hca->hca_attr; 5891 if (args.srqc_sizes.srqs_sz > hca_attrp->hca_max_srqs_sz) { 5892 DERR("srq_create: invalid srqs_sz %d\n", 5893 args.srqc_sizes.srqs_sz); 5894 retval = EINVAL; 5895 goto cleanup; 5896 } 5897 if (args.srqc_sizes.srqs_sgl > hca_attrp->hca_max_srq_sgl) { 5898 DERR("srq_create: invalid srqs_sgl %d\n", 5899 args.srqc_sizes.srqs_sgl); 5900 retval = EINVAL; 5901 goto cleanup; 5902 } 5903 5904 D3("srq_create: srq_sgl %d, srq_sz %d\n", 5905 args.srqc_sizes.srqs_sgl, args.srqc_sizes.srqs_sz); 5906 5907 srq_sizes.srq_wr_sz = args.srqc_sizes.srqs_sz; 5908 srq_sizes.srq_sgl_sz = args.srqc_sizes.srqs_sgl; 5909 5910 /* create srq */ 5911 status = daplka_ibt_alloc_srq(srq_rp, ia_rp->ia_hca_hdl, 5912 IBT_SRQ_USER_MAP, pd_rp->pd_hdl, &srq_sizes, &srq_rp->srq_hdl, 5913 &srq_real_sizes); 5914 if (status != IBT_SUCCESS) { 5915 DERR("srq_create: alloc_srq returned %d\n", status); 5916 *rvalp = (int)status; 5917 retval = 0; 5918 goto cleanup; 5919 } 5920 5921 args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz; 5922 args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz; 5923 5924 /* Get HCA-specific data_out info */ 5925 status = ibt_ci_data_out(ia_rp->ia_hca_hdl, 5926 IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl, 5927 &args.srqc_data_out, sizeof (args.srqc_data_out)); 5928 5929 if (status != IBT_SUCCESS) { 5930 DERR("srq_create: ibt_ci_data_out error(%d)\n", status); 5931 *rvalp = (int)status; 5932 retval = 0; 5933 goto cleanup; 5934 } 5935 5936 srq_rp->srq_real_size = srq_real_sizes.srq_wr_sz; 5937 5938 /* preparing to copyout map_data back to the library */ 5939 args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz; 5940 args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz; 5941 5942 /* insert into srq hash table */ 5943 retval = daplka_hash_insert(&ia_rp->ia_srq_htbl, 5944 &srq_hkey, (void *)srq_rp); 5945 if (retval != 0) { 5946 DERR("srq_create: cannot insert srq resource into srq_htbl\n"); 5947 goto 
cleanup; 5948 } 5949 inserted = B_TRUE; 5950 5951 /* return hkey to library */ 5952 args.srqc_hkey = srq_hkey; 5953 5954 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_create_t), 5955 mode); 5956 if (retval != 0) { 5957 DERR("srq_create: copyout error %d\n", retval); 5958 retval = EFAULT; 5959 goto cleanup; 5960 } 5961 5962 D3("srq_create: %p, 0x%llx\n", srq_rp->srq_hdl, (longlong_t)srq_hkey); 5963 D3(" sz(%d) sgl(%d)\n", 5964 args.srqc_real_sizes.srqs_sz, args.srqc_real_sizes.srqs_sgl); 5965 D3("srq_create: exit\n"); 5966 return (0); 5967 5968 cleanup: 5969 if (inserted) { 5970 daplka_srq_resource_t *free_rp = NULL; 5971 5972 (void) daplka_hash_remove(&ia_rp->ia_srq_htbl, srq_hkey, 5973 (void **)&free_rp); 5974 if (free_rp != srq_rp) { 5975 /* 5976 * this case is impossible because ep_free will 5977 * wait until our state transition is complete. 5978 */ 5979 DERR("srq_create: cannot remove srq from hash table\n"); 5980 ASSERT(B_FALSE); 5981 return (retval); 5982 } 5983 } 5984 DAPLKA_RS_UNREF(srq_rp); 5985 return (retval); 5986 } 5987 5988 /* 5989 * Resize an existing SRQ 5990 */ 5991 /* ARGSUSED */ 5992 static int 5993 daplka_srq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 5994 cred_t *cred, int *rvalp) 5995 { 5996 daplka_srq_resource_t *srq_rp = NULL; 5997 ibt_hca_attr_t *hca_attrp; 5998 dapl_srq_resize_t args; 5999 ibt_status_t status; 6000 int retval = 0; 6001 6002 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_resize_t), 6003 mode); 6004 if (retval != 0) { 6005 DERR("srq_resize: copyin error %d\n", retval); 6006 return (EFAULT); 6007 } 6008 6009 /* get srq resource */ 6010 srq_rp = (daplka_srq_resource_t *) 6011 daplka_hash_lookup(&ia_rp->ia_srq_htbl, args.srqr_hkey); 6012 if (srq_rp == NULL) { 6013 DERR("srq_resize: cannot find srq resource\n"); 6014 return (EINVAL); 6015 } 6016 ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ); 6017 6018 hca_attrp = &ia_rp->ia_hca->hca_attr; 6019 if (args.srqr_new_size > 
hca_attrp->hca_max_srqs_sz) { 6020 DERR("srq_resize: invalid srq size %d", args.srqr_new_size); 6021 retval = EINVAL; 6022 goto cleanup; 6023 } 6024 6025 mutex_enter(&srq_rp->srq_lock); 6026 /* 6027 * If ibt_resize_srq fails that it is primarily due to resource 6028 * shortage. Per IB spec resize will never loose events and 6029 * a resize error leaves the SRQ intact. Therefore even if the 6030 * resize request fails we proceed and get the mapping data 6031 * from the SRQ so that the library can mmap it. 6032 */ 6033 status = ibt_modify_srq(srq_rp->srq_hdl, IBT_SRQ_SET_SIZE, 6034 args.srqr_new_size, 0, &args.srqr_real_size); 6035 if (status != IBT_SUCCESS) { 6036 /* we return the size of the old CQ if resize fails */ 6037 args.srqr_real_size = srq_rp->srq_real_size; 6038 ASSERT(status != IBT_SRQ_HDL_INVALID); 6039 DERR("srq_resize: ibt_modify_srq failed:%d\n", status); 6040 } else { 6041 srq_rp->srq_real_size = args.srqr_real_size; 6042 } 6043 mutex_exit(&srq_rp->srq_lock); 6044 6045 6046 D2("srq_resize(%d): done new_sz(%u) real_sz(%u)\n", 6047 DAPLKA_RS_RNUM(srq_rp), args.srqr_new_size, args.srqr_real_size); 6048 6049 /* Get HCA-specific data_out info */ 6050 status = ibt_ci_data_out(srq_rp->srq_hca_hdl, 6051 IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl, 6052 &args.srqr_data_out, sizeof (args.srqr_data_out)); 6053 if (status != IBT_SUCCESS) { 6054 DERR("srq_resize: ibt_ci_data_out error(%d)\n", status); 6055 /* return ibt_ci_data_out status */ 6056 *rvalp = (int)status; 6057 retval = 0; 6058 goto cleanup; 6059 } 6060 6061 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_resize_t), 6062 mode); 6063 if (retval != 0) { 6064 DERR("srq_resize: copyout error %d\n", retval); 6065 retval = EFAULT; 6066 goto cleanup; 6067 } 6068 6069 cleanup:; 6070 if (srq_rp != NULL) { 6071 DAPLKA_RS_UNREF(srq_rp); 6072 } 6073 return (retval); 6074 } 6075 6076 /* 6077 * Frees an SRQ resource. 
6078 */ 6079 /* ARGSUSED */ 6080 static int 6081 daplka_srq_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 6082 cred_t *cred, int *rvalp) 6083 { 6084 daplka_srq_resource_t *srq_rp = NULL; 6085 dapl_srq_free_t args; 6086 int retval; 6087 6088 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_free_t), mode); 6089 if (retval != 0) { 6090 DERR("srq_free: copyin error %d\n", retval); 6091 return (EFAULT); 6092 } 6093 6094 retval = daplka_hash_remove(&ia_rp->ia_srq_htbl, 6095 args.srqf_hkey, (void **)&srq_rp); 6096 if (retval != 0 || srq_rp == NULL) { 6097 /* 6098 * this is only possible if we have two threads 6099 * calling ep_free in parallel. 6100 */ 6101 DERR("srq_free: cannot find resource retval(%d) 0x%llx\n", 6102 retval, args.srqf_hkey); 6103 return (EINVAL); 6104 } 6105 6106 /* UNREF calls the actual free function when refcnt is zero */ 6107 DAPLKA_RS_UNREF(srq_rp); 6108 return (0); 6109 } 6110 6111 /* 6112 * destroys a SRQ resource. 6113 * called when refcnt drops to zero. 
6114 */ 6115 static int 6116 daplka_srq_destroy(daplka_resource_t *gen_rp) 6117 { 6118 daplka_srq_resource_t *srq_rp = (daplka_srq_resource_t *)gen_rp; 6119 ibt_status_t status; 6120 6121 ASSERT(DAPLKA_RS_REFCNT(srq_rp) == 0); 6122 6123 D3("srq_destroy: entering, srq_rp 0x%p, rnum %d\n", 6124 srq_rp, DAPLKA_RS_RNUM(srq_rp)); 6125 /* 6126 * destroy the srq 6127 */ 6128 if (srq_rp->srq_hdl != NULL) { 6129 status = daplka_ibt_free_srq(srq_rp, srq_rp->srq_hdl); 6130 if (status != IBT_SUCCESS) { 6131 DERR("srq_destroy: ibt_free_srq returned %d\n", 6132 status); 6133 } 6134 srq_rp->srq_hdl = NULL; 6135 D3("srq_destroy: srq freed, rnum %d\n", DAPLKA_RS_RNUM(srq_rp)); 6136 } 6137 /* 6138 * release all references 6139 */ 6140 if (srq_rp->srq_pd_res != NULL) { 6141 DAPLKA_RS_UNREF(srq_rp->srq_pd_res); 6142 srq_rp->srq_pd_res = NULL; 6143 } 6144 6145 mutex_destroy(&srq_rp->srq_lock); 6146 DAPLKA_RS_FINI(srq_rp); 6147 kmem_free(srq_rp, sizeof (daplka_srq_resource_t)); 6148 D3("srq_destroy: exiting, srq_rp 0x%p\n", srq_rp); 6149 return (0); 6150 } 6151 6152 static void 6153 daplka_hash_srq_free(void *obj) 6154 { 6155 daplka_srq_resource_t *srq_rp = (daplka_srq_resource_t *)obj; 6156 6157 ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ); 6158 DAPLKA_RS_UNREF(srq_rp); 6159 } 6160 6161 /* 6162 * This function tells the CM to start listening on a service id. 6163 * It must be called by the passive side client before the client 6164 * can receive connection requests from remote endpoints. If the 6165 * client specifies a non-zero service id (connection qualifier in 6166 * dapl terms), this function will attempt to bind to this service 6167 * id and return an error if the id is already in use. If the client 6168 * specifies zero as the service id, this function will try to find 6169 * the next available service id and return it back to the client. 
6170 * To support the cr_handoff function, this function will, in addition 6171 * to creating and inserting an SP resource into the per-IA SP hash 6172 * table, insert the SP resource into a global SP table. This table 6173 * maintains all active service points created by all dapl clients. 6174 * CR handoff locates the target SP by iterating through this global 6175 * table. 6176 */ 6177 /* ARGSUSED */ 6178 static int 6179 daplka_service_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 6180 cred_t *cred, int *rvalp) 6181 { 6182 daplka_evd_resource_t *evd_rp = NULL; 6183 daplka_sp_resource_t *sp_rp = NULL; 6184 dapl_service_register_t args; 6185 ibt_srv_desc_t sd_args; 6186 ibt_srv_bind_t sb_args; 6187 ibt_status_t status; 6188 ib_svc_id_t retsid = 0; 6189 uint64_t sp_hkey = 0; 6190 boolean_t bumped = B_FALSE; 6191 int backlog_size; 6192 int retval = 0; 6193 6194 retval = ddi_copyin((void *)arg, &args, 6195 sizeof (dapl_service_register_t), mode); 6196 if (retval != 0) { 6197 DERR("service_register: copyin error %d\n", retval); 6198 return (EINVAL); 6199 } 6200 6201 sp_rp = kmem_zalloc(sizeof (*sp_rp), daplka_km_flags); 6202 if (sp_rp == NULL) { 6203 DERR("service_register: cannot allocate sp resource\n"); 6204 return (ENOMEM); 6205 } 6206 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp)) 6207 DAPLKA_RS_INIT(sp_rp, DAPL_TYPE_SP, 6208 DAPLKA_RS_RNUM(ia_rp), daplka_sp_destroy); 6209 6210 /* check if evd exists */ 6211 evd_rp = (daplka_evd_resource_t *) 6212 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.sr_evd_hkey); 6213 if (evd_rp == NULL) { 6214 DERR("service_register: evd resource not found\n"); 6215 retval = EINVAL; 6216 goto cleanup; 6217 } 6218 /* 6219 * initialize backlog size 6220 */ 6221 if (evd_rp && evd_rp->evd_cq_real_size > 0) { 6222 backlog_size = evd_rp->evd_cq_real_size + 1; 6223 } else { 6224 backlog_size = DAPLKA_DEFAULT_SP_BACKLOG; 6225 } 6226 D2("service_register: args.sr_sid = %llu\n", (longlong_t)args.sr_sid); 6227 6228 /* save the 
userland sp ptr */ 6229 sp_rp->sp_cookie = args.sr_sp_cookie; 6230 sp_rp->sp_backlog_size = backlog_size; 6231 D3("service_register: backlog set to %d\n", sp_rp->sp_backlog_size); 6232 sp_rp->sp_backlog = kmem_zalloc(sp_rp->sp_backlog_size * 6233 sizeof (daplka_sp_conn_pend_t), daplka_km_flags); 6234 6235 /* save evd resource pointer */ 6236 sp_rp->sp_evd_res = evd_rp; 6237 6238 /* 6239 * save ruid here so that we can do a comparison later 6240 * when someone does cr_handoff. the check will prevent 6241 * a malicious app from passing a CR to us. 6242 */ 6243 sp_rp->sp_ruid = crgetruid(cred); 6244 6245 /* fill in args for register_service */ 6246 sd_args.sd_ud_handler = NULL; 6247 sd_args.sd_handler = daplka_cm_service_handler; 6248 sd_args.sd_flags = IBT_SRV_NO_FLAGS; 6249 6250 status = ibt_register_service(daplka_dev->daplka_clnt_hdl, 6251 &sd_args, args.sr_sid, 1, &sp_rp->sp_srv_hdl, &retsid); 6252 6253 if (status != IBT_SUCCESS) { 6254 DERR("service_register: ibt_register_service returned %d\n", 6255 status); 6256 *rvalp = (int)status; 6257 retval = 0; 6258 goto cleanup; 6259 } 6260 /* save returned sid */ 6261 sp_rp->sp_conn_qual = retsid; 6262 args.sr_retsid = retsid; 6263 6264 /* fill in args for bind_service */ 6265 sb_args.sb_pkey = ia_rp->ia_port_pkey; 6266 sb_args.sb_lease = 0xffffffff; 6267 sb_args.sb_key[0] = 0x1234; 6268 sb_args.sb_key[1] = 0x5678; 6269 sb_args.sb_name = DAPLKA_DRV_NAME; 6270 6271 D2("service_register: bind(0x%llx:0x%llx)\n", 6272 (longlong_t)ia_rp->ia_hca_sgid.gid_prefix, 6273 (longlong_t)ia_rp->ia_hca_sgid.gid_guid); 6274 6275 status = ibt_bind_service(sp_rp->sp_srv_hdl, ia_rp->ia_hca_sgid, 6276 &sb_args, (void *)sp_rp, &sp_rp->sp_bind_hdl); 6277 if (status != IBT_SUCCESS) { 6278 DERR("service_register: ibt_bind_service returned %d\n", 6279 status); 6280 *rvalp = (int)status; 6281 retval = 0; 6282 goto cleanup; 6283 } 6284 6285 /* 6286 * need to bump refcnt because the global hash table will 6287 * have a reference to sp_rp 6288 */ 
6289 DAPLKA_RS_REF(sp_rp); 6290 bumped = B_TRUE; 6291 6292 /* insert into global sp hash table */ 6293 sp_rp->sp_global_hkey = 0; 6294 retval = daplka_hash_insert(&daplka_global_sp_htbl, 6295 &sp_rp->sp_global_hkey, (void *)sp_rp); 6296 if (retval != 0) { 6297 DERR("service_register: cannot insert sp resource\n"); 6298 goto cleanup; 6299 } 6300 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sp_rp)) 6301 6302 /* insert into per-IA sp hash table */ 6303 retval = daplka_hash_insert(&ia_rp->ia_sp_htbl, 6304 &sp_hkey, (void *)sp_rp); 6305 if (retval != 0) { 6306 DERR("service_register: cannot insert sp resource\n"); 6307 goto cleanup; 6308 } 6309 6310 /* pass index to application */ 6311 args.sr_sp_hkey = sp_hkey; 6312 retval = ddi_copyout(&args, (void *)arg, 6313 sizeof (dapl_service_register_t), mode); 6314 if (retval != 0) { 6315 DERR("service_register: copyout error %d\n", retval); 6316 retval = EFAULT; 6317 goto cleanup; 6318 } 6319 return (0); 6320 6321 cleanup:; 6322 ASSERT(sp_rp != NULL); 6323 /* remove from ia table */ 6324 if (sp_hkey != 0) { 6325 daplka_sp_resource_t *free_rp = NULL; 6326 6327 (void) daplka_hash_remove(&ia_rp->ia_sp_htbl, 6328 sp_hkey, (void **)&free_rp); 6329 if (free_rp != sp_rp) { 6330 DERR("service_register: cannot remove sp\n"); 6331 /* 6332 * we can only get here if another thread 6333 * has completed the cleanup in svc_deregister 6334 */ 6335 return (retval); 6336 } 6337 } 6338 6339 /* remove from global table */ 6340 if (sp_rp->sp_global_hkey != 0) { 6341 daplka_sp_resource_t *free_rp = NULL; 6342 6343 /* 6344 * we get here if either the hash_insert into 6345 * ia_sp_htbl failed or the ddi_copyout failed. 6346 * hash_insert failure implies that we are the 6347 * only thread with a reference to sp. ddi_copyout 6348 * failure implies that svc_deregister could have 6349 * picked up the sp and destroyed it. 
but since 6350 * we got to this point, we must have removed 6351 * the sp ourselves in hash_remove above and 6352 * that the sp can be destroyed by us. 6353 */ 6354 (void) daplka_hash_remove(&daplka_global_sp_htbl, 6355 sp_rp->sp_global_hkey, (void **)&free_rp); 6356 if (free_rp != sp_rp) { 6357 DERR("service_register: cannot remove sp\n"); 6358 /* 6359 * this case is impossible. see explanation above. 6360 */ 6361 ASSERT(B_FALSE); 6362 return (retval); 6363 } 6364 sp_rp->sp_global_hkey = 0; 6365 } 6366 /* unreference sp */ 6367 if (bumped) { 6368 DAPLKA_RS_UNREF(sp_rp); 6369 } 6370 6371 /* destroy sp resource */ 6372 DAPLKA_RS_UNREF(sp_rp); 6373 return (retval); 6374 } 6375 6376 /* 6377 * deregisters the service and removes SP from the global table. 6378 */ 6379 /* ARGSUSED */ 6380 static int 6381 daplka_service_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 6382 cred_t *cred, int *rvalp) 6383 { 6384 dapl_service_deregister_t args; 6385 daplka_sp_resource_t *sp_rp = NULL, *g_sp_rp = NULL; 6386 int retval; 6387 6388 retval = ddi_copyin((void *)arg, &args, 6389 sizeof (dapl_service_deregister_t), mode); 6390 6391 if (retval != 0) { 6392 DERR("service_deregister: copyin error %d\n", retval); 6393 return (EINVAL); 6394 } 6395 6396 retval = daplka_hash_remove(&ia_rp->ia_sp_htbl, 6397 args.sdr_sp_hkey, (void **)&sp_rp); 6398 if (retval != 0 || sp_rp == NULL) { 6399 DERR("service_deregister: cannot find sp resource\n"); 6400 return (EINVAL); 6401 } 6402 6403 retval = daplka_hash_remove(&daplka_global_sp_htbl, 6404 sp_rp->sp_global_hkey, (void **)&g_sp_rp); 6405 if (retval != 0 || g_sp_rp == NULL) { 6406 DERR("service_deregister: cannot find sp resource\n"); 6407 } 6408 6409 /* remove the global reference */ 6410 if (g_sp_rp == sp_rp) { 6411 DAPLKA_RS_UNREF(g_sp_rp); 6412 } 6413 6414 DAPLKA_RS_UNREF(sp_rp); 6415 return (0); 6416 } 6417 6418 /* 6419 * destroys a service point. 6420 * called when the refcnt drops to zero. 
6421 */ 6422 static int 6423 daplka_sp_destroy(daplka_resource_t *gen_rp) 6424 { 6425 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)gen_rp; 6426 ibt_status_t status; 6427 6428 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp)) 6429 ASSERT(DAPLKA_RS_REFCNT(sp_rp) == 0); 6430 D3("sp_destroy: entering, sp_rp %p, rnum %d\n", 6431 sp_rp, DAPLKA_RS_RNUM(sp_rp)); 6432 6433 /* 6434 * it is possible for pending connections to remain 6435 * on an SP. We need to clean them up here. 6436 */ 6437 if (sp_rp->sp_backlog != NULL) { 6438 ibt_cm_proceed_reply_t proc_reply; 6439 int i, cnt = 0; 6440 void *spcp_sidp; 6441 6442 for (i = 0; i < sp_rp->sp_backlog_size; i++) { 6443 if (sp_rp->sp_backlog[i].spcp_state == 6444 DAPLKA_SPCP_PENDING) { 6445 cnt++; 6446 if (sp_rp->sp_backlog[i].spcp_sid == NULL) { 6447 DERR("sp_destroy: " 6448 "spcp_sid == NULL!\n"); 6449 continue; 6450 } 6451 mutex_enter(&sp_rp->sp_lock); 6452 spcp_sidp = sp_rp->sp_backlog[i].spcp_sid; 6453 sp_rp->sp_backlog[i].spcp_state = 6454 DAPLKA_SPCP_INIT; 6455 sp_rp->sp_backlog[i].spcp_sid = NULL; 6456 sp_rp->sp_backlog[i].spcp_req_len = 0; 6457 mutex_exit(&sp_rp->sp_lock); 6458 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, 6459 spcp_sidp, 6460 IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0); 6461 if (status != IBT_SUCCESS) { 6462 DERR("sp_destroy: proceed failed %d\n", 6463 status); 6464 } 6465 } 6466 } 6467 if (cnt > 0) { 6468 DERR("sp_destroy: found %d pending " 6469 "connections\n", cnt); 6470 } 6471 } 6472 6473 if (sp_rp->sp_srv_hdl != NULL && sp_rp->sp_bind_hdl != NULL) { 6474 status = ibt_unbind_service(sp_rp->sp_srv_hdl, 6475 sp_rp->sp_bind_hdl); 6476 if (status != IBT_SUCCESS) { 6477 DERR("sp_destroy: ibt_unbind_service " 6478 "failed: %d\n", status); 6479 } 6480 } 6481 6482 if (sp_rp->sp_srv_hdl != NULL) { 6483 status = ibt_deregister_service(daplka_dev->daplka_clnt_hdl, 6484 sp_rp->sp_srv_hdl); 6485 if (status != IBT_SUCCESS) { 6486 DERR("sp_destroy: ibt_deregister_service " 6487 "failed: %d\n", status); 6488 } 
6489 } 6490 if (sp_rp->sp_backlog != NULL) { 6491 kmem_free(sp_rp->sp_backlog, 6492 sp_rp->sp_backlog_size * sizeof (daplka_sp_conn_pend_t)); 6493 sp_rp->sp_backlog = NULL; 6494 sp_rp->sp_backlog_size = 0; 6495 } 6496 6497 /* 6498 * release reference to evd 6499 */ 6500 if (sp_rp->sp_evd_res != NULL) { 6501 DAPLKA_RS_UNREF(sp_rp->sp_evd_res); 6502 } 6503 sp_rp->sp_bind_hdl = NULL; 6504 sp_rp->sp_srv_hdl = NULL; 6505 DAPLKA_RS_FINI(sp_rp); 6506 kmem_free(sp_rp, sizeof (*sp_rp)); 6507 D3("sp_destroy: exiting, sp_rp %p\n", sp_rp); 6508 return (0); 6509 } 6510 6511 /* 6512 * this function is called by daplka_hash_destroy for 6513 * freeing SP resource objects 6514 */ 6515 static void 6516 daplka_hash_sp_free(void *obj) 6517 { 6518 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj; 6519 daplka_sp_resource_t *g_sp_rp; 6520 int retval; 6521 6522 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP); 6523 6524 retval = daplka_hash_remove(&daplka_global_sp_htbl, 6525 sp_rp->sp_global_hkey, (void **)&g_sp_rp); 6526 if (retval != 0 || g_sp_rp == NULL) { 6527 DERR("sp_free: cannot find sp resource\n"); 6528 } 6529 if (g_sp_rp == sp_rp) { 6530 DAPLKA_RS_UNREF(g_sp_rp); 6531 } 6532 6533 DAPLKA_RS_UNREF(sp_rp); 6534 } 6535 6536 static void 6537 daplka_hash_sp_unref(void *obj) 6538 { 6539 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj; 6540 6541 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP); 6542 DAPLKA_RS_UNREF(sp_rp); 6543 } 6544 6545 /* 6546 * Passive side CM handlers 6547 */ 6548 6549 /* 6550 * processes the REQ_RCV event 6551 */ 6552 /* ARGSUSED */ 6553 static ibt_cm_status_t 6554 daplka_cm_service_req(daplka_sp_resource_t *spp, ibt_cm_event_t *event, 6555 ibt_cm_return_args_t *ret_args, void *pr_data, ibt_priv_data_len_t pr_len) 6556 { 6557 daplka_sp_conn_pend_t *conn = NULL; 6558 daplka_evd_event_t *cr_ev = NULL; 6559 ibt_cm_status_t cm_status = IBT_CM_DEFAULT; 6560 uint16_t bkl_index; 6561 ibt_status_t status; 6562 6563 /* 6564 * acquire a slot in the connection 
backlog of this service point 6565 */ 6566 mutex_enter(&spp->sp_lock); 6567 for (bkl_index = 0; bkl_index < spp->sp_backlog_size; bkl_index++) { 6568 if (spp->sp_backlog[bkl_index].spcp_state == DAPLKA_SPCP_INIT) { 6569 conn = &spp->sp_backlog[bkl_index]; 6570 ASSERT(conn->spcp_sid == NULL); 6571 conn->spcp_state = DAPLKA_SPCP_PENDING; 6572 conn->spcp_sid = event->cm_session_id; 6573 break; 6574 } 6575 } 6576 mutex_exit(&spp->sp_lock); 6577 6578 /* 6579 * too many pending connections 6580 */ 6581 if (bkl_index == spp->sp_backlog_size) { 6582 DERR("service_req: connection pending exceeded %d limit\n", 6583 spp->sp_backlog_size); 6584 return (IBT_CM_NO_RESOURCE); 6585 } 6586 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*conn)) 6587 6588 /* 6589 * save data for cr_handoff 6590 */ 6591 if (pr_data != NULL && pr_len > 0) { 6592 int trunc_len = pr_len; 6593 6594 if (trunc_len > DAPL_MAX_PRIVATE_DATA_SIZE) { 6595 DERR("service_req: private data truncated\n"); 6596 trunc_len = DAPL_MAX_PRIVATE_DATA_SIZE; 6597 } 6598 conn->spcp_req_len = trunc_len; 6599 bcopy(pr_data, conn->spcp_req_data, trunc_len); 6600 } else { 6601 conn->spcp_req_len = 0; 6602 } 6603 conn->spcp_rdma_ra_in = event->cm_event.req.req_rdma_ra_in; 6604 conn->spcp_rdma_ra_out = event->cm_event.req.req_rdma_ra_out; 6605 6606 /* 6607 * create a CR event 6608 */ 6609 cr_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP); 6610 if (cr_ev == NULL) { 6611 DERR("service_req: could not alloc cr_ev\n"); 6612 cm_status = IBT_CM_NO_RESOURCE; 6613 goto cleanup; 6614 } 6615 6616 cr_ev->ee_next = NULL; 6617 cr_ev->ee_cmev.ec_cm_cookie = spp->sp_cookie; 6618 cr_ev->ee_cmev.ec_cm_is_passive = B_TRUE; 6619 cr_ev->ee_cmev.ec_cm_psep_cookie = DAPLKA_CREATE_PSEP_COOKIE(bkl_index); 6620 /* 6621 * save the requestor gid 6622 * daplka_event_poll needs this if this is a third party REQ_RCV 6623 */ 6624 cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix = 6625 event->cm_event.req.req_prim_addr.av_dgid.gid_prefix; 6626 
cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid = 6627 event->cm_event.req.req_prim_addr.av_dgid.gid_guid; 6628 6629 /* 6630 * set event type 6631 */ 6632 if (pr_len == 0) { 6633 cr_ev->ee_cmev.ec_cm_ev_type = 6634 DAPL_IB_CME_CONNECTION_REQUEST_PENDING; 6635 } else { 6636 cr_ev->ee_cmev.ec_cm_ev_priv_data = 6637 kmem_zalloc(pr_len, KM_NOSLEEP); 6638 if (cr_ev->ee_cmev.ec_cm_ev_priv_data == NULL) { 6639 DERR("service_req: could not alloc priv\n"); 6640 cm_status = IBT_CM_NO_RESOURCE; 6641 goto cleanup; 6642 } 6643 bcopy(pr_data, cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len); 6644 cr_ev->ee_cmev.ec_cm_ev_type = 6645 DAPL_IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA; 6646 } 6647 cr_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len; 6648 6649 /* 6650 * tell the active side to expect the processing time to be 6651 * at most equal to daplka_cm_delay 6652 */ 6653 status = ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id, 6654 daplka_cm_delay, NULL, 0); 6655 if (status != IBT_SUCCESS) { 6656 DERR("service_req: ibt_cm_delay failed %d\n", status); 6657 cm_status = IBT_CM_NO_RESOURCE; 6658 goto cleanup; 6659 } 6660 6661 /* 6662 * enqueue cr_ev onto the cr_events list of the EVD 6663 * corresponding to the SP 6664 */ 6665 D2("service_req: enqueue event(%p) evdp(%p) priv_data(%p) " 6666 "priv_len(%d) psep(0x%llx)\n", cr_ev, spp->sp_evd_res, 6667 cr_ev->ee_cmev.ec_cm_ev_priv_data, 6668 (int)cr_ev->ee_cmev.ec_cm_ev_priv_data_len, 6669 (longlong_t)cr_ev->ee_cmev.ec_cm_psep_cookie); 6670 6671 daplka_evd_wakeup(spp->sp_evd_res, 6672 &spp->sp_evd_res->evd_cr_events, cr_ev); 6673 6674 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*conn)) 6675 return (IBT_CM_DEFER); 6676 6677 cleanup:; 6678 /* 6679 * free the cr event 6680 */ 6681 if (cr_ev != NULL) { 6682 if (cr_ev->ee_cmev.ec_cm_ev_priv_data != NULL) { 6683 kmem_free(cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len); 6684 cr_ev->ee_cmev.ec_cm_ev_priv_data = NULL; 6685 cr_ev->ee_cmev.ec_cm_ev_priv_data_len = 0; 6686 } 6687 kmem_free(cr_ev, sizeof 
(daplka_evd_event_t)); 6688 } 6689 /* 6690 * release our slot in the backlog array 6691 */ 6692 if (conn != NULL) { 6693 mutex_enter(&spp->sp_lock); 6694 ASSERT(conn->spcp_state == DAPLKA_SPCP_PENDING); 6695 ASSERT(conn->spcp_sid == event->cm_session_id); 6696 conn->spcp_state = DAPLKA_SPCP_INIT; 6697 conn->spcp_req_len = 0; 6698 conn->spcp_sid = NULL; 6699 mutex_exit(&spp->sp_lock); 6700 } 6701 return (cm_status); 6702 } 6703 6704 /* 6705 * processes the CONN_CLOSED event 6706 */ 6707 /* ARGSUSED */ 6708 static ibt_cm_status_t 6709 daplka_cm_service_conn_closed(daplka_sp_resource_t *sp_rp, 6710 ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args, 6711 void *priv_data, ibt_priv_data_len_t len) 6712 { 6713 daplka_ep_resource_t *ep_rp; 6714 daplka_evd_event_t *disc_ev; 6715 uint32_t old_state, new_state; 6716 6717 ep_rp = (daplka_ep_resource_t *) 6718 ibt_get_chan_private(event->cm_channel); 6719 if (ep_rp == NULL) { 6720 DERR("service_conn_closed: ep_rp == NULL\n"); 6721 return (IBT_CM_ACCEPT); 6722 } 6723 6724 /* 6725 * verify that the ep_state is either CONNECTED or 6726 * DISCONNECTING. if it is not in either states return 6727 * without generating an event. 
6728 */ 6729 new_state = old_state = daplka_ep_get_state(ep_rp); 6730 if (old_state != DAPLKA_EP_STATE_CONNECTED && 6731 old_state != DAPLKA_EP_STATE_DISCONNECTING) { 6732 /* 6733 * we can get here if the connection is being aborted 6734 */ 6735 D2("service_conn_closed: conn aborted, state = %d, " 6736 "closed = %d\n", old_state, (int)event->cm_event.closed); 6737 daplka_ep_set_state(ep_rp, old_state, new_state); 6738 return (IBT_CM_ACCEPT); 6739 } 6740 6741 /* 6742 * create a DAPL_IB_CME_DISCONNECTED event 6743 */ 6744 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP); 6745 if (disc_ev == NULL) { 6746 DERR("service_conn_closed: cannot alloc disc_ev\n"); 6747 daplka_ep_set_state(ep_rp, old_state, new_state); 6748 return (IBT_CM_ACCEPT); 6749 } 6750 6751 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED; 6752 disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie; 6753 disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE; 6754 disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie; 6755 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL; 6756 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0; 6757 6758 D2("service_conn_closed: enqueue event(%p) evdp(%p) psep(0x%llx)\n", 6759 disc_ev, sp_rp->sp_evd_res, (longlong_t)ep_rp->ep_psep_cookie); 6760 6761 /* 6762 * transition ep_state to DISCONNECTED 6763 */ 6764 new_state = DAPLKA_EP_STATE_DISCONNECTED; 6765 daplka_ep_set_state(ep_rp, old_state, new_state); 6766 6767 /* 6768 * enqueue event onto the conn_evd owned by ep_rp 6769 */ 6770 daplka_evd_wakeup(ep_rp->ep_conn_evd, 6771 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev); 6772 6773 return (IBT_CM_ACCEPT); 6774 } 6775 6776 /* 6777 * processes the CONN_EST event 6778 */ 6779 /* ARGSUSED */ 6780 static ibt_cm_status_t 6781 daplka_cm_service_conn_est(daplka_sp_resource_t *sp_rp, ibt_cm_event_t *event, 6782 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len) 6783 { 6784 daplka_ep_resource_t *ep_rp; 6785 daplka_evd_event_t *conn_ev; 6786 void *pr_data = 
event->cm_priv_data; 6787 ibt_priv_data_len_t pr_len = event->cm_priv_data_len; 6788 uint32_t old_state, new_state; 6789 6790 ep_rp = (daplka_ep_resource_t *) 6791 ibt_get_chan_private(event->cm_channel); 6792 if (ep_rp == NULL) { 6793 DERR("service_conn_est: ep_rp == NULL\n"); 6794 return (IBT_CM_ACCEPT); 6795 } 6796 6797 /* 6798 * verify that ep_state is ACCEPTING. if it is not in this 6799 * state, return without generating an event. 6800 */ 6801 new_state = old_state = daplka_ep_get_state(ep_rp); 6802 if (old_state != DAPLKA_EP_STATE_ACCEPTING) { 6803 /* 6804 * we can get here if the connection is being aborted 6805 */ 6806 DERR("service_conn_est: conn aborted, state = %d\n", 6807 old_state); 6808 daplka_ep_set_state(ep_rp, old_state, new_state); 6809 return (IBT_CM_ACCEPT); 6810 } 6811 6812 /* 6813 * create a DAPL_IB_CME_CONNECTED event 6814 */ 6815 conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP); 6816 if (conn_ev == NULL) { 6817 DERR("service_conn_est: conn_ev alloc failed\n"); 6818 daplka_ep_set_state(ep_rp, old_state, new_state); 6819 return (IBT_CM_ACCEPT); 6820 } 6821 6822 conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED; 6823 conn_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie; 6824 conn_ev->ee_cmev.ec_cm_is_passive = B_TRUE; 6825 conn_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie; 6826 6827 /* 6828 * copy private data into event 6829 */ 6830 if (pr_len > 0) { 6831 conn_ev->ee_cmev.ec_cm_ev_priv_data = 6832 kmem_zalloc(pr_len, KM_NOSLEEP); 6833 if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) { 6834 DERR("service_conn_est: pr_data alloc failed\n"); 6835 daplka_ep_set_state(ep_rp, old_state, new_state); 6836 kmem_free(conn_ev, sizeof (daplka_evd_event_t)); 6837 return (IBT_CM_ACCEPT); 6838 } 6839 bcopy(pr_data, conn_ev->ee_cmev.ec_cm_ev_priv_data, pr_len); 6840 } 6841 conn_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len; 6842 6843 D2("service_conn_est: enqueue event(%p) evdp(%p)\n", 6844 conn_ev, ep_rp->ep_conn_evd); 6845 6846 /* 
6847 * transition ep_state to CONNECTED 6848 */ 6849 new_state = DAPLKA_EP_STATE_CONNECTED; 6850 daplka_ep_set_state(ep_rp, old_state, new_state); 6851 6852 /* 6853 * enqueue event onto the conn_evd owned by ep_rp 6854 */ 6855 daplka_evd_wakeup(ep_rp->ep_conn_evd, 6856 &ep_rp->ep_conn_evd->evd_conn_events, conn_ev); 6857 6858 return (IBT_CM_ACCEPT); 6859 } 6860 6861 /* 6862 * processes the FAILURE event 6863 */ 6864 /* ARGSUSED */ 6865 static ibt_cm_status_t 6866 daplka_cm_service_event_failure(daplka_sp_resource_t *sp_rp, 6867 ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args, void *priv_data, 6868 ibt_priv_data_len_t len) 6869 { 6870 daplka_evd_event_t *disc_ev; 6871 daplka_ep_resource_t *ep_rp; 6872 uint32_t old_state, new_state; 6873 ibt_rc_chan_query_attr_t chan_attrs; 6874 ibt_status_t status; 6875 6876 /* 6877 * check that we still have a valid cm_channel before continuing 6878 */ 6879 if (event->cm_channel == NULL) { 6880 DERR("serice_event_failure: event->cm_channel == NULL\n"); 6881 return (IBT_CM_ACCEPT); 6882 } 6883 ep_rp = (daplka_ep_resource_t *) 6884 ibt_get_chan_private(event->cm_channel); 6885 if (ep_rp == NULL) { 6886 DERR("service_event_failure: ep_rp == NULL\n"); 6887 return (IBT_CM_ACCEPT); 6888 } 6889 6890 /* 6891 * verify that ep_state is ACCEPTING or DISCONNECTING. if it 6892 * is not in either state, return without generating an event. 
6893 */ 6894 new_state = old_state = daplka_ep_get_state(ep_rp); 6895 if (old_state != DAPLKA_EP_STATE_ACCEPTING && 6896 old_state != DAPLKA_EP_STATE_DISCONNECTING) { 6897 /* 6898 * we can get here if the connection is being aborted 6899 */ 6900 DERR("service_event_failure: conn aborted, state = %d, " 6901 "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state, 6902 (int)event->cm_event.failed.cf_code, 6903 (int)event->cm_event.failed.cf_msg, 6904 (int)event->cm_event.failed.cf_reason); 6905 6906 daplka_ep_set_state(ep_rp, old_state, new_state); 6907 return (IBT_CM_ACCEPT); 6908 } 6909 6910 bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t)); 6911 status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs); 6912 6913 if ((status == IBT_SUCCESS) && 6914 (chan_attrs.rc_state != IBT_STATE_ERROR)) { 6915 DERR("service_event_failure: conn abort qpn %d state %d\n", 6916 chan_attrs.rc_qpn, chan_attrs.rc_state); 6917 6918 /* explicit transition the QP to ERROR state */ 6919 status = ibt_flush_channel(ep_rp->ep_chan_hdl); 6920 } 6921 6922 /* 6923 * create an event 6924 */ 6925 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP); 6926 if (disc_ev == NULL) { 6927 DERR("service_event_failure: cannot alloc disc_ev\n"); 6928 daplka_ep_set_state(ep_rp, old_state, new_state); 6929 return (IBT_CM_ACCEPT); 6930 } 6931 6932 /* 6933 * fill in the appropriate event type 6934 */ 6935 if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) { 6936 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT; 6937 } else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) { 6938 switch (event->cm_event.failed.cf_reason) { 6939 case IBT_CM_INVALID_CID: 6940 disc_ev->ee_cmev.ec_cm_ev_type = 6941 DAPL_IB_CME_DESTINATION_REJECT; 6942 break; 6943 default: 6944 disc_ev->ee_cmev.ec_cm_ev_type = 6945 DAPL_IB_CME_LOCAL_FAILURE; 6946 break; 6947 } 6948 } else { 6949 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE; 6950 } 6951 disc_ev->ee_cmev.ec_cm_cookie = 
sp_rp->sp_cookie; 6952 disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE; 6953 disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie; 6954 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0; 6955 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL; 6956 6957 D2("service_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) " 6958 "cf_msg(%d) cf_reason(%d) psep(0x%llx)\n", disc_ev, 6959 ep_rp->ep_conn_evd, (int)event->cm_event.failed.cf_code, 6960 (int)event->cm_event.failed.cf_msg, 6961 (int)event->cm_event.failed.cf_reason, 6962 (longlong_t)ep_rp->ep_psep_cookie); 6963 6964 /* 6965 * transition ep_state to DISCONNECTED 6966 */ 6967 new_state = DAPLKA_EP_STATE_DISCONNECTED; 6968 daplka_ep_set_state(ep_rp, old_state, new_state); 6969 6970 /* 6971 * enqueue event onto the conn_evd owned by ep_rp 6972 */ 6973 daplka_evd_wakeup(ep_rp->ep_conn_evd, 6974 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev); 6975 6976 return (IBT_CM_ACCEPT); 6977 } 6978 6979 /* 6980 * this is the passive side CM handler. it gets registered 6981 * when an SP resource is created in daplka_service_register. 
6982 */ 6983 static ibt_cm_status_t 6984 daplka_cm_service_handler(void *cm_private, ibt_cm_event_t *event, 6985 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len) 6986 { 6987 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)cm_private; 6988 6989 if (sp_rp == NULL) { 6990 DERR("service_handler: sp_rp == NULL\n"); 6991 return (IBT_CM_NO_RESOURCE); 6992 } 6993 /* 6994 * default is not to return priv data 6995 */ 6996 if (ret_args != NULL) { 6997 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args)) 6998 ret_args->cm_ret_len = 0; 6999 } 7000 7001 switch (event->cm_type) { 7002 case IBT_CM_EVENT_REQ_RCV: 7003 D2("service_handler: IBT_CM_EVENT_REQ_RCV\n"); 7004 return (daplka_cm_service_req(sp_rp, event, ret_args, 7005 event->cm_priv_data, event->cm_priv_data_len)); 7006 7007 case IBT_CM_EVENT_REP_RCV: 7008 /* passive side should not receive this event */ 7009 D2("service_handler: IBT_CM_EVENT_REP_RCV\n"); 7010 return (IBT_CM_DEFAULT); 7011 7012 case IBT_CM_EVENT_CONN_CLOSED: 7013 D2("service_handler: IBT_CM_EVENT_CONN_CLOSED %d\n", 7014 event->cm_event.closed); 7015 return (daplka_cm_service_conn_closed(sp_rp, event, ret_args, 7016 priv_data, len)); 7017 7018 case IBT_CM_EVENT_MRA_RCV: 7019 /* passive side does default processing MRA event */ 7020 D2("service_handler: IBT_CM_EVENT_MRA_RCV\n"); 7021 return (IBT_CM_DEFAULT); 7022 7023 case IBT_CM_EVENT_CONN_EST: 7024 D2("service_handler: IBT_CM_EVENT_CONN_EST\n"); 7025 return (daplka_cm_service_conn_est(sp_rp, event, ret_args, 7026 priv_data, len)); 7027 7028 case IBT_CM_EVENT_FAILURE: 7029 D2("service_handler: IBT_CM_EVENT_FAILURE\n"); 7030 return (daplka_cm_service_event_failure(sp_rp, event, ret_args, 7031 priv_data, len)); 7032 case IBT_CM_EVENT_LAP_RCV: 7033 /* active side had initiated a path migration operation */ 7034 D2("service_handler: IBT_CM_EVENT_LAP_RCV\n"); 7035 return (IBT_CM_ACCEPT); 7036 default: 7037 DERR("service_handler: invalid event %d\n", event->cm_type); 7038 break; 7039 } 
7040 return (IBT_CM_DEFAULT); 7041 } 7042 7043 /* 7044 * Active side CM handlers 7045 */ 7046 7047 /* 7048 * Processes the REP_RCV event. When the passive side accepts the 7049 * connection, this handler is called. We make a copy of the private 7050 * data into the ep so that it can be passed back to userland in when 7051 * the CONN_EST event occurs. 7052 */ 7053 /* ARGSUSED */ 7054 static ibt_cm_status_t 7055 daplka_cm_rc_rep_rcv(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event, 7056 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len) 7057 { 7058 void *pr_data = event->cm_priv_data; 7059 ibt_priv_data_len_t pr_len = event->cm_priv_data_len; 7060 uint32_t old_state, new_state; 7061 7062 D2("rc_rep_rcv: pr_data(0x%p), pr_len(%d)\n", pr_data, 7063 (int)pr_len); 7064 7065 ASSERT(ep_rp != NULL); 7066 new_state = old_state = daplka_ep_get_state(ep_rp); 7067 if (old_state != DAPLKA_EP_STATE_CONNECTING) { 7068 /* 7069 * we can get here if the connection is being aborted 7070 */ 7071 DERR("rc_rep_rcv: conn aborted, state = %d\n", old_state); 7072 daplka_ep_set_state(ep_rp, old_state, new_state); 7073 return (IBT_CM_NO_CHANNEL); 7074 } 7075 7076 /* 7077 * we do not cancel the timer here because the connection 7078 * handshake is still in progress. 7079 */ 7080 7081 /* 7082 * save the private data. it will be passed up when 7083 * the connection is established. 7084 */ 7085 if (pr_len > 0) { 7086 ep_rp->ep_priv_len = pr_len; 7087 bcopy(pr_data, ep_rp->ep_priv_data, (size_t)pr_len); 7088 } 7089 7090 /* 7091 * we do not actually transition to a different state. 7092 * the state will change when we get a conn_est, failure, 7093 * closed, or timeout event. 7094 */ 7095 daplka_ep_set_state(ep_rp, old_state, new_state); 7096 return (IBT_CM_ACCEPT); 7097 } 7098 7099 /* 7100 * Processes the CONN_CLOSED event. This gets called when either 7101 * the active or passive side closes the rc channel. 
7102 */ 7103 /* ARGSUSED */ 7104 static ibt_cm_status_t 7105 daplka_cm_rc_conn_closed(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event, 7106 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len) 7107 { 7108 daplka_evd_event_t *disc_ev; 7109 uint32_t old_state, new_state; 7110 7111 ASSERT(ep_rp != NULL); 7112 old_state = new_state = daplka_ep_get_state(ep_rp); 7113 if (old_state != DAPLKA_EP_STATE_CONNECTED && 7114 old_state != DAPLKA_EP_STATE_DISCONNECTING) { 7115 /* 7116 * we can get here if the connection is being aborted 7117 */ 7118 D2("rc_conn_closed: conn aborted, state = %d, " 7119 "closed = %d\n", old_state, (int)event->cm_event.closed); 7120 daplka_ep_set_state(ep_rp, old_state, new_state); 7121 return (IBT_CM_ACCEPT); 7122 } 7123 7124 /* 7125 * it's ok for the timer to fire at this point. the 7126 * taskq thread that processes the timer will just wait 7127 * until we are done with our state transition. 7128 */ 7129 if (daplka_cancel_timer(ep_rp) != 0) { 7130 /* 7131 * daplka_cancel_timer returns -1 if the timer is 7132 * being processed and 0 for all other cases. 7133 * we need to reset ep_state to allow timer processing 7134 * to continue. 
7135 */ 7136 DERR("rc_conn_closed: timer is being processed\n"); 7137 daplka_ep_set_state(ep_rp, old_state, new_state); 7138 return (IBT_CM_ACCEPT); 7139 } 7140 7141 /* 7142 * create a DAPL_IB_CME_DISCONNECTED event 7143 */ 7144 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP); 7145 if (disc_ev == NULL) { 7146 DERR("rc_conn_closed: could not alloc ev\n"); 7147 daplka_ep_set_state(ep_rp, old_state, new_state); 7148 return (IBT_CM_ACCEPT); 7149 } 7150 7151 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED; 7152 disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie; 7153 disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE; 7154 disc_ev->ee_cmev.ec_cm_psep_cookie = 0; 7155 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL; 7156 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0; 7157 7158 D2("rc_conn_closed: enqueue event(%p) evdp(%p) closed(%d)\n", 7159 disc_ev, ep_rp->ep_conn_evd, (int)event->cm_event.closed); 7160 7161 /* 7162 * transition ep_state to DISCONNECTED 7163 */ 7164 new_state = DAPLKA_EP_STATE_DISCONNECTED; 7165 daplka_ep_set_state(ep_rp, old_state, new_state); 7166 7167 /* 7168 * enqueue event onto the conn_evd owned by ep_rp 7169 */ 7170 daplka_evd_wakeup(ep_rp->ep_conn_evd, 7171 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev); 7172 7173 return (IBT_CM_ACCEPT); 7174 } 7175 7176 /* 7177 * processes the CONN_EST event 7178 */ 7179 /* ARGSUSED */ 7180 static ibt_cm_status_t 7181 daplka_cm_rc_conn_est(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event, 7182 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len) 7183 { 7184 daplka_evd_event_t *conn_ev; 7185 uint32_t old_state, new_state; 7186 7187 ASSERT(ep_rp != NULL); 7188 old_state = new_state = daplka_ep_get_state(ep_rp); 7189 if (old_state != DAPLKA_EP_STATE_CONNECTING) { 7190 /* 7191 * we can get here if the connection is being aborted 7192 */ 7193 DERR("rc_conn_est: conn aborted, state = %d\n", old_state); 7194 daplka_ep_set_state(ep_rp, old_state, new_state); 7195 return (IBT_CM_ACCEPT); 
7196 } 7197 7198 /* 7199 * it's ok for the timer to fire at this point. the 7200 * taskq thread that processes the timer will just wait 7201 * until we are done with our state transition. 7202 */ 7203 if (daplka_cancel_timer(ep_rp) != 0) { 7204 /* 7205 * daplka_cancel_timer returns -1 if the timer is 7206 * being processed and 0 for all other cases. 7207 * we need to reset ep_state to allow timer processing 7208 * to continue. 7209 */ 7210 DERR("rc_conn_est: timer is being processed\n"); 7211 daplka_ep_set_state(ep_rp, old_state, new_state); 7212 return (IBT_CM_ACCEPT); 7213 } 7214 7215 /* 7216 * create a DAPL_IB_CME_CONNECTED event 7217 */ 7218 conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP); 7219 if (conn_ev == NULL) { 7220 DERR("rc_conn_est: could not alloc ev\n"); 7221 daplka_ep_set_state(ep_rp, old_state, new_state); 7222 return (IBT_CM_ACCEPT); 7223 } 7224 7225 conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED; 7226 conn_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie; 7227 conn_ev->ee_cmev.ec_cm_is_passive = B_FALSE; 7228 conn_ev->ee_cmev.ec_cm_psep_cookie = 0; 7229 7230 /* 7231 * The private data passed back in the connection established 7232 * event is what was recvd in the daplka_cm_rc_rep_rcv handler and 7233 * saved in ep resource structure. 
7234 */ 7235 if (ep_rp->ep_priv_len > 0) { 7236 conn_ev->ee_cmev.ec_cm_ev_priv_data = 7237 kmem_zalloc(ep_rp->ep_priv_len, KM_NOSLEEP); 7238 7239 if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) { 7240 DERR("rc_conn_est: could not alloc pr_data\n"); 7241 kmem_free(conn_ev, sizeof (daplka_evd_event_t)); 7242 daplka_ep_set_state(ep_rp, old_state, new_state); 7243 return (IBT_CM_ACCEPT); 7244 } 7245 bcopy(ep_rp->ep_priv_data, conn_ev->ee_cmev.ec_cm_ev_priv_data, 7246 ep_rp->ep_priv_len); 7247 } 7248 conn_ev->ee_cmev.ec_cm_ev_priv_data_len = ep_rp->ep_priv_len; 7249 7250 D2("rc_conn_est: enqueue event(%p) evdp(%p) pr_data(0x%p), " 7251 "pr_len(%d)\n", conn_ev, ep_rp->ep_conn_evd, 7252 conn_ev->ee_cmev.ec_cm_ev_priv_data, 7253 (int)conn_ev->ee_cmev.ec_cm_ev_priv_data_len); 7254 7255 /* 7256 * transition ep_state to CONNECTED 7257 */ 7258 new_state = DAPLKA_EP_STATE_CONNECTED; 7259 daplka_ep_set_state(ep_rp, old_state, new_state); 7260 7261 /* 7262 * enqueue event onto the conn_evd owned by ep_rp 7263 */ 7264 daplka_evd_wakeup(ep_rp->ep_conn_evd, 7265 &ep_rp->ep_conn_evd->evd_conn_events, conn_ev); 7266 7267 return (IBT_CM_ACCEPT); 7268 } 7269 7270 /* 7271 * processes the FAILURE event 7272 */ 7273 /* ARGSUSED */ 7274 static ibt_cm_status_t 7275 daplka_cm_rc_event_failure(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event, 7276 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len) 7277 { 7278 daplka_evd_event_t *disc_ev; 7279 ibt_priv_data_len_t pr_len = event->cm_priv_data_len; 7280 void *pr_data = event->cm_priv_data; 7281 uint32_t old_state, new_state; 7282 ibt_rc_chan_query_attr_t chan_attrs; 7283 ibt_status_t status; 7284 7285 ASSERT(ep_rp != NULL); 7286 old_state = new_state = daplka_ep_get_state(ep_rp); 7287 if (old_state != DAPLKA_EP_STATE_CONNECTING && 7288 old_state != DAPLKA_EP_STATE_DISCONNECTING) { 7289 /* 7290 * we can get here if the connection is being aborted 7291 */ 7292 DERR("rc_event_failure: conn aborted, state = %d, " 7293 
"cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state, 7294 (int)event->cm_event.failed.cf_code, 7295 (int)event->cm_event.failed.cf_msg, 7296 (int)event->cm_event.failed.cf_reason); 7297 7298 daplka_ep_set_state(ep_rp, old_state, new_state); 7299 return (IBT_CM_ACCEPT); 7300 } 7301 7302 /* 7303 * it's ok for the timer to fire at this point. the 7304 * taskq thread that processes the timer will just wait 7305 * until we are done with our state transition. 7306 */ 7307 if (daplka_cancel_timer(ep_rp) != 0) { 7308 /* 7309 * daplka_cancel_timer returns -1 if the timer is 7310 * being processed and 0 for all other cases. 7311 * we need to reset ep_state to allow timer processing 7312 * to continue. 7313 */ 7314 DERR("rc_event_failure: timer is being processed\n"); 7315 daplka_ep_set_state(ep_rp, old_state, new_state); 7316 return (IBT_CM_ACCEPT); 7317 } 7318 7319 bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t)); 7320 status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs); 7321 7322 if ((status == IBT_SUCCESS) && 7323 (chan_attrs.rc_state != IBT_STATE_ERROR)) { 7324 DERR("rc_event_failure: conn abort qpn %d state %d\n", 7325 chan_attrs.rc_qpn, chan_attrs.rc_state); 7326 7327 /* explicit transition the QP to ERROR state */ 7328 status = ibt_flush_channel(ep_rp->ep_chan_hdl); 7329 } 7330 7331 /* 7332 * create an event 7333 */ 7334 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP); 7335 if (disc_ev == NULL) { 7336 DERR("rc_event_failure: cannot alloc disc_ev\n"); 7337 daplka_ep_set_state(ep_rp, old_state, new_state); 7338 return (IBT_CM_ACCEPT); 7339 } 7340 7341 /* 7342 * copy private data into event 7343 */ 7344 if (pr_len > 0) { 7345 disc_ev->ee_cmev.ec_cm_ev_priv_data = 7346 kmem_zalloc(pr_len, KM_NOSLEEP); 7347 7348 if (disc_ev->ee_cmev.ec_cm_ev_priv_data == NULL) { 7349 DERR("rc_event_failure: cannot alloc pr data\n"); 7350 kmem_free(disc_ev, sizeof (daplka_evd_event_t)); 7351 daplka_ep_set_state(ep_rp, old_state, new_state); 7352 return 
(IBT_CM_ACCEPT); 7353 } 7354 bcopy(pr_data, disc_ev->ee_cmev.ec_cm_ev_priv_data, pr_len); 7355 } 7356 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len; 7357 7358 /* 7359 * fill in the appropriate event type 7360 */ 7361 if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) { 7362 switch (event->cm_event.failed.cf_reason) { 7363 case IBT_CM_CONSUMER: 7364 disc_ev->ee_cmev.ec_cm_ev_type = 7365 DAPL_IB_CME_DESTINATION_REJECT_PRIVATE_DATA; 7366 break; 7367 case IBT_CM_NO_CHAN: 7368 case IBT_CM_NO_RESC: 7369 disc_ev->ee_cmev.ec_cm_ev_type = 7370 DAPL_IB_CME_DESTINATION_REJECT; 7371 break; 7372 default: 7373 disc_ev->ee_cmev.ec_cm_ev_type = 7374 DAPL_IB_CME_DESTINATION_REJECT; 7375 break; 7376 } 7377 } else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) { 7378 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT; 7379 } else { 7380 /* others we'll mark as local failure */ 7381 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE; 7382 } 7383 disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie; 7384 disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE; 7385 disc_ev->ee_cmev.ec_cm_psep_cookie = 0; 7386 7387 D2("rc_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) " 7388 "cf_msg(%d) cf_reason(%d)\n", disc_ev, ep_rp->ep_conn_evd, 7389 (int)event->cm_event.failed.cf_code, 7390 (int)event->cm_event.failed.cf_msg, 7391 (int)event->cm_event.failed.cf_reason); 7392 7393 /* 7394 * transition ep_state to DISCONNECTED 7395 */ 7396 new_state = DAPLKA_EP_STATE_DISCONNECTED; 7397 daplka_ep_set_state(ep_rp, old_state, new_state); 7398 7399 /* 7400 * enqueue event onto the conn_evd owned by ep_rp 7401 */ 7402 daplka_evd_wakeup(ep_rp->ep_conn_evd, 7403 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev); 7404 7405 return (IBT_CM_ACCEPT); 7406 } 7407 7408 /* 7409 * This is the active side CM handler. It gets registered when 7410 * ibt_open_rc_channel is called. 
7411 */ 7412 static ibt_cm_status_t 7413 daplka_cm_rc_handler(void *cm_private, ibt_cm_event_t *event, 7414 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len) 7415 { 7416 daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)cm_private; 7417 7418 if (ep_rp == NULL) { 7419 DERR("rc_handler: ep_rp == NULL\n"); 7420 return (IBT_CM_NO_CHANNEL); 7421 } 7422 /* 7423 * default is not to return priv data 7424 */ 7425 if (ret_args != NULL) { 7426 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args)) 7427 ret_args->cm_ret_len = 0; 7428 } 7429 7430 switch (event->cm_type) { 7431 case IBT_CM_EVENT_REQ_RCV: 7432 /* active side should not receive this event */ 7433 D2("rc_handler: IBT_CM_EVENT_REQ_RCV\n"); 7434 break; 7435 7436 case IBT_CM_EVENT_REP_RCV: 7437 /* connection accepted by passive side */ 7438 D2("rc_handler: IBT_CM_EVENT_REP_RCV\n"); 7439 return (daplka_cm_rc_rep_rcv(ep_rp, event, ret_args, 7440 priv_data, len)); 7441 7442 case IBT_CM_EVENT_CONN_CLOSED: 7443 D2("rc_handler: IBT_CM_EVENT_CONN_CLOSED %d\n", 7444 event->cm_event.closed); 7445 return (daplka_cm_rc_conn_closed(ep_rp, event, ret_args, 7446 priv_data, len)); 7447 7448 case IBT_CM_EVENT_MRA_RCV: 7449 /* passive side does default processing MRA event */ 7450 D2("rc_handler: IBT_CM_EVENT_MRA_RCV\n"); 7451 return (IBT_CM_DEFAULT); 7452 7453 case IBT_CM_EVENT_CONN_EST: 7454 D2("rc_handler: IBT_CM_EVENT_CONN_EST\n"); 7455 return (daplka_cm_rc_conn_est(ep_rp, event, ret_args, 7456 priv_data, len)); 7457 7458 case IBT_CM_EVENT_FAILURE: 7459 D2("rc_handler: IBT_CM_EVENT_FAILURE\n"); 7460 return (daplka_cm_rc_event_failure(ep_rp, event, ret_args, 7461 priv_data, len)); 7462 7463 default: 7464 D2("rc_handler: invalid event %d\n", event->cm_type); 7465 break; 7466 } 7467 return (IBT_CM_DEFAULT); 7468 } 7469 7470 /* 7471 * creates an IA resource and inserts it into the global resource table. 
7472 */ 7473 /* ARGSUSED */ 7474 static int 7475 daplka_ia_create(minor_t rnum, intptr_t arg, int mode, 7476 cred_t *cred, int *rvalp) 7477 { 7478 daplka_ia_resource_t *ia_rp, *tmp_rp; 7479 boolean_t inserted = B_FALSE; 7480 dapl_ia_create_t args; 7481 ibt_hca_hdl_t hca_hdl; 7482 ibt_status_t status; 7483 ib_gid_t sgid; 7484 int retval; 7485 ibt_hca_portinfo_t *pinfop; 7486 uint_t pinfon; 7487 uint_t size; 7488 ibt_ar_t ar_s; 7489 daplka_hca_t *hca; 7490 7491 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ia_create_t), 7492 mode); 7493 if (retval != 0) { 7494 DERR("ia_create: copyin error %d\n", retval); 7495 return (EFAULT); 7496 } 7497 if (args.ia_version != DAPL_IF_VERSION) { 7498 DERR("ia_create: invalid version %d, expected version %d\n", 7499 args.ia_version, DAPL_IF_VERSION); 7500 return (EINVAL); 7501 } 7502 7503 /* 7504 * find the hca with the matching guid 7505 */ 7506 mutex_enter(&daplka_dev->daplka_mutex); 7507 for (hca = daplka_dev->daplka_hca_list_head; hca != NULL; 7508 hca = hca->hca_next) { 7509 if (hca->hca_guid == args.ia_guid) { 7510 DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca); 7511 break; 7512 } 7513 } 7514 mutex_exit(&daplka_dev->daplka_mutex); 7515 7516 if (hca == NULL) { 7517 DERR("ia_create: guid 0x%016llx not found\n", 7518 (longlong_t)args.ia_guid); 7519 return (EINVAL); 7520 } 7521 7522 /* 7523 * check whether port number is valid and whether it is up 7524 */ 7525 if (args.ia_port > hca->hca_nports) { 7526 DERR("ia_create: invalid hca_port %d\n", args.ia_port); 7527 DAPLKA_RELE_HCA(daplka_dev, hca); 7528 return (EINVAL); 7529 } 7530 hca_hdl = hca->hca_hdl; 7531 if (hca_hdl == NULL) { 7532 DERR("ia_create: hca_hdl == NULL\n"); 7533 DAPLKA_RELE_HCA(daplka_dev, hca); 7534 return (EINVAL); 7535 } 7536 status = ibt_query_hca_ports(hca_hdl, (uint8_t)args.ia_port, 7537 &pinfop, &pinfon, &size); 7538 if (status != IBT_SUCCESS) { 7539 DERR("ia_create: ibt_query_hca_ports returned %d\n", status); 7540 *rvalp = (int)status; 7541 
DAPLKA_RELE_HCA(daplka_dev, hca); 7542 return (0); 7543 } 7544 sgid = pinfop->p_sgid_tbl[0]; 7545 ibt_free_portinfo(pinfop, size); 7546 7547 ia_rp = kmem_zalloc(sizeof (daplka_ia_resource_t), daplka_km_flags); 7548 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ia_rp)) 7549 DAPLKA_RS_INIT(ia_rp, DAPL_TYPE_IA, rnum, daplka_ia_destroy); 7550 7551 mutex_init(&ia_rp->ia_lock, NULL, MUTEX_DRIVER, NULL); 7552 cv_init(&ia_rp->ia_cv, NULL, CV_DRIVER, NULL); 7553 ia_rp->ia_hca_hdl = hca_hdl; 7554 ia_rp->ia_hca_sgid = sgid; 7555 ia_rp->ia_hca = hca; 7556 ia_rp->ia_port_num = args.ia_port; 7557 ia_rp->ia_port_pkey = args.ia_pkey; 7558 ia_rp->ia_pid = ddi_get_pid(); 7559 ia_rp->ia_async_evd_hkeys = NULL; 7560 ia_rp->ia_ar_registered = B_FALSE; 7561 bcopy(args.ia_sadata, ia_rp->ia_sadata, DAPL_ATS_NBYTES); 7562 7563 /* register Address Record */ 7564 ar_s.ar_gid = ia_rp->ia_hca_sgid; 7565 ar_s.ar_pkey = ia_rp->ia_port_pkey; 7566 bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES); 7567 #define UC(b) ar_s.ar_data[(b)] 7568 D3("daplka_ia_create: SA[8] %d.%d.%d.%d\n", 7569 UC(8), UC(9), UC(10), UC(11)); 7570 D3("daplka_ia_create: SA[12] %d.%d.%d.%d\n", 7571 UC(12), UC(13), UC(14), UC(15)); 7572 retval = ibt_register_ar(daplka_dev->daplka_clnt_hdl, &ar_s); 7573 if (retval != IBT_SUCCESS) { 7574 DERR("ia_create: failed to register Address Record.\n"); 7575 retval = EINVAL; 7576 goto cleanup; 7577 } 7578 ia_rp->ia_ar_registered = B_TRUE; 7579 7580 /* 7581 * create hash tables for all object types 7582 */ 7583 retval = daplka_hash_create(&ia_rp->ia_ep_htbl, DAPLKA_EP_HTBL_SZ, 7584 daplka_hash_ep_free, daplka_hash_generic_lookup); 7585 if (retval != 0) { 7586 DERR("ia_create: cannot create ep hash table\n"); 7587 goto cleanup; 7588 } 7589 retval = daplka_hash_create(&ia_rp->ia_mr_htbl, DAPLKA_MR_HTBL_SZ, 7590 daplka_hash_mr_free, daplka_hash_generic_lookup); 7591 if (retval != 0) { 7592 DERR("ia_create: cannot create mr hash table\n"); 7593 goto cleanup; 7594 } 7595 retval = 
daplka_hash_create(&ia_rp->ia_mw_htbl, DAPLKA_MW_HTBL_SZ, 7596 daplka_hash_mw_free, daplka_hash_generic_lookup); 7597 if (retval != 0) { 7598 DERR("ia_create: cannot create mw hash table\n"); 7599 goto cleanup; 7600 } 7601 retval = daplka_hash_create(&ia_rp->ia_pd_htbl, DAPLKA_PD_HTBL_SZ, 7602 daplka_hash_pd_free, daplka_hash_generic_lookup); 7603 if (retval != 0) { 7604 DERR("ia_create: cannot create pd hash table\n"); 7605 goto cleanup; 7606 } 7607 retval = daplka_hash_create(&ia_rp->ia_evd_htbl, DAPLKA_EVD_HTBL_SZ, 7608 daplka_hash_evd_free, daplka_hash_generic_lookup); 7609 if (retval != 0) { 7610 DERR("ia_create: cannot create evd hash table\n"); 7611 goto cleanup; 7612 } 7613 retval = daplka_hash_create(&ia_rp->ia_cno_htbl, DAPLKA_CNO_HTBL_SZ, 7614 daplka_hash_cno_free, daplka_hash_generic_lookup); 7615 if (retval != 0) { 7616 DERR("ia_create: cannot create cno hash table\n"); 7617 goto cleanup; 7618 } 7619 retval = daplka_hash_create(&ia_rp->ia_sp_htbl, DAPLKA_SP_HTBL_SZ, 7620 daplka_hash_sp_free, daplka_hash_generic_lookup); 7621 if (retval != 0) { 7622 DERR("ia_create: cannot create sp hash table\n"); 7623 goto cleanup; 7624 } 7625 retval = daplka_hash_create(&ia_rp->ia_srq_htbl, DAPLKA_SRQ_HTBL_SZ, 7626 daplka_hash_srq_free, daplka_hash_generic_lookup); 7627 if (retval != 0) { 7628 DERR("ia_create: cannot create srq hash table\n"); 7629 goto cleanup; 7630 } 7631 /* 7632 * insert ia_rp into the global resource table 7633 */ 7634 retval = daplka_resource_insert(rnum, (daplka_resource_t *)ia_rp); 7635 if (retval != 0) { 7636 DERR("ia_create: cannot insert resource\n"); 7637 goto cleanup; 7638 } 7639 inserted = B_TRUE; 7640 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ia_rp)) 7641 7642 args.ia_resnum = rnum; 7643 retval = copyout(&args, (void *)arg, sizeof (dapl_ia_create_t)); 7644 if (retval != 0) { 7645 DERR("ia_create: copyout error %d\n", retval); 7646 retval = EFAULT; 7647 goto cleanup; 7648 } 7649 return (0); 7650 7651 cleanup:; 7652 if (inserted) { 7653 tmp_rp 
= (daplka_ia_resource_t *)daplka_resource_remove(rnum); 7654 if (tmp_rp != ia_rp) { 7655 /* 7656 * we can return here because another thread must 7657 * have freed up the resource 7658 */ 7659 DERR("ia_create: cannot remove resource\n"); 7660 return (retval); 7661 } 7662 } 7663 DAPLKA_RS_UNREF(ia_rp); 7664 return (retval); 7665 } 7666 7667 /* 7668 * destroys an IA resource 7669 */ 7670 static int 7671 daplka_ia_destroy(daplka_resource_t *gen_rp) 7672 { 7673 daplka_ia_resource_t *ia_rp = (daplka_ia_resource_t *)gen_rp; 7674 daplka_async_evd_hkey_t *hkp; 7675 int cnt; 7676 ibt_ar_t ar_s; 7677 7678 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ia_rp)) 7679 D3("ia_destroy: entering, ia_rp 0x%p\n", ia_rp); 7680 7681 /* deregister Address Record */ 7682 if (ia_rp->ia_ar_registered) { 7683 ar_s.ar_gid = ia_rp->ia_hca_sgid; 7684 ar_s.ar_pkey = ia_rp->ia_port_pkey; 7685 bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES); 7686 (void) ibt_deregister_ar(daplka_dev->daplka_clnt_hdl, &ar_s); 7687 ia_rp->ia_ar_registered = B_FALSE; 7688 } 7689 7690 /* 7691 * destroy hash tables. make sure resources are 7692 * destroyed in the correct order. 
7693 */ 7694 daplka_hash_destroy(&ia_rp->ia_mw_htbl); 7695 daplka_hash_destroy(&ia_rp->ia_mr_htbl); 7696 daplka_hash_destroy(&ia_rp->ia_ep_htbl); 7697 daplka_hash_destroy(&ia_rp->ia_srq_htbl); 7698 daplka_hash_destroy(&ia_rp->ia_evd_htbl); 7699 daplka_hash_destroy(&ia_rp->ia_cno_htbl); 7700 daplka_hash_destroy(&ia_rp->ia_pd_htbl); 7701 daplka_hash_destroy(&ia_rp->ia_sp_htbl); 7702 7703 /* 7704 * free the async evd list 7705 */ 7706 cnt = 0; 7707 hkp = ia_rp->ia_async_evd_hkeys; 7708 while (hkp != NULL) { 7709 daplka_async_evd_hkey_t *free_hkp; 7710 7711 cnt++; 7712 free_hkp = hkp; 7713 hkp = hkp->aeh_next; 7714 kmem_free(free_hkp, sizeof (*free_hkp)); 7715 } 7716 if (cnt > 0) { 7717 D3("ia_destroy: freed %d hkeys\n", cnt); 7718 } 7719 mutex_destroy(&ia_rp->ia_lock); 7720 cv_destroy(&ia_rp->ia_cv); 7721 ia_rp->ia_hca_hdl = NULL; 7722 7723 DAPLKA_RS_FINI(ia_rp); 7724 7725 if (ia_rp->ia_hca) 7726 DAPLKA_RELE_HCA(daplka_dev, ia_rp->ia_hca); 7727 7728 kmem_free(ia_rp, sizeof (daplka_ia_resource_t)); 7729 D3("ia_destroy: exiting, ia_rp 0x%p\n", ia_rp); 7730 return (0); 7731 } 7732 7733 static void 7734 daplka_async_event_create(ibt_async_code_t code, ibt_async_event_t *event, 7735 uint64_t cookie, daplka_ia_resource_t *ia_rp) 7736 { 7737 daplka_evd_event_t *evp; 7738 daplka_evd_resource_t *async_evd; 7739 daplka_async_evd_hkey_t *curr; 7740 7741 mutex_enter(&ia_rp->ia_lock); 7742 curr = ia_rp->ia_async_evd_hkeys; 7743 while (curr != NULL) { 7744 /* 7745 * Note: this allocation does not zero out the buffer 7746 * since we init all the fields. 
7747 */ 7748 evp = kmem_alloc(sizeof (daplka_evd_event_t), KM_NOSLEEP); 7749 if (evp == NULL) { 7750 DERR("async_event_enqueue: event alloc failed" 7751 "!found\n", ia_rp, curr->aeh_evd_hkey); 7752 curr = curr->aeh_next; 7753 continue; 7754 } 7755 evp->ee_next = NULL; 7756 evp->ee_aev.ibae_type = code; 7757 evp->ee_aev.ibae_hca_guid = event->ev_hca_guid; 7758 evp->ee_aev.ibae_cookie = cookie; 7759 evp->ee_aev.ibae_port = event->ev_port; 7760 7761 /* 7762 * Lookup the async evd corresponding to this ia and enqueue 7763 * evp and wakeup any waiter. 7764 */ 7765 async_evd = (daplka_evd_resource_t *) 7766 daplka_hash_lookup(&ia_rp->ia_evd_htbl, curr->aeh_evd_hkey); 7767 if (async_evd == NULL) { /* async evd is being freed */ 7768 DERR("async_event_enqueue: ia_rp(%p) asycn_evd %llx " 7769 "!found\n", ia_rp, (longlong_t)curr->aeh_evd_hkey); 7770 kmem_free(evp, sizeof (daplka_evd_event_t)); 7771 curr = curr->aeh_next; 7772 continue; 7773 } 7774 daplka_evd_wakeup(async_evd, &async_evd->evd_async_events, evp); 7775 7776 /* decrement refcnt on async_evd */ 7777 DAPLKA_RS_UNREF(async_evd); 7778 curr = curr->aeh_next; 7779 } 7780 mutex_exit(&ia_rp->ia_lock); 7781 } 7782 /* 7783 * This routine is called in kernel context 7784 */ 7785 7786 /* ARGSUSED */ 7787 static void 7788 daplka_rc_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl, 7789 ibt_async_code_t code, ibt_async_event_t *event) 7790 { 7791 daplka_ep_resource_t *epp; 7792 daplka_ia_resource_t *ia_rp; 7793 minor_t ia_rnum; 7794 7795 if (event->ev_chan_hdl == NULL) { 7796 DERR("daplka_rc_async_handler: ev_chan_hdl is NULL\n"); 7797 return; 7798 } 7799 7800 mutex_enter(&daplka_dev->daplka_mutex); 7801 epp = ibt_get_chan_private(event->ev_chan_hdl); 7802 if (epp == NULL) { 7803 mutex_exit(&daplka_dev->daplka_mutex); 7804 DERR("daplka_rc_async_handler: chan_private is NULL\n"); 7805 return; 7806 } 7807 7808 /* grab a reference to this ep */ 7809 DAPLKA_RS_REF(epp); 7810 mutex_exit(&daplka_dev->daplka_mutex); 7811 7812 
/* 7813 * The endpoint resource has the resource number corresponding to 7814 * the IA resource. Use that to lookup the ia resource entry 7815 */ 7816 ia_rnum = DAPLKA_RS_RNUM(epp); 7817 ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum); 7818 if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) { 7819 D2("daplka_rc_async_handler: resource (%d) not found\n", 7820 ia_rnum); 7821 DAPLKA_RS_UNREF(epp); 7822 return; 7823 } 7824 7825 /* 7826 * Create an async event and chain it to the async evd 7827 */ 7828 daplka_async_event_create(code, event, epp->ep_cookie, ia_rp); 7829 7830 DAPLKA_RS_UNREF(ia_rp); 7831 DAPLKA_RS_UNREF(epp); 7832 } 7833 7834 /* 7835 * This routine is called in kernel context 7836 */ 7837 7838 /* ARGSUSED */ 7839 static void 7840 daplka_cq_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl, 7841 ibt_async_code_t code, ibt_async_event_t *event) 7842 { 7843 daplka_evd_resource_t *evdp; 7844 daplka_ia_resource_t *ia_rp; 7845 minor_t ia_rnum; 7846 7847 if (event->ev_cq_hdl == NULL) 7848 return; 7849 7850 mutex_enter(&daplka_dev->daplka_mutex); 7851 evdp = ibt_get_cq_private(event->ev_cq_hdl); 7852 if (evdp == NULL) { 7853 mutex_exit(&daplka_dev->daplka_mutex); 7854 DERR("daplka_cq_async_handler: get cq private(%p) failed\n", 7855 event->ev_cq_hdl); 7856 return; 7857 } 7858 /* grab a reference to this evd resource */ 7859 DAPLKA_RS_REF(evdp); 7860 mutex_exit(&daplka_dev->daplka_mutex); 7861 7862 /* 7863 * The endpoint resource has the resource number corresponding to 7864 * the IA resource. 
Use that to lookup the ia resource entry 7865 */ 7866 ia_rnum = DAPLKA_RS_RNUM(evdp); 7867 ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum); 7868 if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) { 7869 DERR("daplka_cq_async_handler: resource (%d) not found\n", 7870 ia_rnum); 7871 DAPLKA_RS_UNREF(evdp); 7872 return; 7873 } 7874 7875 /* 7876 * Create an async event and chain it to the async evd 7877 */ 7878 daplka_async_event_create(code, event, evdp->evd_cookie, ia_rp); 7879 7880 /* release all the refcount that were acquired */ 7881 DAPLKA_RS_UNREF(ia_rp); 7882 DAPLKA_RS_UNREF(evdp); 7883 } 7884 7885 /* 7886 * This routine is called in kernel context, handles unaffiliated async errors 7887 */ 7888 7889 /* ARGSUSED */ 7890 static void 7891 daplka_un_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl, 7892 ibt_async_code_t code, ibt_async_event_t *event) 7893 { 7894 int i, j; 7895 daplka_resource_blk_t *blk; 7896 daplka_resource_t *rp; 7897 daplka_ia_resource_t *ia_rp; 7898 7899 /* 7900 * Walk the resource table looking for an ia that matches the 7901 * hca_hdl. 7902 */ 7903 rw_enter(&daplka_resource.daplka_rct_lock, RW_READER); 7904 for (i = 0; i < daplka_resource.daplka_rc_len; i++) { 7905 blk = daplka_resource.daplka_rc_root[i]; 7906 if (blk == NULL) 7907 continue; 7908 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) { 7909 rp = blk->daplka_rcblk_blks[j]; 7910 if ((rp == NULL) || 7911 ((intptr_t)rp == DAPLKA_RC_RESERVED) || 7912 (rp->rs_type != DAPL_TYPE_IA)) { 7913 continue; 7914 } 7915 /* 7916 * rp is an IA resource check if it belongs 7917 * to the hca/port for which we got the event 7918 */ 7919 ia_rp = (daplka_ia_resource_t *)rp; 7920 DAPLKA_RS_REF(ia_rp); 7921 if ((hca_hdl == ia_rp->ia_hca_hdl) && 7922 (event->ev_port == ia_rp->ia_port_num)) { 7923 /* 7924 * walk the ep hash table. Acquire a 7925 * reader lock. NULL dgid indicates 7926 * local port up event. 
7927 */ 7928 daplka_hash_walk(&ia_rp->ia_ep_htbl, 7929 daplka_ep_failback, NULL, RW_READER); 7930 } 7931 DAPLKA_RS_UNREF(ia_rp); 7932 } 7933 } 7934 rw_exit(&daplka_resource.daplka_rct_lock); 7935 } 7936 7937 static int 7938 daplka_handle_hca_detach_event(ibt_async_event_t *event) 7939 { 7940 daplka_hca_t *hca; 7941 7942 /* 7943 * find the hca with the matching guid 7944 */ 7945 mutex_enter(&daplka_dev->daplka_mutex); 7946 for (hca = daplka_dev->daplka_hca_list_head; hca != NULL; 7947 hca = hca->hca_next) { 7948 if (hca->hca_guid == event->ev_hca_guid) { 7949 if (DAPLKA_HCA_BUSY(hca)) { 7950 mutex_exit(&daplka_dev->daplka_mutex); 7951 return (IBT_HCA_RESOURCES_NOT_FREED); 7952 } 7953 daplka_dequeue_hca(daplka_dev, hca); 7954 break; 7955 } 7956 } 7957 mutex_exit(&daplka_dev->daplka_mutex); 7958 7959 if (hca == NULL) 7960 return (IBT_FAILURE); 7961 7962 return (daplka_fini_hca(daplka_dev, hca)); 7963 } 7964 7965 /* 7966 * This routine is called in kernel context 7967 */ 7968 static void 7969 daplka_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl, 7970 ibt_async_code_t code, ibt_async_event_t *event) 7971 { 7972 switch (code) { 7973 case IBT_ERROR_CATASTROPHIC_CHAN: 7974 case IBT_ERROR_INVALID_REQUEST_CHAN: 7975 case IBT_ERROR_ACCESS_VIOLATION_CHAN: 7976 case IBT_ERROR_PATH_MIGRATE_REQ: 7977 D2("daplka_async_handler(): Channel affiliated=0x%x\n", code); 7978 /* These events are affiliated with a the RC channel */ 7979 daplka_rc_async_handler(clnt_private, hca_hdl, code, event); 7980 break; 7981 case IBT_ERROR_CQ: 7982 /* This event is affiliated with a the CQ */ 7983 D2("daplka_async_handler(): IBT_ERROR_CQ\n"); 7984 daplka_cq_async_handler(clnt_private, hca_hdl, code, event); 7985 break; 7986 case IBT_ERROR_PORT_DOWN: 7987 D2("daplka_async_handler(): IBT_PORT_DOWN\n"); 7988 break; 7989 case IBT_EVENT_PORT_UP: 7990 D2("daplka_async_handler(): IBT_PORT_UP\n"); 7991 if (daplka_apm) { 7992 daplka_un_async_handler(clnt_private, hca_hdl, code, 7993 event); 7994 } 
7995 break; 7996 case IBT_HCA_ATTACH_EVENT: 7997 /* 7998 * NOTE: In some error recovery paths, it is possible to 7999 * receive IBT_HCA_ATTACH_EVENTs on already known HCAs. 8000 */ 8001 D2("daplka_async_handler(): IBT_HCA_ATTACH\n"); 8002 (void) daplka_init_hca(daplka_dev, event->ev_hca_guid); 8003 break; 8004 case IBT_HCA_DETACH_EVENT: 8005 D2("daplka_async_handler(): IBT_HCA_DETACH\n"); 8006 /* Free all hca resources and close the HCA. */ 8007 (void) daplka_handle_hca_detach_event(event); 8008 break; 8009 case IBT_EVENT_PATH_MIGRATED: 8010 /* This event is affiliated with APM */ 8011 D2("daplka_async_handler(): IBT_PATH_MIGRATED.\n"); 8012 break; 8013 default: 8014 D2("daplka_async_handler(): unhandled code = 0x%x\n", code); 8015 break; 8016 } 8017 } 8018 8019 /* 8020 * This routine is called in kernel context related to Subnet events 8021 */ 8022 /*ARGSUSED*/ 8023 static void 8024 daplka_sm_notice_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code, 8025 ibt_subnet_event_t *event) 8026 { 8027 ib_gid_t *sgid = &gid; 8028 ib_gid_t *dgid; 8029 8030 dgid = &event->sm_notice_gid; 8031 switch (code) { 8032 case IBT_SM_EVENT_GID_AVAIL: 8033 /* This event is affiliated with remote port up */ 8034 D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_AVAIL\n"); 8035 if (daplka_apm) 8036 daplka_sm_gid_avail(sgid, dgid); 8037 return; 8038 case IBT_SM_EVENT_GID_UNAVAIL: 8039 /* This event is affiliated with remote port down */ 8040 D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_UNAVAIL\n"); 8041 return; 8042 default: 8043 D2("daplka_sm_notice_handler(): unhandled IBT_SM_EVENT_[%d]\n", 8044 code); 8045 return; 8046 } 8047 } 8048 8049 /* 8050 * This routine is called in kernel context, handles Subnet GID avail events 8051 * which correspond to remote port up. Setting up alternate path or path 8052 * migration (failback) has to be initiated from the active side of the 8053 * original connect. 
8054 */ 8055 static void 8056 daplka_sm_gid_avail(ib_gid_t *sgid, ib_gid_t *dgid) 8057 { 8058 int i, j; 8059 daplka_resource_blk_t *blk; 8060 daplka_resource_t *rp; 8061 daplka_ia_resource_t *ia_rp; 8062 8063 D2("daplka_sm_gid_avail: sgid=%llx:%llx dgid=%llx:%llx\n", 8064 (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid, 8065 (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid); 8066 8067 /* 8068 * Walk the resource table looking for an ia that matches the sgid 8069 */ 8070 rw_enter(&daplka_resource.daplka_rct_lock, RW_READER); 8071 for (i = 0; i < daplka_resource.daplka_rc_len; i++) { 8072 blk = daplka_resource.daplka_rc_root[i]; 8073 if (blk == NULL) 8074 continue; 8075 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) { 8076 rp = blk->daplka_rcblk_blks[j]; 8077 if ((rp == NULL) || 8078 ((intptr_t)rp == DAPLKA_RC_RESERVED) || 8079 (rp->rs_type != DAPL_TYPE_IA)) { 8080 continue; 8081 } 8082 /* 8083 * rp is an IA resource check if its gid 8084 * matches with the calling sgid 8085 */ 8086 ia_rp = (daplka_ia_resource_t *)rp; 8087 DAPLKA_RS_REF(ia_rp); 8088 if ((sgid->gid_prefix == 8089 ia_rp->ia_hca_sgid.gid_prefix) && 8090 (sgid->gid_guid == ia_rp->ia_hca_sgid.gid_guid)) { 8091 /* 8092 * walk the ep hash table. Acquire a 8093 * reader lock. 
8094 */ 8095 daplka_hash_walk(&ia_rp->ia_ep_htbl, 8096 daplka_ep_failback, 8097 (void *)dgid, RW_READER); 8098 } 8099 DAPLKA_RS_UNREF(ia_rp); 8100 } 8101 } 8102 rw_exit(&daplka_resource.daplka_rct_lock); 8103 } 8104 8105 /* 8106 * This routine is called in kernel context to get and set an alternate path 8107 */ 8108 static int 8109 daplka_ep_altpath(daplka_ep_resource_t *ep_rp, ib_gid_t *dgid) 8110 { 8111 ibt_alt_path_info_t path_info; 8112 ibt_alt_path_attr_t path_attr; 8113 ibt_ap_returns_t ap_rets; 8114 ibt_status_t status; 8115 8116 D2("daplka_ep_altpath : ibt_get_alt_path()\n"); 8117 bzero(&path_info, sizeof (ibt_alt_path_info_t)); 8118 bzero(&path_attr, sizeof (ibt_alt_path_attr_t)); 8119 if (dgid != NULL) { 8120 path_attr.apa_sgid = ep_rp->ep_sgid; 8121 path_attr.apa_dgid = *dgid; 8122 } 8123 status = ibt_get_alt_path(ep_rp->ep_chan_hdl, IBT_PATH_AVAIL, 8124 &path_attr, &path_info); 8125 if (status != IBT_SUCCESS) { 8126 DERR("daplka_ep_altpath : ibt_get_alt_path failed %d\n", 8127 status); 8128 return (1); 8129 } 8130 8131 D2("daplka_ep_altpath : ibt_set_alt_path()\n"); 8132 bzero(&ap_rets, sizeof (ibt_ap_returns_t)); 8133 status = ibt_set_alt_path(ep_rp->ep_chan_hdl, IBT_BLOCKING, 8134 &path_info, NULL, 0, &ap_rets); 8135 if ((status != IBT_SUCCESS) || 8136 (ap_rets.ap_status != IBT_CM_AP_LOADED)) { 8137 DERR("daplka_ep_altpath : ibt_set_alt_path failed " 8138 "status %d ap_status %d\n", status, ap_rets.ap_status); 8139 return (1); 8140 } 8141 return (0); 8142 } 8143 8144 /* 8145 * This routine is called in kernel context to failback to the original path 8146 */ 8147 static int 8148 daplka_ep_failback(void *objp, void *arg) 8149 { 8150 daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)objp; 8151 ib_gid_t *dgid; 8152 ibt_status_t status; 8153 ibt_rc_chan_query_attr_t chan_attrs; 8154 int i; 8155 8156 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP); 8157 D2("daplka_ep_failback ep : sgid=%llx:%llx dgid=%llx:%llx\n", 8158 
(longlong_t)ep_rp->ep_sgid.gid_prefix, 8159 (longlong_t)ep_rp->ep_sgid.gid_guid, 8160 (longlong_t)ep_rp->ep_dgid.gid_prefix, 8161 (longlong_t)ep_rp->ep_dgid.gid_guid); 8162 8163 /* 8164 * daplka_ep_failback is called from daplka_hash_walk 8165 * which holds the read lock on hash table to protect 8166 * the endpoint resource from removal 8167 */ 8168 mutex_enter(&ep_rp->ep_lock); 8169 /* check for unconnected endpoints */ 8170 /* first check for ep state */ 8171 if (ep_rp->ep_state != DAPLKA_EP_STATE_CONNECTED) { 8172 mutex_exit(&ep_rp->ep_lock); 8173 D2("daplka_ep_failback : endpoints not connected\n"); 8174 return (0); 8175 } 8176 8177 /* second check for gids */ 8178 if (((ep_rp->ep_sgid.gid_prefix == 0) && 8179 (ep_rp->ep_sgid.gid_guid == 0)) || 8180 ((ep_rp->ep_dgid.gid_prefix == 0) && 8181 (ep_rp->ep_dgid.gid_guid == 0))) { 8182 mutex_exit(&ep_rp->ep_lock); 8183 D2("daplka_ep_failback : skip unconnected endpoints\n"); 8184 return (0); 8185 } 8186 8187 /* 8188 * matching destination ep 8189 * when dgid is NULL, the async event is a local port up. 8190 * dgid becomes wild card, i.e. all endpoints match 8191 */ 8192 dgid = (ib_gid_t *)arg; 8193 if (dgid == NULL) { 8194 /* ignore loopback ep */ 8195 if ((ep_rp->ep_sgid.gid_prefix == ep_rp->ep_dgid.gid_prefix) && 8196 (ep_rp->ep_sgid.gid_guid == ep_rp->ep_dgid.gid_guid)) { 8197 mutex_exit(&ep_rp->ep_lock); 8198 D2("daplka_ep_failback : skip loopback endpoints\n"); 8199 return (0); 8200 } 8201 } else { 8202 /* matching remote ep */ 8203 if ((ep_rp->ep_dgid.gid_prefix != dgid->gid_prefix) || 8204 (ep_rp->ep_dgid.gid_guid != dgid->gid_guid)) { 8205 mutex_exit(&ep_rp->ep_lock); 8206 D2("daplka_ep_failback : unrelated endpoints\n"); 8207 return (0); 8208 } 8209 } 8210 8211 /* call get and set altpath with original dgid used in ep_connect */ 8212 if (daplka_ep_altpath(ep_rp, &ep_rp->ep_dgid)) { 8213 mutex_exit(&ep_rp->ep_lock); 8214 return (0); 8215 } 8216 8217 /* 8218 * wait for migration state to be ARMed 8219 * e.g. 
a post_send msg will transit mig_state from REARM to ARM 8220 */ 8221 for (i = 0; i < daplka_query_aft_setaltpath; i++) { 8222 bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t)); 8223 status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs); 8224 if (status != IBT_SUCCESS) { 8225 mutex_exit(&ep_rp->ep_lock); 8226 DERR("daplka_ep_altpath : ibt_query_rc_channel err\n"); 8227 return (0); 8228 } 8229 if (chan_attrs.rc_mig_state == IBT_STATE_ARMED) 8230 break; 8231 } 8232 8233 D2("daplka_ep_altpath : query[%d] mig_st=%d\n", 8234 i, chan_attrs.rc_mig_state); 8235 D2("daplka_ep_altpath : P sgid=%llx:%llx dgid=%llx:%llx\n", 8236 (longlong_t) 8237 chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_prefix, 8238 (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_guid, 8239 (longlong_t) 8240 chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_prefix, 8241 (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_guid); 8242 D2("daplka_ep_altpath : A sgid=%llx:%llx dgid=%llx:%llx\n", 8243 (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_prefix, 8244 (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_guid, 8245 (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_prefix, 8246 (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_guid); 8247 8248 /* skip failback on ARMed state not reached or env override */ 8249 if ((i >= daplka_query_aft_setaltpath) || (daplka_failback == 0)) { 8250 mutex_exit(&ep_rp->ep_lock); 8251 DERR("daplka_ep_altpath : ARMed state not reached\n"); 8252 return (0); 8253 } 8254 8255 D2("daplka_ep_failback : ibt_migrate_path() to original ep\n"); 8256 status = ibt_migrate_path(ep_rp->ep_chan_hdl); 8257 if (status != IBT_SUCCESS) { 8258 mutex_exit(&ep_rp->ep_lock); 8259 DERR("daplka_ep_failback : migration failed " 8260 "status %d\n", status); 8261 return (0); 8262 } 8263 8264 /* call get and altpath with NULL dgid to indicate unspecified dgid */ 8265 (void) daplka_ep_altpath(ep_rp, NULL); 8266 
mutex_exit(&ep_rp->ep_lock); 8267 return (0); 8268 } 8269 8270 /* 8271 * IBTF wrappers used for resource accounting 8272 */ 8273 static ibt_status_t 8274 daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *ep_rp, ibt_hca_hdl_t hca_hdl, 8275 ibt_chan_alloc_flags_t flags, ibt_rc_chan_alloc_args_t *args, 8276 ibt_channel_hdl_t *chan_hdl_p, ibt_chan_sizes_t *sizes) 8277 { 8278 daplka_hca_t *hca_p; 8279 uint32_t max_qps; 8280 boolean_t acct_enabled; 8281 ibt_status_t status; 8282 8283 acct_enabled = daplka_accounting_enabled; 8284 hca_p = ep_rp->ep_hca; 8285 max_qps = daplka_max_qp_percent * hca_p->hca_attr.hca_max_chans / 100; 8286 8287 if (acct_enabled) { 8288 if (daplka_max_qp_percent != 0 && 8289 max_qps <= hca_p->hca_qp_count) { 8290 DERR("ibt_alloc_rc_channel: resource limit exceeded " 8291 "(limit %d, count %d)\n", max_qps, 8292 hca_p->hca_qp_count); 8293 return (IBT_INSUFF_RESOURCE); 8294 } 8295 DAPLKA_RS_ACCT_INC(ep_rp, 1); 8296 atomic_inc_32(&hca_p->hca_qp_count); 8297 } 8298 status = ibt_alloc_rc_channel(hca_hdl, flags, args, chan_hdl_p, sizes); 8299 8300 if (status != IBT_SUCCESS && acct_enabled) { 8301 DAPLKA_RS_ACCT_DEC(ep_rp, 1); 8302 atomic_dec_32(&hca_p->hca_qp_count); 8303 } 8304 return (status); 8305 } 8306 8307 static ibt_status_t 8308 daplka_ibt_free_channel(daplka_ep_resource_t *ep_rp, ibt_channel_hdl_t chan_hdl) 8309 { 8310 daplka_hca_t *hca_p; 8311 ibt_status_t status; 8312 8313 hca_p = ep_rp->ep_hca; 8314 8315 status = ibt_free_channel(chan_hdl); 8316 if (status != IBT_SUCCESS) { 8317 return (status); 8318 } 8319 if (DAPLKA_RS_ACCT_CHARGED(ep_rp) > 0) { 8320 DAPLKA_RS_ACCT_DEC(ep_rp, 1); 8321 atomic_dec_32(&hca_p->hca_qp_count); 8322 } 8323 return (status); 8324 } 8325 8326 static ibt_status_t 8327 daplka_ibt_alloc_cq(daplka_evd_resource_t *evd_rp, ibt_hca_hdl_t hca_hdl, 8328 ibt_cq_attr_t *cq_attr, ibt_cq_hdl_t *ibt_cq_p, uint32_t *real_size) 8329 { 8330 daplka_hca_t *hca_p; 8331 uint32_t max_cqs; 8332 boolean_t acct_enabled; 8333 ibt_status_t 
status; 8334 8335 acct_enabled = daplka_accounting_enabled; 8336 hca_p = evd_rp->evd_hca; 8337 max_cqs = daplka_max_cq_percent * hca_p->hca_attr.hca_max_cq / 100; 8338 8339 if (acct_enabled) { 8340 if (daplka_max_cq_percent != 0 && 8341 max_cqs <= hca_p->hca_cq_count) { 8342 DERR("ibt_alloc_cq: resource limit exceeded " 8343 "(limit %d, count %d)\n", max_cqs, 8344 hca_p->hca_cq_count); 8345 return (IBT_INSUFF_RESOURCE); 8346 } 8347 DAPLKA_RS_ACCT_INC(evd_rp, 1); 8348 atomic_inc_32(&hca_p->hca_cq_count); 8349 } 8350 status = ibt_alloc_cq(hca_hdl, cq_attr, ibt_cq_p, real_size); 8351 8352 if (status != IBT_SUCCESS && acct_enabled) { 8353 DAPLKA_RS_ACCT_DEC(evd_rp, 1); 8354 atomic_dec_32(&hca_p->hca_cq_count); 8355 } 8356 return (status); 8357 } 8358 8359 static ibt_status_t 8360 daplka_ibt_free_cq(daplka_evd_resource_t *evd_rp, ibt_cq_hdl_t cq_hdl) 8361 { 8362 daplka_hca_t *hca_p; 8363 ibt_status_t status; 8364 8365 hca_p = evd_rp->evd_hca; 8366 8367 status = ibt_free_cq(cq_hdl); 8368 if (status != IBT_SUCCESS) { 8369 return (status); 8370 } 8371 if (DAPLKA_RS_ACCT_CHARGED(evd_rp) > 0) { 8372 DAPLKA_RS_ACCT_DEC(evd_rp, 1); 8373 atomic_dec_32(&hca_p->hca_cq_count); 8374 } 8375 return (status); 8376 } 8377 8378 static ibt_status_t 8379 daplka_ibt_alloc_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl, 8380 ibt_pd_flags_t flags, ibt_pd_hdl_t *pd_hdl_p) 8381 { 8382 daplka_hca_t *hca_p; 8383 uint32_t max_pds; 8384 boolean_t acct_enabled; 8385 ibt_status_t status; 8386 8387 acct_enabled = daplka_accounting_enabled; 8388 hca_p = pd_rp->pd_hca; 8389 max_pds = daplka_max_pd_percent * hca_p->hca_attr.hca_max_pd / 100; 8390 8391 if (acct_enabled) { 8392 if (daplka_max_pd_percent != 0 && 8393 max_pds <= hca_p->hca_pd_count) { 8394 DERR("ibt_alloc_pd: resource limit exceeded " 8395 "(limit %d, count %d)\n", max_pds, 8396 hca_p->hca_pd_count); 8397 return (IBT_INSUFF_RESOURCE); 8398 } 8399 DAPLKA_RS_ACCT_INC(pd_rp, 1); 8400 atomic_inc_32(&hca_p->hca_pd_count); 8401 } 8402 
status = ibt_alloc_pd(hca_hdl, flags, pd_hdl_p); 8403 8404 if (status != IBT_SUCCESS && acct_enabled) { 8405 DAPLKA_RS_ACCT_DEC(pd_rp, 1); 8406 atomic_dec_32(&hca_p->hca_pd_count); 8407 } 8408 return (status); 8409 } 8410 8411 static ibt_status_t 8412 daplka_ibt_free_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl, 8413 ibt_pd_hdl_t pd_hdl) 8414 { 8415 daplka_hca_t *hca_p; 8416 ibt_status_t status; 8417 8418 hca_p = pd_rp->pd_hca; 8419 8420 status = ibt_free_pd(hca_hdl, pd_hdl); 8421 if (status != IBT_SUCCESS) { 8422 return (status); 8423 } 8424 if (DAPLKA_RS_ACCT_CHARGED(pd_rp) > 0) { 8425 DAPLKA_RS_ACCT_DEC(pd_rp, 1); 8426 atomic_dec_32(&hca_p->hca_pd_count); 8427 } 8428 return (status); 8429 } 8430 8431 static ibt_status_t 8432 daplka_ibt_alloc_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl, 8433 ibt_pd_hdl_t pd_hdl, ibt_mw_flags_t flags, ibt_mw_hdl_t *mw_hdl_p, 8434 ibt_rkey_t *rkey_p) 8435 { 8436 daplka_hca_t *hca_p; 8437 uint32_t max_mws; 8438 boolean_t acct_enabled; 8439 ibt_status_t status; 8440 8441 acct_enabled = daplka_accounting_enabled; 8442 hca_p = mw_rp->mw_hca; 8443 max_mws = daplka_max_mw_percent * hca_p->hca_attr.hca_max_mem_win / 100; 8444 8445 if (acct_enabled) { 8446 if (daplka_max_mw_percent != 0 && 8447 max_mws <= hca_p->hca_mw_count) { 8448 DERR("ibt_alloc_mw: resource limit exceeded " 8449 "(limit %d, count %d)\n", max_mws, 8450 hca_p->hca_mw_count); 8451 return (IBT_INSUFF_RESOURCE); 8452 } 8453 DAPLKA_RS_ACCT_INC(mw_rp, 1); 8454 atomic_inc_32(&hca_p->hca_mw_count); 8455 } 8456 status = ibt_alloc_mw(hca_hdl, pd_hdl, flags, mw_hdl_p, rkey_p); 8457 8458 if (status != IBT_SUCCESS && acct_enabled) { 8459 DAPLKA_RS_ACCT_DEC(mw_rp, 1); 8460 atomic_dec_32(&hca_p->hca_mw_count); 8461 } 8462 return (status); 8463 } 8464 8465 static ibt_status_t 8466 daplka_ibt_free_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl, 8467 ibt_mw_hdl_t mw_hdl) 8468 { 8469 daplka_hca_t *hca_p; 8470 ibt_status_t status; 8471 8472 hca_p = mw_rp->mw_hca; 
8473 8474 status = ibt_free_mw(hca_hdl, mw_hdl); 8475 if (status != IBT_SUCCESS) { 8476 return (status); 8477 } 8478 if (DAPLKA_RS_ACCT_CHARGED(mw_rp) > 0) { 8479 DAPLKA_RS_ACCT_DEC(mw_rp, 1); 8480 atomic_dec_32(&hca_p->hca_mw_count); 8481 } 8482 return (status); 8483 } 8484 8485 static ibt_status_t 8486 daplka_ibt_register_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl, 8487 ibt_pd_hdl_t pd_hdl, ibt_mr_attr_t *mr_attr, ibt_mr_hdl_t *mr_hdl_p, 8488 ibt_mr_desc_t *mr_desc_p) 8489 { 8490 daplka_hca_t *hca_p; 8491 uint32_t max_mrs; 8492 boolean_t acct_enabled; 8493 ibt_status_t status; 8494 8495 acct_enabled = daplka_accounting_enabled; 8496 hca_p = mr_rp->mr_hca; 8497 max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100; 8498 8499 if (acct_enabled) { 8500 if (daplka_max_mr_percent != 0 && 8501 max_mrs <= hca_p->hca_mr_count) { 8502 DERR("ibt_register_mr: resource limit exceeded " 8503 "(limit %d, count %d)\n", max_mrs, 8504 hca_p->hca_mr_count); 8505 return (IBT_INSUFF_RESOURCE); 8506 } 8507 DAPLKA_RS_ACCT_INC(mr_rp, 1); 8508 atomic_inc_32(&hca_p->hca_mr_count); 8509 } 8510 status = ibt_register_mr(hca_hdl, pd_hdl, mr_attr, mr_hdl_p, mr_desc_p); 8511 8512 if (status != IBT_SUCCESS && acct_enabled) { 8513 DAPLKA_RS_ACCT_DEC(mr_rp, 1); 8514 atomic_dec_32(&hca_p->hca_mr_count); 8515 } 8516 return (status); 8517 } 8518 8519 static ibt_status_t 8520 daplka_ibt_register_shared_mr(daplka_mr_resource_t *mr_rp, 8521 ibt_hca_hdl_t hca_hdl, ibt_mr_hdl_t mr_hdl, ibt_pd_hdl_t pd_hdl, 8522 ibt_smr_attr_t *smr_attr_p, ibt_mr_hdl_t *mr_hdl_p, 8523 ibt_mr_desc_t *mr_desc_p) 8524 { 8525 daplka_hca_t *hca_p; 8526 uint32_t max_mrs; 8527 boolean_t acct_enabled; 8528 ibt_status_t status; 8529 8530 acct_enabled = daplka_accounting_enabled; 8531 hca_p = mr_rp->mr_hca; 8532 max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100; 8533 8534 if (acct_enabled) { 8535 if (daplka_max_mr_percent != 0 && 8536 max_mrs <= hca_p->hca_mr_count) { 8537 
DERR("ibt_register_shared_mr: resource limit exceeded " 8538 "(limit %d, count %d)\n", max_mrs, 8539 hca_p->hca_mr_count); 8540 return (IBT_INSUFF_RESOURCE); 8541 } 8542 DAPLKA_RS_ACCT_INC(mr_rp, 1); 8543 atomic_inc_32(&hca_p->hca_mr_count); 8544 } 8545 status = ibt_register_shared_mr(hca_hdl, mr_hdl, pd_hdl, 8546 smr_attr_p, mr_hdl_p, mr_desc_p); 8547 8548 if (status != IBT_SUCCESS && acct_enabled) { 8549 DAPLKA_RS_ACCT_DEC(mr_rp, 1); 8550 atomic_dec_32(&hca_p->hca_mr_count); 8551 } 8552 return (status); 8553 } 8554 8555 static ibt_status_t 8556 daplka_ibt_deregister_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl, 8557 ibt_mr_hdl_t mr_hdl) 8558 { 8559 daplka_hca_t *hca_p; 8560 ibt_status_t status; 8561 8562 hca_p = mr_rp->mr_hca; 8563 8564 status = ibt_deregister_mr(hca_hdl, mr_hdl); 8565 if (status != IBT_SUCCESS) { 8566 return (status); 8567 } 8568 if (DAPLKA_RS_ACCT_CHARGED(mr_rp) > 0) { 8569 DAPLKA_RS_ACCT_DEC(mr_rp, 1); 8570 atomic_dec_32(&hca_p->hca_mr_count); 8571 } 8572 return (status); 8573 } 8574 8575 static ibt_status_t 8576 daplka_ibt_alloc_srq(daplka_srq_resource_t *srq_rp, ibt_hca_hdl_t hca_hdl, 8577 ibt_srq_flags_t flags, ibt_pd_hdl_t pd, ibt_srq_sizes_t *reqsz, 8578 ibt_srq_hdl_t *srq_hdl_p, ibt_srq_sizes_t *realsz) 8579 { 8580 daplka_hca_t *hca_p; 8581 uint32_t max_srqs; 8582 boolean_t acct_enabled; 8583 ibt_status_t status; 8584 8585 acct_enabled = daplka_accounting_enabled; 8586 hca_p = srq_rp->srq_hca; 8587 max_srqs = daplka_max_srq_percent * hca_p->hca_attr.hca_max_srqs / 100; 8588 8589 if (acct_enabled) { 8590 if (daplka_max_srq_percent != 0 && 8591 max_srqs <= hca_p->hca_srq_count) { 8592 DERR("ibt_alloc_srq: resource limit exceeded " 8593 "(limit %d, count %d)\n", max_srqs, 8594 hca_p->hca_srq_count); 8595 return (IBT_INSUFF_RESOURCE); 8596 } 8597 DAPLKA_RS_ACCT_INC(srq_rp, 1); 8598 atomic_inc_32(&hca_p->hca_srq_count); 8599 } 8600 status = ibt_alloc_srq(hca_hdl, flags, pd, reqsz, srq_hdl_p, realsz); 8601 8602 if (status != 
IBT_SUCCESS && acct_enabled) { 8603 DAPLKA_RS_ACCT_DEC(srq_rp, 1); 8604 atomic_dec_32(&hca_p->hca_srq_count); 8605 } 8606 return (status); 8607 } 8608 8609 static ibt_status_t 8610 daplka_ibt_free_srq(daplka_srq_resource_t *srq_rp, ibt_srq_hdl_t srq_hdl) 8611 { 8612 daplka_hca_t *hca_p; 8613 ibt_status_t status; 8614 8615 hca_p = srq_rp->srq_hca; 8616 8617 D3("ibt_free_srq: %p %p\n", srq_rp, srq_hdl); 8618 8619 status = ibt_free_srq(srq_hdl); 8620 if (status != IBT_SUCCESS) { 8621 return (status); 8622 } 8623 if (DAPLKA_RS_ACCT_CHARGED(srq_rp) > 0) { 8624 DAPLKA_RS_ACCT_DEC(srq_rp, 1); 8625 atomic_dec_32(&hca_p->hca_srq_count); 8626 } 8627 return (status); 8628 } 8629 8630 8631 static int 8632 daplka_common_ioctl(int cmd, minor_t rnum, intptr_t arg, int mode, 8633 cred_t *cred, int *rvalp) 8634 { 8635 int error; 8636 8637 switch (cmd) { 8638 case DAPL_IA_CREATE: 8639 error = daplka_ia_create(rnum, arg, mode, cred, rvalp); 8640 break; 8641 8642 /* can potentially add other commands here */ 8643 8644 default: 8645 DERR("daplka_common_ioctl: cmd not supported\n"); 8646 error = DDI_FAILURE; 8647 } 8648 return (error); 8649 } 8650 8651 static int 8652 daplka_evd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8653 cred_t *cred, int *rvalp) 8654 { 8655 int error; 8656 8657 switch (cmd) { 8658 case DAPL_EVD_CREATE: 8659 error = daplka_evd_create(rp, arg, mode, cred, rvalp); 8660 break; 8661 8662 case DAPL_CQ_RESIZE: 8663 error = daplka_cq_resize(rp, arg, mode, cred, rvalp); 8664 break; 8665 8666 case DAPL_EVENT_POLL: 8667 error = daplka_event_poll(rp, arg, mode, cred, rvalp); 8668 break; 8669 8670 case DAPL_EVENT_WAKEUP: 8671 error = daplka_event_wakeup(rp, arg, mode, cred, rvalp); 8672 break; 8673 8674 case DAPL_EVD_MODIFY_CNO: 8675 error = daplka_evd_modify_cno(rp, arg, mode, cred, rvalp); 8676 break; 8677 8678 case DAPL_EVD_FREE: 8679 error = daplka_evd_free(rp, arg, mode, cred, rvalp); 8680 break; 8681 8682 default: 8683 DERR("daplka_evd_ioctl: cmd 
not supported\n"); 8684 error = DDI_FAILURE; 8685 } 8686 return (error); 8687 } 8688 8689 static int 8690 daplka_ep_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8691 cred_t *cred, int *rvalp) 8692 { 8693 int error; 8694 8695 switch (cmd) { 8696 case DAPL_EP_MODIFY: 8697 error = daplka_ep_modify(rp, arg, mode, cred, rvalp); 8698 break; 8699 8700 case DAPL_EP_FREE: 8701 error = daplka_ep_free(rp, arg, mode, cred, rvalp); 8702 break; 8703 8704 case DAPL_EP_CONNECT: 8705 error = daplka_ep_connect(rp, arg, mode, cred, rvalp); 8706 break; 8707 8708 case DAPL_EP_DISCONNECT: 8709 error = daplka_ep_disconnect(rp, arg, mode, cred, rvalp); 8710 break; 8711 8712 case DAPL_EP_REINIT: 8713 error = daplka_ep_reinit(rp, arg, mode, cred, rvalp); 8714 break; 8715 8716 case DAPL_EP_CREATE: 8717 error = daplka_ep_create(rp, arg, mode, cred, rvalp); 8718 break; 8719 8720 default: 8721 DERR("daplka_ep_ioctl: cmd not supported\n"); 8722 error = DDI_FAILURE; 8723 } 8724 return (error); 8725 } 8726 8727 static int 8728 daplka_mr_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8729 cred_t *cred, int *rvalp) 8730 { 8731 int error; 8732 8733 switch (cmd) { 8734 case DAPL_MR_REGISTER: 8735 error = daplka_mr_register(rp, arg, mode, cred, rvalp); 8736 break; 8737 8738 case DAPL_MR_REGISTER_LMR: 8739 error = daplka_mr_register_lmr(rp, arg, mode, cred, rvalp); 8740 break; 8741 8742 case DAPL_MR_REGISTER_SHARED: 8743 error = daplka_mr_register_shared(rp, arg, mode, cred, rvalp); 8744 break; 8745 8746 case DAPL_MR_DEREGISTER: 8747 error = daplka_mr_deregister(rp, arg, mode, cred, rvalp); 8748 break; 8749 8750 case DAPL_MR_SYNC: 8751 error = daplka_mr_sync(rp, arg, mode, cred, rvalp); 8752 break; 8753 8754 default: 8755 DERR("daplka_mr_ioctl: cmd not supported\n"); 8756 error = DDI_FAILURE; 8757 } 8758 return (error); 8759 } 8760 8761 static int 8762 daplka_mw_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8763 cred_t *cred, int *rvalp) 8764 { 
8765 int error; 8766 8767 switch (cmd) { 8768 case DAPL_MW_ALLOC: 8769 error = daplka_mw_alloc(rp, arg, mode, cred, rvalp); 8770 break; 8771 8772 case DAPL_MW_FREE: 8773 error = daplka_mw_free(rp, arg, mode, cred, rvalp); 8774 break; 8775 8776 default: 8777 DERR("daplka_mw_ioctl: cmd not supported\n"); 8778 error = DDI_FAILURE; 8779 } 8780 return (error); 8781 } 8782 8783 static int 8784 daplka_cno_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8785 cred_t *cred, int *rvalp) 8786 { 8787 int error; 8788 8789 switch (cmd) { 8790 case DAPL_CNO_ALLOC: 8791 error = daplka_cno_alloc(rp, arg, mode, cred, rvalp); 8792 break; 8793 8794 case DAPL_CNO_FREE: 8795 error = daplka_cno_free(rp, arg, mode, cred, rvalp); 8796 break; 8797 8798 case DAPL_CNO_WAIT: 8799 error = daplka_cno_wait(rp, arg, mode, cred, rvalp); 8800 break; 8801 8802 default: 8803 DERR("daplka_cno_ioctl: cmd not supported\n"); 8804 error = DDI_FAILURE; 8805 } 8806 return (error); 8807 } 8808 8809 static int 8810 daplka_pd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8811 cred_t *cred, int *rvalp) 8812 { 8813 int error; 8814 8815 switch (cmd) { 8816 case DAPL_PD_ALLOC: 8817 error = daplka_pd_alloc(rp, arg, mode, cred, rvalp); 8818 break; 8819 8820 case DAPL_PD_FREE: 8821 error = daplka_pd_free(rp, arg, mode, cred, rvalp); 8822 break; 8823 8824 default: 8825 DERR("daplka_pd_ioctl: cmd not supported\n"); 8826 error = DDI_FAILURE; 8827 } 8828 return (error); 8829 } 8830 8831 static int 8832 daplka_sp_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8833 cred_t *cred, int *rvalp) 8834 { 8835 int error; 8836 8837 switch (cmd) { 8838 case DAPL_SERVICE_REGISTER: 8839 error = daplka_service_register(rp, arg, mode, cred, rvalp); 8840 break; 8841 8842 case DAPL_SERVICE_DEREGISTER: 8843 error = daplka_service_deregister(rp, arg, mode, cred, rvalp); 8844 break; 8845 8846 default: 8847 DERR("daplka_sp_ioctl: cmd not supported\n"); 8848 error = DDI_FAILURE; 8849 } 8850 
return (error); 8851 } 8852 8853 static int 8854 daplka_srq_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8855 cred_t *cred, int *rvalp) 8856 { 8857 int error; 8858 8859 switch (cmd) { 8860 case DAPL_SRQ_CREATE: 8861 error = daplka_srq_create(rp, arg, mode, cred, rvalp); 8862 break; 8863 8864 case DAPL_SRQ_RESIZE: 8865 error = daplka_srq_resize(rp, arg, mode, cred, rvalp); 8866 break; 8867 8868 case DAPL_SRQ_FREE: 8869 error = daplka_srq_free(rp, arg, mode, cred, rvalp); 8870 break; 8871 8872 default: 8873 DERR("daplka_srq_ioctl: cmd(%d) not supported\n", cmd); 8874 error = DDI_FAILURE; 8875 break; 8876 } 8877 return (error); 8878 } 8879 8880 static int 8881 daplka_misc_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8882 cred_t *cred, int *rvalp) 8883 { 8884 int error; 8885 8886 switch (cmd) { 8887 case DAPL_CR_ACCEPT: 8888 error = daplka_cr_accept(rp, arg, mode, cred, rvalp); 8889 break; 8890 8891 case DAPL_CR_REJECT: 8892 error = daplka_cr_reject(rp, arg, mode, cred, rvalp); 8893 break; 8894 8895 case DAPL_IA_QUERY: 8896 error = daplka_ia_query(rp, arg, mode, cred, rvalp); 8897 break; 8898 8899 case DAPL_CR_HANDOFF: 8900 error = daplka_cr_handoff(rp, arg, mode, cred, rvalp); 8901 break; 8902 8903 default: 8904 DERR("daplka_misc_ioctl: cmd not supported\n"); 8905 error = DDI_FAILURE; 8906 } 8907 return (error); 8908 } 8909 8910 /*ARGSUSED*/ 8911 static int 8912 daplka_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, 8913 int *rvalp) 8914 { 8915 daplka_ia_resource_t *ia_rp; 8916 minor_t rnum; 8917 int error = 0; 8918 8919 rnum = getminor(dev); 8920 ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum); 8921 if (ia_rp == NULL) { 8922 DERR("ioctl: resource not found, rnum %d\n", rnum); 8923 return (ENXIO); 8924 } 8925 8926 D4("ioctl: rnum = %d, cmd = 0x%x\n", rnum, cmd); 8927 if (DAPLKA_RS_RESERVED(ia_rp)) { 8928 error = daplka_common_ioctl(cmd, rnum, arg, mode, cred, rvalp); 8929 return (error); 8930 } 8931 
if (DAPLKA_RS_TYPE(ia_rp) != DAPL_TYPE_IA) { 8932 DERR("ioctl: invalid type %d\n", DAPLKA_RS_TYPE(ia_rp)); 8933 error = EINVAL; 8934 goto cleanup; 8935 } 8936 if (ia_rp->ia_pid != ddi_get_pid()) { 8937 DERR("ioctl: ia_pid %d != pid %d\n", 8938 ia_rp->ia_pid, ddi_get_pid()); 8939 error = EINVAL; 8940 goto cleanup; 8941 } 8942 8943 switch (cmd & DAPL_TYPE_MASK) { 8944 case DAPL_TYPE_EVD: 8945 error = daplka_evd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8946 break; 8947 8948 case DAPL_TYPE_EP: 8949 error = daplka_ep_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8950 break; 8951 8952 case DAPL_TYPE_MR: 8953 error = daplka_mr_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8954 break; 8955 8956 case DAPL_TYPE_MW: 8957 error = daplka_mw_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8958 break; 8959 8960 case DAPL_TYPE_PD: 8961 error = daplka_pd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8962 break; 8963 8964 case DAPL_TYPE_SP: 8965 error = daplka_sp_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8966 break; 8967 8968 case DAPL_TYPE_CNO: 8969 error = daplka_cno_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8970 break; 8971 8972 case DAPL_TYPE_MISC: 8973 error = daplka_misc_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8974 break; 8975 8976 case DAPL_TYPE_SRQ: 8977 error = daplka_srq_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8978 break; 8979 8980 default: 8981 DERR("ioctl: invalid dapl type = %d\n", DAPLKA_RS_TYPE(ia_rp)); 8982 error = DDI_FAILURE; 8983 } 8984 8985 cleanup:; 8986 DAPLKA_RS_UNREF(ia_rp); 8987 return (error); 8988 } 8989 8990 /* ARGSUSED */ 8991 static int 8992 daplka_open(dev_t *devp, int flag, int otyp, struct cred *cred) 8993 { 8994 minor_t rnum; 8995 8996 /* 8997 * Char only 8998 */ 8999 if (otyp != OTYP_CHR) { 9000 return (EINVAL); 9001 } 9002 9003 /* 9004 * Only zero can be opened, clones are used for resources. 
9005 */ 9006 if (getminor(*devp) != DAPLKA_DRIVER_MINOR) { 9007 DERR("daplka_open: bad minor %d\n", getminor(*devp)); 9008 return (ENODEV); 9009 } 9010 9011 /* 9012 * - allocate new minor number 9013 * - update devp argument to new device 9014 */ 9015 if (daplka_resource_reserve(&rnum) == 0) { 9016 *devp = makedevice(getmajor(*devp), rnum); 9017 } else { 9018 return (ENOMEM); 9019 } 9020 9021 return (DDI_SUCCESS); 9022 } 9023 9024 /* ARGSUSED */ 9025 static int 9026 daplka_close(dev_t dev, int flag, int otyp, struct cred *cred) 9027 { 9028 daplka_ia_resource_t *ia_rp; 9029 minor_t rnum = getminor(dev); 9030 9031 /* 9032 * Char only 9033 */ 9034 if (otyp != OTYP_CHR) { 9035 return (EINVAL); 9036 } 9037 D2("daplka_close: closing rnum = %d\n", rnum); 9038 atomic_inc_32(&daplka_pending_close); 9039 9040 /* 9041 * remove from resource table. 9042 */ 9043 ia_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum); 9044 9045 /* 9046 * remove the initial reference 9047 */ 9048 if (ia_rp != NULL) { 9049 DAPLKA_RS_UNREF(ia_rp); 9050 } 9051 atomic_dec_32(&daplka_pending_close); 9052 return (DDI_SUCCESS); 9053 } 9054 9055 9056 /* 9057 * Resource management routines 9058 * 9059 * We start with no resource array. Each time we run out of slots, we 9060 * reallocate a new larger array and copy the pointer to the new array and 9061 * a new resource blk is allocated and added to the hash table. 9062 * 9063 * The resource control block contains: 9064 * root - array of pointer of resource blks 9065 * sz - current size of array. 9066 * len - last valid entry in array. 
9067 * 9068 * A search operation based on a resource number is as follows: 9069 * index = rnum / RESOURCE_BLKSZ; 9070 * ASSERT(index < resource_block.len); 9071 * ASSERT(index < resource_block.sz); 9072 * offset = rnum % RESOURCE_BLKSZ; 9073 * ASSERT(offset >= resource_block.root[index]->base); 9074 * ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ); 9075 * return resource_block.root[index]->blks[offset]; 9076 * 9077 * A resource blk is freed when its used count reaches zero. 9078 */ 9079 9080 /* 9081 * initializes the global resource table 9082 */ 9083 static void 9084 daplka_resource_init(void) 9085 { 9086 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(daplka_resource)) 9087 rw_init(&daplka_resource.daplka_rct_lock, NULL, RW_DRIVER, NULL); 9088 daplka_resource.daplka_rc_len = 0; 9089 daplka_resource.daplka_rc_sz = 0; 9090 daplka_resource.daplka_rc_cnt = 0; 9091 daplka_resource.daplka_rc_flag = 0; 9092 daplka_resource.daplka_rc_root = NULL; 9093 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(daplka_resource)) 9094 } 9095 9096 /* 9097 * destroys the global resource table 9098 */ 9099 static void 9100 daplka_resource_fini(void) 9101 { 9102 int i; 9103 9104 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER); 9105 for (i = 0; i < daplka_resource.daplka_rc_len; i++) { 9106 daplka_resource_blk_t *blk; 9107 int j; 9108 9109 blk = daplka_resource.daplka_rc_root[i]; 9110 if (blk == NULL) { 9111 continue; 9112 } 9113 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) { 9114 if (blk->daplka_rcblk_blks[j] != NULL) { 9115 DERR("resource_fini: non-null slot %d, %p\n", 9116 j, blk->daplka_rcblk_blks[j]); 9117 } 9118 } 9119 kmem_free(blk, sizeof (*blk)); 9120 daplka_resource.daplka_rc_root[i] = NULL; 9121 } 9122 if (daplka_resource.daplka_rc_root != NULL) { 9123 uint_t sz; 9124 9125 sz = daplka_resource.daplka_rc_sz * 9126 sizeof (daplka_resource_blk_t *); 9127 kmem_free(daplka_resource.daplka_rc_root, (uint_t)sz); 9128 daplka_resource.daplka_rc_root = NULL; 9129 daplka_resource.daplka_rc_len 
= 0; 9130 daplka_resource.daplka_rc_sz = 0; 9131 } 9132 rw_exit(&daplka_resource.daplka_rct_lock); 9133 rw_destroy(&daplka_resource.daplka_rct_lock); 9134 } 9135 9136 /* 9137 * reserves a slot in the global resource table. 9138 * this is called by the open() syscall. it is needed because 9139 * at open() time, we do not have sufficient information to 9140 * create an IA resource. the library needs to subsequently 9141 * call daplka_ia_create to insert an IA resource into this 9142 * reserved slot. 9143 */ 9144 static int 9145 daplka_resource_reserve(minor_t *rnum) 9146 { 9147 int i, j, empty = -1; 9148 daplka_resource_blk_t *blk; 9149 9150 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER); 9151 /* 9152 * Try to find an empty slot 9153 */ 9154 for (i = 0; i < daplka_resource.daplka_rc_len; i++) { 9155 blk = daplka_resource.daplka_rc_root[i]; 9156 if (blk != NULL && blk->daplka_rcblk_avail > 0) { 9157 9158 D3("resource_alloc: available blks %d\n", 9159 blk->daplka_rcblk_avail); 9160 9161 /* 9162 * found an empty slot in this blk 9163 */ 9164 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) { 9165 if (blk->daplka_rcblk_blks[j] == NULL) { 9166 *rnum = (minor_t) 9167 (j + (i * DAPLKA_RC_BLKSZ)); 9168 blk->daplka_rcblk_blks[j] = 9169 (daplka_resource_t *) 9170 DAPLKA_RC_RESERVED; 9171 blk->daplka_rcblk_avail--; 9172 daplka_resource.daplka_rc_cnt++; 9173 rw_exit(&daplka_resource. 
9174 daplka_rct_lock); 9175 return (0); 9176 } 9177 } 9178 } else if (blk == NULL && empty < 0) { 9179 /* 9180 * remember first empty slot 9181 */ 9182 empty = i; 9183 } 9184 } 9185 9186 /* 9187 * Couldn't find anything, allocate a new blk 9188 * Do we need to reallocate the root array 9189 */ 9190 if (empty < 0) { 9191 if (daplka_resource.daplka_rc_len == 9192 daplka_resource.daplka_rc_sz) { 9193 /* 9194 * Allocate new array and copy current stuff into it 9195 */ 9196 daplka_resource_blk_t **p; 9197 uint_t newsz = (uint_t)daplka_resource.daplka_rc_sz + 9198 DAPLKA_RC_BLKSZ; 9199 9200 D3("resource_alloc: increasing no. of buckets to %d\n", 9201 newsz); 9202 9203 p = kmem_zalloc(newsz * sizeof (*p), daplka_km_flags); 9204 9205 if (daplka_resource.daplka_rc_root) { 9206 uint_t oldsz; 9207 9208 oldsz = (uint_t)(daplka_resource.daplka_rc_sz * 9209 (int)sizeof (*p)); 9210 9211 /* 9212 * Copy old data into new space and 9213 * free old stuff 9214 */ 9215 bcopy(daplka_resource.daplka_rc_root, p, oldsz); 9216 kmem_free(daplka_resource.daplka_rc_root, 9217 oldsz); 9218 } 9219 9220 daplka_resource.daplka_rc_root = p; 9221 daplka_resource.daplka_rc_sz = (int)newsz; 9222 } 9223 9224 empty = daplka_resource.daplka_rc_len; 9225 daplka_resource.daplka_rc_len++; 9226 9227 D3("resource_alloc: daplka_rc_len %d\n", 9228 daplka_resource.daplka_rc_len); 9229 } 9230 9231 /* 9232 * Allocate a new blk 9233 */ 9234 blk = kmem_zalloc(sizeof (*blk), daplka_km_flags); 9235 ASSERT(daplka_resource.daplka_rc_root[empty] == NULL); 9236 daplka_resource.daplka_rc_root[empty] = blk; 9237 blk->daplka_rcblk_avail = DAPLKA_RC_BLKSZ - 1; 9238 9239 /* 9240 * Allocate slot 9241 */ 9242 *rnum = (minor_t)(empty * DAPLKA_RC_BLKSZ); 9243 blk->daplka_rcblk_blks[0] = (daplka_resource_t *)DAPLKA_RC_RESERVED; 9244 daplka_resource.daplka_rc_cnt++; 9245 rw_exit(&daplka_resource.daplka_rct_lock); 9246 9247 return (0); 9248 } 9249 9250 /* 9251 * removes resource from global resource table 9252 */ 9253 static 
daplka_resource_t * 9254 daplka_resource_remove(minor_t rnum) 9255 { 9256 int i, j; 9257 daplka_resource_blk_t *blk; 9258 daplka_resource_t *p; 9259 9260 i = (int)(rnum / DAPLKA_RC_BLKSZ); 9261 j = (int)(rnum % DAPLKA_RC_BLKSZ); 9262 9263 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER); 9264 if (i >= daplka_resource.daplka_rc_len) { 9265 rw_exit(&daplka_resource.daplka_rct_lock); 9266 DERR("resource_remove: invalid rnum %d\n", rnum); 9267 return (NULL); 9268 } 9269 9270 ASSERT(daplka_resource.daplka_rc_root); 9271 ASSERT(i < daplka_resource.daplka_rc_len); 9272 ASSERT(i < daplka_resource.daplka_rc_sz); 9273 blk = daplka_resource.daplka_rc_root[i]; 9274 if (blk == NULL) { 9275 rw_exit(&daplka_resource.daplka_rct_lock); 9276 DERR("resource_remove: invalid rnum %d\n", rnum); 9277 return (NULL); 9278 } 9279 9280 if (blk->daplka_rcblk_blks[j] == NULL) { 9281 rw_exit(&daplka_resource.daplka_rct_lock); 9282 DERR("resource_remove: blk->daplka_rcblk_blks[j] == NULL\n"); 9283 return (NULL); 9284 } 9285 p = blk->daplka_rcblk_blks[j]; 9286 blk->daplka_rcblk_blks[j] = NULL; 9287 blk->daplka_rcblk_avail++; 9288 if (blk->daplka_rcblk_avail == DAPLKA_RC_BLKSZ) { 9289 /* 9290 * free this blk 9291 */ 9292 kmem_free(blk, sizeof (*blk)); 9293 daplka_resource.daplka_rc_root[i] = NULL; 9294 } 9295 daplka_resource.daplka_rc_cnt--; 9296 rw_exit(&daplka_resource.daplka_rct_lock); 9297 9298 if ((intptr_t)p == DAPLKA_RC_RESERVED) { 9299 return (NULL); 9300 } else { 9301 return (p); 9302 } 9303 } 9304 9305 /* 9306 * inserts resource into the slot designated by rnum 9307 */ 9308 static int 9309 daplka_resource_insert(minor_t rnum, daplka_resource_t *rp) 9310 { 9311 int i, j, error = -1; 9312 daplka_resource_blk_t *blk; 9313 9314 /* 9315 * Find resource and lock it in WRITER mode 9316 * search for available resource slot 9317 */ 9318 9319 i = (int)(rnum / DAPLKA_RC_BLKSZ); 9320 j = (int)(rnum % DAPLKA_RC_BLKSZ); 9321 9322 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER); 9323 if (i 
>= daplka_resource.daplka_rc_len) { 9324 rw_exit(&daplka_resource.daplka_rct_lock); 9325 DERR("resource_insert: resource %d not found\n", rnum); 9326 return (-1); 9327 } 9328 9329 blk = daplka_resource.daplka_rc_root[i]; 9330 if (blk != NULL) { 9331 ASSERT(i < daplka_resource.daplka_rc_len); 9332 ASSERT(i < daplka_resource.daplka_rc_sz); 9333 9334 if ((intptr_t)blk->daplka_rcblk_blks[j] == DAPLKA_RC_RESERVED) { 9335 blk->daplka_rcblk_blks[j] = rp; 9336 error = 0; 9337 } else { 9338 DERR("resource_insert: %d not reserved, blk = %p\n", 9339 rnum, blk->daplka_rcblk_blks[j]); 9340 } 9341 } else { 9342 DERR("resource_insert: resource %d not found\n", rnum); 9343 } 9344 rw_exit(&daplka_resource.daplka_rct_lock); 9345 return (error); 9346 } 9347 9348 /* 9349 * finds resource using minor device number 9350 */ 9351 static daplka_resource_t * 9352 daplka_resource_lookup(minor_t rnum) 9353 { 9354 int i, j; 9355 daplka_resource_blk_t *blk; 9356 daplka_resource_t *rp; 9357 9358 /* 9359 * Find resource and lock it in READER mode 9360 * search for available resource slot 9361 */ 9362 9363 i = (int)(rnum / DAPLKA_RC_BLKSZ); 9364 j = (int)(rnum % DAPLKA_RC_BLKSZ); 9365 9366 rw_enter(&daplka_resource.daplka_rct_lock, RW_READER); 9367 if (i >= daplka_resource.daplka_rc_len) { 9368 rw_exit(&daplka_resource.daplka_rct_lock); 9369 DERR("resource_lookup: resource %d not found\n", rnum); 9370 return (NULL); 9371 } 9372 9373 blk = daplka_resource.daplka_rc_root[i]; 9374 if (blk != NULL) { 9375 ASSERT(i < daplka_resource.daplka_rc_len); 9376 ASSERT(i < daplka_resource.daplka_rc_sz); 9377 9378 rp = blk->daplka_rcblk_blks[j]; 9379 if (rp == NULL || (intptr_t)rp == DAPLKA_RC_RESERVED) { 9380 D3("resource_lookup: %d not found, blk = %p\n", 9381 rnum, blk->daplka_rcblk_blks[j]); 9382 } else { 9383 DAPLKA_RS_REF((daplka_ia_resource_t *)rp); 9384 } 9385 } else { 9386 DERR("resource_lookup: resource %d not found\n", rnum); 9387 rp = NULL; 9388 } 9389 rw_exit(&daplka_resource.daplka_rct_lock); 9390 
return (rp); 9391 } 9392 9393 /* 9394 * generic hash table implementation 9395 */ 9396 9397 /* 9398 * daplka_hash_create: 9399 * initializes a hash table with the specified parameters 9400 * 9401 * input: 9402 * htblp pointer to hash table 9403 * 9404 * nbuckets number of buckets (must be power of 2) 9405 * 9406 * free_func this function is called on each hash 9407 * table element when daplka_hash_destroy 9408 * is called 9409 * 9410 * lookup_func if daplka_hash_lookup is able to find 9411 * the desired object, this function is 9412 * applied on the object before 9413 * daplka_hash_lookup returns 9414 * output: 9415 * none 9416 * 9417 * return value(s): 9418 * EINVAL nbuckets is not a power of 2 9419 * ENOMEM cannot allocate buckets 9420 * 0 success 9421 */ 9422 static int 9423 daplka_hash_create(daplka_hash_table_t *htblp, uint_t nbuckets, 9424 void (*free_func)(void *), void (*lookup_func)(void *)) 9425 { 9426 int i; 9427 9428 if ((nbuckets & ~(nbuckets - 1)) != nbuckets) { 9429 DERR("hash_create: nbuckets not power of 2\n"); 9430 return (EINVAL); 9431 } 9432 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*htblp)) 9433 9434 htblp->ht_buckets = 9435 kmem_zalloc(sizeof (daplka_hash_bucket_t) * nbuckets, 9436 daplka_km_flags); 9437 if (htblp->ht_buckets == NULL) { 9438 DERR("hash_create: cannot allocate buckets\n"); 9439 return (ENOMEM); 9440 } 9441 for (i = 0; i < nbuckets; i++) { 9442 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i])) 9443 htblp->ht_buckets[i].hb_count = 0; 9444 htblp->ht_buckets[i].hb_entries = NULL; 9445 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i])) 9446 } 9447 rw_init(&htblp->ht_table_lock, NULL, RW_DRIVER, NULL); 9448 mutex_init(&htblp->ht_key_lock, NULL, MUTEX_DRIVER, NULL); 9449 9450 htblp->ht_count = 0; 9451 htblp->ht_next_hkey = (uint64_t)gethrtime(); 9452 htblp->ht_nbuckets = nbuckets; 9453 htblp->ht_free_func = free_func; 9454 htblp->ht_lookup_func = lookup_func; 9455 htblp->ht_initialized = B_TRUE; 9456 D3("hash_create: done, 
buckets = %d\n", nbuckets); 9457 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*htblp)) 9458 return (0); 9459 } 9460 9461 /* 9462 * daplka_hash_insert: 9463 * inserts an object into a hash table 9464 * 9465 * input: 9466 * htblp pointer to hash table 9467 * 9468 * hkeyp pointer to hash key. 9469 * *hkeyp being non-zero means that the caller 9470 * has generated its own hkey. if *hkeyp is zero, 9471 * this function will generate an hkey for the 9472 * caller. it is recommended that the caller 9473 * leave the hkey generation to this function 9474 * because the hkey is more likely to be evenly 9475 * distributed. 9476 * 9477 * objp pointer to object to be inserted into 9478 * hash table 9479 * 9480 * output: 9481 * hkeyp the generated hkey is returned via this pointer 9482 * 9483 * return value(s): 9484 * EINVAL invalid parameter 9485 * ENOMEM cannot allocate hash entry 9486 * 0 successful 9487 */ 9488 static int 9489 daplka_hash_insert(daplka_hash_table_t *htblp, uint64_t *hkeyp, void *objp) 9490 { 9491 daplka_hash_entry_t *hep, *curr_hep; 9492 daplka_hash_bucket_t *hbp; 9493 uint32_t bucket; 9494 uint64_t hkey; 9495 9496 if (hkeyp == NULL) { 9497 DERR("hash_insert: hkeyp == NULL\n"); 9498 return (EINVAL); 9499 } 9500 hep = kmem_zalloc(sizeof (*hep), daplka_km_flags); 9501 if (hep == NULL) { 9502 DERR("hash_insert: cannot alloc hash_entry\n"); 9503 return (ENOMEM); 9504 } 9505 if (*hkeyp == 0) { 9506 /* generate a new key */ 9507 mutex_enter(&htblp->ht_key_lock); 9508 hkey = ++htblp->ht_next_hkey; 9509 if (hkey == 0) { 9510 hkey = htblp->ht_next_hkey = (uint64_t)gethrtime(); 9511 } 9512 mutex_exit(&htblp->ht_key_lock); 9513 } else { 9514 /* use user generated key */ 9515 hkey = *hkeyp; 9516 } 9517 9518 /* only works if ht_nbuckets is a power of 2 */ 9519 bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1)); 9520 ASSERT(objp != NULL); 9521 ASSERT(bucket < htblp->ht_nbuckets); 9522 9523 rw_enter(&htblp->ht_table_lock, RW_WRITER); 9524 hep->he_hkey = hkey; 9525 hep->he_objp = 
objp; 9526 9527 /* look for duplicate entries */ 9528 hbp = &htblp->ht_buckets[bucket]; 9529 curr_hep = hbp->hb_entries; 9530 while (curr_hep != NULL) { 9531 if (curr_hep->he_hkey == hep->he_hkey) { 9532 break; 9533 } 9534 curr_hep = curr_hep->he_next; 9535 } 9536 if (curr_hep != NULL) { 9537 DERR("hash_insert: found duplicate hash entry: " 9538 "bucket %d, hkey 0x%016llx\n", 9539 bucket, (longlong_t)hep->he_hkey); 9540 kmem_free(hep, sizeof (*hep)); 9541 rw_exit(&htblp->ht_table_lock); 9542 return (EINVAL); 9543 } 9544 hep->he_next = hbp->hb_entries; 9545 hbp->hb_entries = hep; 9546 hbp->hb_count++; 9547 htblp->ht_count++; 9548 rw_exit(&htblp->ht_table_lock); 9549 9550 if (*hkeyp == 0) { 9551 *hkeyp = hkey; 9552 ASSERT(*hkeyp != 0); 9553 } 9554 D3("hash_insert: htblp 0x%p, hkey = 0x%016llx, bucket = %d\n", 9555 htblp, (longlong_t)*hkeyp, bucket); 9556 return (0); 9557 } 9558 9559 /* 9560 * daplka_hash_remove: 9561 * removes object identified by hkey from hash table 9562 * 9563 * input: 9564 * htblp pointer to hash table 9565 * 9566 * hkey hkey that identifies the object to be removed 9567 * 9568 * output: 9569 * objpp pointer to pointer to object. 9570 * if remove is successful, the removed object 9571 * will be returned via *objpp. 
9572 * 9573 * return value(s): 9574 * EINVAL cannot find hash entry 9575 * 0 successful 9576 */ 9577 static int 9578 daplka_hash_remove(daplka_hash_table_t *htblp, uint64_t hkey, void **objpp) 9579 { 9580 daplka_hash_entry_t *free_hep, **curr_hepp; 9581 daplka_hash_bucket_t *hbp; 9582 uint32_t bucket; 9583 9584 bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1)); 9585 9586 rw_enter(&htblp->ht_table_lock, RW_WRITER); 9587 hbp = &htblp->ht_buckets[bucket]; 9588 9589 curr_hepp = &hbp->hb_entries; 9590 while (*curr_hepp != NULL) { 9591 if ((*curr_hepp)->he_hkey == hkey) { 9592 break; 9593 } 9594 curr_hepp = &(*curr_hepp)->he_next; 9595 } 9596 if (*curr_hepp == NULL) { 9597 DERR("hash_remove: cannot find hash entry: " 9598 "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey); 9599 rw_exit(&htblp->ht_table_lock); 9600 return (EINVAL); 9601 } else { 9602 if (objpp != NULL) { 9603 *objpp = (*curr_hepp)->he_objp; 9604 } 9605 free_hep = *curr_hepp; 9606 *curr_hepp = (*curr_hepp)->he_next; 9607 kmem_free(free_hep, sizeof (*free_hep)); 9608 } 9609 hbp->hb_count--; 9610 htblp->ht_count--; 9611 D3("hash_remove: removed entry, hkey 0x%016llx, bucket %d, " 9612 "hb_count %d, hb_count %d\n", 9613 (longlong_t)hkey, bucket, hbp->hb_count, htblp->ht_count); 9614 rw_exit(&htblp->ht_table_lock); 9615 return (0); 9616 } 9617 9618 /* 9619 * daplka_hash_walk: 9620 * walks through the entire hash table. applying func on each of 9621 * the inserted objects. stops walking if func returns non-zero. 9622 * 9623 * input: 9624 * htblp pointer to hash table 9625 * 9626 * func function to be applied on each object 9627 * 9628 * farg second argument to func 9629 * 9630 * lockmode can be RW_WRITER or RW_READER. this 9631 * allows the caller to choose what type 9632 * of lock to acquire before walking the 9633 * table. 
9634 * 9635 * output: 9636 * none 9637 * 9638 * return value(s): 9639 * none 9640 */ 9641 static void 9642 daplka_hash_walk(daplka_hash_table_t *htblp, int (*func)(void *, void *), 9643 void *farg, krw_t lockmode) 9644 { 9645 daplka_hash_entry_t *curr_hep; 9646 daplka_hash_bucket_t *hbp; 9647 uint32_t bucket, retval = 0; 9648 9649 ASSERT(lockmode == RW_WRITER || lockmode == RW_READER); 9650 9651 /* needed for warlock */ 9652 if (lockmode == RW_WRITER) { 9653 rw_enter(&htblp->ht_table_lock, RW_WRITER); 9654 } else { 9655 rw_enter(&htblp->ht_table_lock, RW_READER); 9656 } 9657 for (bucket = 0; bucket < htblp->ht_nbuckets && retval == 0; bucket++) { 9658 hbp = &htblp->ht_buckets[bucket]; 9659 curr_hep = hbp->hb_entries; 9660 while (curr_hep != NULL) { 9661 retval = (*func)(curr_hep->he_objp, farg); 9662 if (retval != 0) { 9663 break; 9664 } 9665 curr_hep = curr_hep->he_next; 9666 } 9667 } 9668 rw_exit(&htblp->ht_table_lock); 9669 } 9670 9671 /* 9672 * daplka_hash_lookup: 9673 * finds object from hkey 9674 * 9675 * input: 9676 * htblp pointer to hash table 9677 * 9678 * hkey hkey that identifies the object to be looked up 9679 * 9680 * output: 9681 * none 9682 * 9683 * return value(s): 9684 * NULL if not found 9685 * object pointer if found 9686 */ 9687 static void * 9688 daplka_hash_lookup(daplka_hash_table_t *htblp, uint64_t hkey) 9689 { 9690 daplka_hash_entry_t *curr_hep; 9691 uint32_t bucket; 9692 void *objp; 9693 9694 bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1)); 9695 9696 rw_enter(&htblp->ht_table_lock, RW_READER); 9697 curr_hep = htblp->ht_buckets[bucket].hb_entries; 9698 while (curr_hep != NULL) { 9699 if (curr_hep->he_hkey == hkey) { 9700 break; 9701 } 9702 curr_hep = curr_hep->he_next; 9703 } 9704 if (curr_hep == NULL) { 9705 DERR("hash_lookup: cannot find hash entry: " 9706 "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey); 9707 rw_exit(&htblp->ht_table_lock); 9708 return (NULL); 9709 } 9710 objp = curr_hep->he_objp; 9711 ASSERT(objp != NULL); 
9712 if (htblp->ht_lookup_func != NULL) { 9713 (*htblp->ht_lookup_func)(objp); 9714 } 9715 rw_exit(&htblp->ht_table_lock); 9716 return (objp); 9717 } 9718 9719 /* 9720 * daplka_hash_destroy: 9721 * destroys hash table. applies free_func on all inserted objects. 9722 * 9723 * input: 9724 * htblp pointer to hash table 9725 * 9726 * output: 9727 * none 9728 * 9729 * return value(s): 9730 * none 9731 */ 9732 static void 9733 daplka_hash_destroy(daplka_hash_table_t *htblp) 9734 { 9735 daplka_hash_entry_t *curr_hep, *free_hep; 9736 daplka_hash_entry_t *free_list = NULL; 9737 daplka_hash_bucket_t *hbp; 9738 uint32_t bucket, cnt, total = 0; 9739 9740 if (!htblp->ht_initialized) { 9741 DERR("hash_destroy: not initialized\n"); 9742 return; 9743 } 9744 /* free all elements from hash table */ 9745 rw_enter(&htblp->ht_table_lock, RW_WRITER); 9746 for (bucket = 0; bucket < htblp->ht_nbuckets; bucket++) { 9747 hbp = &htblp->ht_buckets[bucket]; 9748 9749 /* build list of elements to be freed */ 9750 curr_hep = hbp->hb_entries; 9751 cnt = 0; 9752 while (curr_hep != NULL) { 9753 cnt++; 9754 free_hep = curr_hep; 9755 curr_hep = curr_hep->he_next; 9756 9757 free_hep->he_next = free_list; 9758 free_list = free_hep; 9759 } 9760 ASSERT(cnt == hbp->hb_count); 9761 total += cnt; 9762 hbp->hb_count = 0; 9763 hbp->hb_entries = NULL; 9764 } 9765 ASSERT(total == htblp->ht_count); 9766 D3("hash_destroy: htblp 0x%p, nbuckets %d, freed %d hash entries\n", 9767 htblp, htblp->ht_nbuckets, total); 9768 rw_exit(&htblp->ht_table_lock); 9769 9770 /* free all objects, now without holding the hash table lock */ 9771 cnt = 0; 9772 while (free_list != NULL) { 9773 cnt++; 9774 free_hep = free_list; 9775 free_list = free_list->he_next; 9776 if (htblp->ht_free_func != NULL) { 9777 (*htblp->ht_free_func)(free_hep->he_objp); 9778 } 9779 kmem_free(free_hep, sizeof (*free_hep)); 9780 } 9781 ASSERT(total == cnt); 9782 9783 /* free hash buckets and destroy locks */ 9784 kmem_free(htblp->ht_buckets, 9785 sizeof 
(daplka_hash_bucket_t) * htblp->ht_nbuckets); 9786 9787 rw_enter(&htblp->ht_table_lock, RW_WRITER); 9788 htblp->ht_buckets = NULL; 9789 htblp->ht_count = 0; 9790 htblp->ht_nbuckets = 0; 9791 htblp->ht_free_func = NULL; 9792 htblp->ht_lookup_func = NULL; 9793 htblp->ht_initialized = B_FALSE; 9794 rw_exit(&htblp->ht_table_lock); 9795 9796 mutex_destroy(&htblp->ht_key_lock); 9797 rw_destroy(&htblp->ht_table_lock); 9798 } 9799 9800 /* 9801 * daplka_hash_getsize: 9802 * return the number of objects in hash table 9803 * 9804 * input: 9805 * htblp pointer to hash table 9806 * 9807 * output: 9808 * none 9809 * 9810 * return value(s): 9811 * number of objects in hash table 9812 */ 9813 static uint32_t 9814 daplka_hash_getsize(daplka_hash_table_t *htblp) 9815 { 9816 uint32_t sz; 9817 9818 rw_enter(&htblp->ht_table_lock, RW_READER); 9819 sz = htblp->ht_count; 9820 rw_exit(&htblp->ht_table_lock); 9821 9822 return (sz); 9823 } 9824 9825 /* 9826 * this function is used as ht_lookup_func above when lookup is called. 9827 * other types of objs may use a more elaborate lookup_func. 9828 */ 9829 static void 9830 daplka_hash_generic_lookup(void *obj) 9831 { 9832 daplka_resource_t *rp = (daplka_resource_t *)obj; 9833 9834 mutex_enter(&rp->rs_reflock); 9835 rp->rs_refcnt++; 9836 ASSERT(rp->rs_refcnt != 0); 9837 mutex_exit(&rp->rs_reflock); 9838 } 9839 9840 /* 9841 * Generates a non-zero 32 bit hash key used for the timer hash table. 9842 */ 9843 static uint32_t 9844 daplka_timer_hkey_gen() 9845 { 9846 uint32_t new_hkey; 9847 9848 do { 9849 new_hkey = atomic_inc_32_nv(&daplka_timer_hkey); 9850 } while (new_hkey == 0); 9851 9852 return (new_hkey); 9853 } 9854 9855 9856 /* 9857 * The DAPL KA debug logging routines 9858 */ 9859 9860 /* 9861 * Add the string str to the end of the debug log, followed by a newline. 
9862 */ 9863 static void 9864 daplka_dbglog(char *str) 9865 { 9866 size_t length; 9867 size_t remlen; 9868 9869 /* 9870 * If this is the first time we've written to the log, initialize it. 9871 */ 9872 if (!daplka_dbginit) { 9873 return; 9874 } 9875 mutex_enter(&daplka_dbglock); 9876 /* 9877 * Note the log is circular; if this string would run over the end, 9878 * we copy the first piece to the end and then the last piece to 9879 * the beginning of the log. 9880 */ 9881 length = strlen(str); 9882 9883 remlen = (size_t)sizeof (daplka_dbgbuf) - daplka_dbgnext - 1; 9884 9885 if (length > remlen) { 9886 if (remlen) 9887 bcopy(str, daplka_dbgbuf + daplka_dbgnext, remlen); 9888 daplka_dbgbuf[sizeof (daplka_dbgbuf) - 1] = '\0'; 9889 str += remlen; 9890 length -= remlen; 9891 daplka_dbgnext = 0; 9892 } 9893 bcopy(str, daplka_dbgbuf + daplka_dbgnext, length); 9894 daplka_dbgnext += length; 9895 9896 if (daplka_dbgnext >= sizeof (daplka_dbgbuf)) 9897 daplka_dbgnext = 0; 9898 mutex_exit(&daplka_dbglock); 9899 } 9900 9901 9902 /* 9903 * Add a printf-style message to whichever debug logs we're currently using. 9904 */ 9905 static void 9906 daplka_debug(const char *fmt, ...) 9907 { 9908 char buff[512]; 9909 va_list ap; 9910 /* 9911 * The system prepends the thread id and high resolution time 9912 * (nanoseconds are dropped and so are the upper digits) 9913 * to the specified string. 9914 * The unit for timestamp is 10 microseconds. 9915 * It wraps around every 10000 seconds. 9916 * Ex: gethrtime() = X ns = X/1000 us = X/10000 10 micro sec. 9917 */ 9918 int micro_time = (int)((gethrtime() / 10000) % 1000000000); 9919 (void) sprintf(buff, "th %p tm %9d: ", (void *)curthread, micro_time); 9920 9921 va_start(ap, fmt); 9922 (void) vsprintf(buff+strlen(buff), fmt, ap); 9923 va_end(ap); 9924 9925 daplka_dbglog(buff); 9926 } 9927 9928 static void 9929 daplka_console(const char *fmt, ...) 
9930 { 9931 char buff[512]; 9932 va_list ap; 9933 9934 va_start(ap, fmt); 9935 (void) vsprintf(buff, fmt, ap); 9936 va_end(ap); 9937 9938 cmn_err(CE_CONT, "%s", buff); 9939 } 9940