/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


/*
 * Overview of the RSM Kernel Agent:
 * ---------------------------------
 *
 * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
 * kernel agent is a pseudo device driver which makes use of the RSMPI
 * interface on behalf of the RSMAPI user library.
 *
 * The kernel agent functionality can be categorized into the following
 * components:
 * 1. Driver Infrastructure
 * 2. Export/Import Segment Management
 * 3. Internal resource allocation/deallocation
 *
 * The driver infrastructure includes the basic module loading entry points
 * like _init, _info and _fini to load, unload and report information about
 * the driver module. The driver infrastructure also includes the
 * autoconfiguration entry points, namely attach, detach and getinfo, for
 * device autoconfiguration.
 *
 * The kernel agent is a pseudo character device driver and exports
 * a cb_ops structure which defines the driver entry points for character
 * device access. This includes the open and close entry points. The
 * other entry points provided are ioctl, devmap, segmap and chpoll. The
 * read and write entry points are not used since the device is memory
 * mapped. Also, ddi_prop_op is used for the prop_op entry point.
 *
 * The ioctl entry point supports a number of commands, which are used by
 * the RSMAPI library in order to export and import segments. These
 * include commands for binding and rebinding the physical pages
 * allocated to the virtual address range, publishing the export segment,
 * unpublishing and republishing an export segment, creating an
 * import segment and a virtual connection from this import segment to
 * an export segment, performing scatter-gather data transfer, and barrier
 * operations.
 *
 *
 * Export and Import segments:
 * ---------------------------
 *
 * In order to create an RSM export segment a process allocates a range in its
 * virtual address space for the segment using standard Solaris interfaces.
 * The process then calls RSMAPI, which in turn makes an ioctl call to the
 * RSM kernel agent for an allocation of physical memory pages and for
 * creation of the export segment by binding these pages to the virtual
 * address range. These pages are locked in memory so that remote accesses
 * are always applied to the correct page. Then the RSM segment is published,
 * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
 * is assigned to it.
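 *
 * As an illustration, an exporter using the public RSMAPI (librsm)
 * drives the bind and publish ioctls described above with a call
 * sequence along the lines of the hedged sketch below; the controller
 * handle ctrl and the access list acl/acl_len are placeholders, not
 * names defined by this driver:
 *
 *	size_t len = 8192;	(two 4K pages, page aligned)
 *	caddr_t va = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE | MAP_ANON, -1, 0);
 *	rsm_memseg_export_handle_t expseg;
 *	rsm_memseg_id_t segid = 0;	(0 asks the system to choose an id)
 *	(void) rsm_memseg_export_create(ctrl, &expseg, (void *)va, len, 0);
 *	(void) rsm_memseg_export_publish(expseg, &segid, acl, acl_len);
 *
 * rsm_memseg_export_create() corresponds to the bind step (page
 * allocation and locking) and rsm_memseg_export_publish() to the
 * publish step, which assigns the segment id.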
 *
 * In order to import a published RSM segment, RSMAPI creates an import
 * segment and forms a virtual connection across the interconnect to the
 * export segment, via an ioctl into the kernel agent with the connect
 * command. The import segment setup is completed by mapping the
 * local device memory into the importer's virtual address space. The
 * mapping of the import segment is handled by the segmap/devmap
 * infrastructure described as follows.
 *
 * Segmap and Devmap interfaces:
 *
 * The RSM kernel agent allows device memory to be directly accessed by user
 * threads via memory mapping. In order to do so, the RSM kernel agent
 * supports the devmap and segmap entry points.
 *
 * The segmap entry point (rsm_segmap) is responsible for setting up a memory
 * mapping as requested by mmap. The devmap entry point (rsm_devmap) is
 * responsible for exporting the device memory to the user applications.
 * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then
 * control is transferred to the devmap_setup call, which calls rsm_devmap.
 *
 * rsm_devmap validates the user mapping to the device or kernel memory
 * and passes the information to the system for setting up the mapping. The
 * actual setting up of the mapping is done by devmap_devmem_setup (for
 * device memory) or devmap_umem_setup (for kernel memory). Callbacks are
 * registered for device context management via the devmap_devmem_setup
 * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
 * rsmmap_access and rsmmap_dup; they are called when a new mapping
 * is created, a mapping is freed, a mapping is accessed or an existing
 * mapping is duplicated, respectively. These callbacks allow the RSM kernel
 * agent to maintain state information associated with the mappings.
 * The state information is mainly in the form of a cookie list for the import
 * segment for which mapping has been done.
 *
 * Forced disconnect of import segments:
 *
 * When an exported segment is unpublished, the exporter sends a forced
 * disconnect message to all its importers. The importer segments are
 * unloaded and disconnected. This involves unloading the original
 * mappings and remapping to a preallocated kernel trash page. This is
 * done by devmap_umem_remap. The trash/dummy page is a kernel page,
 * preallocated by the kernel agent during attach using ddi_umem_alloc with
 * the DDI_UMEM_TRASH flag set. This avoids a core dump in the application
 * due to unloading of the original mappings.
 *
 * Additionally, every segment has a mapping generation number associated
 * with it. This is an entry in the barrier generation page, created
 * at attach time. This mapping generation number for the import
 * segments is incremented on a force disconnect to notify the application
 * of the force disconnect. On this notification, the application needs
 * to reconnect the segment to establish a new legitimate mapping.
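 *
 * A minimal sketch of the remap step follows; dhp (the devmap cookie of
 * the mapping being torn down) and maplen are illustrative placeholders,
 * while remap_cookie is the trash-page cookie this driver really
 * allocates in rsm_attach:
 *
 *	(void) devmap_umem_remap(dhp, rsm_dip, remap_cookie, 0, maplen,
 *	    PROT_READ | PROT_WRITE | PROT_USER, 0, NULL);
 *
 * After the remap, stray loads and stores by the application land
 * harmlessly in the trash page instead of faulting on an unloaded
 * translation.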
 *
 *
 * Locks used in the kernel agent:
 * -------------------------------
 *
 * The kernel agent uses a variety of mutexes and condition variables for
 * mutual exclusion of the shared data structures and for synchronization
 * between the various threads. Some of the locks are described as follows.
 *
 * Each resource structure, which represents either an export or an import
 * segment, has a lock associated with it. The lock is the resource mutex,
 * rsmrc_lock. This is used directly by the RSMRC_LOCK and RSMRC_UNLOCK
 * macros and in the rsmseglock_acquire and rsmseglock_release macros. An
 * additional lock called the rsmsi_lock is used for the shared import data
 * structure that is relevant for resources representing import segments.
 * There is also a condition variable associated with the resource called
 * s_cv. This is used to wait for events like a segment state change, etc.
 *
 * The resource structures are allocated from a pool of resource structures,
 * called rsm_resource. This pool is protected via a reader-writer lock,
 * also called rsmrc_lock.
 *
 * There are two separate hash tables, one for the export segments and
 * one for the import segments. The export segments are inserted into the
 * export segment hash table only after they have been published and the
 * import segments are inserted in the import segments list only after they
 * have successfully connected to an exported segment. These tables are
 * protected via reader-writer locks.
 *
 * Debug Support in the kernel agent:
 * ----------------------------------
 *
 * Debugging support in the kernel agent is provided by the following
 * macros.
 *
 * DBG_PRINTF((category, level, message)) is a macro which logs a debug
 * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer
 * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
 * on the definition of the category and level. All messages that belong to
 * the specified category (rsmdbg_category) and are of an equal or greater
 * severity than the specified level (rsmdbg_level) are logged. The message
 * is a string which uses the same formatting rules as the strings used in
 * printf.
 *
 * The category defines which component of the kernel agent has logged this
 * message. There are a number of categories that have been defined, such as
 * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT and RSM_EXPORT. A macro,
 * DBG_ADDCATEGORY, is used to add another category to the currently
 * specified category value so that the component using this new category
 * can also effectively log debug messages. Thus, the category of a specific
 * message is some combination of the available categories, and we can define
 * sub-categories if we want a finer level of granularity.
 *
 * The level defines the severity of the message. Different level values are
 * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
 * the least severe (debug level 0).
 *
 * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
 * variable or a string respectively.
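 *
 * For example, the following call (segid here is just an illustrative
 * variable) logs a message under both the import and export categories
 * at RSM_DEBUG severity:
 *
 *	DBG_PRINTF((RSM_IMPORT | RSM_EXPORT, RSM_DEBUG,
 *	    "rebind done: segid %u\n", segid));
 *
 * The message reaches rsmka_dbg only if rsmdbg_category includes at
 * least one of these two categories and rsmdbg_level admits messages of
 * RSM_DEBUG or greater severity.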
 *
 *
 * NOTES:
 *
 * Special Fork and Exec Handling:
 * -------------------------------
 *
 * The backing physical pages of an exported segment are always locked down.
 * Thus, there are two cases in which a process having exported segments
 * will cause a CPU to hang: (1) the process invokes exec; (2) a process
 * forks and invokes exit before the duped file descriptors for the export
 * segments are closed in the child process. The hang is caused because the
 * address space release algorithm in the Solaris VM subsystem is based on a
 * non-blocking loop which does not terminate while segments are locked
 * down. In addition to this, the Solaris VM subsystem lacks a callback
 * mechanism to the rsm kernel agent to allow unlocking these export
 * segment pages.
 *
 * In order to circumvent this problem, the kernel agent does the following.
 * The Solaris VM subsystem keeps memory segments in increasing order of
 * virtual addresses. Thus a special page (special_exit_offset) is allocated
 * by the kernel agent and is mmapped into the heap area of the process
 * address space (the mmap is done by the RSMAPI library). During the mmap
 * processing of this special page by the devmap infrastructure, a callback
 * (the same devmap context management callbacks discussed above) is
 * registered for an unmap.
 *
 * As discussed above, this page is processed by the Solaris address space
 * release code before any of the exported segment pages (which are allocated
 * from high memory). It is during this processing that the unmap callback
 * gets called, and this callback is responsible for force destroying the
 * exported segments and thus eliminating the problem of locked pages.
 *
 * Flow-control:
 * ------------
 *
 * A credit based flow control algorithm is used for messages whose
 * processing cannot be done in the interrupt context because it might
 * involve invoking rsmpi calls, or might take a long time to complete
 * or might need to allocate resources. The algorithm operates on a per
 * path basis. To send a message the pathend needs to have a credit and
 * it consumes one for every message that is flow controlled. On the
 * receiving pathend the message is put on a msgbuf_queue and a task is
 * dispatched on the worker thread - recv_taskq where it is processed.
 * After processing the message, the receiving pathend dequeues the message,
 * and if it has processed more than RSMIPC_LOTSFREE_MSGBUFS messages it
 * sends credits back to the sending pathend.
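 *
 * In outline, the credit accounting looks like the sketch below; the
 * counters avail_credits and processed are illustrative names only, the
 * real state lives in the path and send queue token structures:
 *
 *	sending pathend:
 *		if (avail_credits == 0)
 *			fail or defer the send;
 *		avail_credits--;
 *		transmit the flow-controlled message;
 *
 *	receiving pathend:
 *		enqueue message on msgbuf_queue, dispatch task to recv_taskq;
 *		... the task processes and dequeues the message ...
 *		if (++processed > RSMIPC_LOTSFREE_MSGBUFS)
 *			send the accumulated credits back to the sender;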
 *
 * RSM_DRTEST:
 * -----------
 *
 * This is used to enable DR testing using a test driver on test
 * platforms which do not support DR.
 *
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>

#include <sys/time.h>
#include <sys/errno.h>

#include <sys/file.h>
#include <sys/uio.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/open.h>
#include <sys/atomic.h>
#include <sys/mem_config.h>


#include <sys/ddi.h>
#include <sys/devops.h>
#include <sys/ddidevmap.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
#include <sys/ddi_impldefs.h>

#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ddi_impldefs.h>

#include <sys/modctl.h>

#include <sys/policy.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/param.h>

#include <sys/taskq.h>

#include <sys/rsm/rsm_common.h>
#include <sys/rsm/rsmapi_common.h>
#include <sys/rsm/rsm.h>
#include <rsm_in.h>
#include <sys/rsm/rsmka_path_int.h>
#include <sys/rsm/rsmpi.h>

#include <sys/modctl.h>
#include <sys/debug.h>

#include <sys/tuneable.h>

#ifdef RSM_DRTEST
extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
    void *arg);
extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
    void *arg);
#endif

extern void dbg_printf(int category, int level, char *fmt, ...);
extern void rsmka_pathmanager_init();
extern void rsmka_pathmanager_cleanup();
extern void rele_sendq_token();
extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
extern int rsmka_topology_ioctl(caddr_t, int, int);

extern pri_t maxclsyspri;
extern work_queue_t work_queue;
extern kmutex_t ipc_info_lock;
extern kmutex_t ipc_info_cvlock;
extern kcondvar_t ipc_info_cv;
extern kmutex_t path_hold_cvlock;
extern kcondvar_t path_hold_cv;

extern kmutex_t rsmka_buf_lock;

extern path_t *rsm_find_path(char *, int, rsm_addr_t);
extern adapter_t *rsmka_lookup_adapter(char *, int);
extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
extern boolean_t rsmka_do_path_active(path_t *, int);
extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
extern void rsmka_release_adapter(adapter_t *);
extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
extern void rsmka_dequeue_msgbuf(path_t *path);
extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
/* lint -w2 */

static int rsm_open(dev_t *, int, int, cred_t *);
static int rsm_close(dev_t, int, int, cred_t *);
static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *credp, int *rvalp);
static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
    uint_t);
static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
    uint_t, uint_t, cred_t *);
static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);
static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
    rsm_permission_t);
static void rsm_export_force_destroy(ddi_umem_cookie_t *);
static void rsmacl_free(rsmapi_access_entry_t *, int);
static void rsmpiacl_free(rsm_access_entry_t *, int);

static int rsm_inc_pgcnt(pgcnt_t);
static void rsm_dec_pgcnt(pgcnt_t);
static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
    size_t *);
static void exporter_quiesce();
static void rsmseg_suspend(rsmseg_t *, int *);
static void rsmsegshare_suspend(rsmseg_t *);
static int rsmseg_resume(rsmseg_t *, void **);
static int rsmsegshare_resume(rsmseg_t *);

static struct cb_ops rsm_cb_ops = {
	rsm_open,		/* open */
	rsm_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	rsm_ioctl,		/* ioctl */
	rsm_devmap,		/* devmap */
	NULL,			/* mmap */
	rsm_segmap,		/* segmap */
	rsm_chpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
	0,
	0,
	0
};

static struct dev_ops rsm_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	rsm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	rsm_attach,		/* attach */
	rsm_detach,		/* detach */
	nodev,			/* reset */
	&rsm_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* bus operations */
	0,
	ddi_quiesce_not_needed,	/* quiesce */
};

/*
 * Module linkage information for the kernel.
 */

static struct modldrv modldrv = {
	&mod_driverops,	/* Type of module. This one is a pseudo driver */
	"Remote Shared Memory Driver",
	&rsm_ops,	/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	0,
	0,
	0
};

static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);

static kphysm_setup_vector_t rsm_dr_callback_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	rsm_dr_callback_post_add,
	rsm_dr_callback_pre_del,
	rsm_dr_callback_post_del
};

/* This flag can be changed to 0 to help with PIT testing */
int rsmka_modunloadok = 1;
int no_reply_cnt = 0;

uint64_t rsm_ctrlmsg_errcnt = 0;
uint64_t rsm_ipcsend_errcnt = 0;

#define	MAX_NODES 64

static struct rsm_driver_data rsm_drv_data;
static struct rsmresource_table rsm_resource;

static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
static void rsmresource_destroy(void);
static int rsmresource_alloc(minor_t *);
static rsmresource_t *rsmresource_free(minor_t rnum);
static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
static int rsm_unpublish(rsmseg_t *seg, int mode);
static int rsm_unbind(rsmseg_t *seg);
static uint_t rsmhash(rsm_memseg_id_t key);
static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
static void rsmhash_free(rsmhash_table_t *rhash, int size);
static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
    void *cookie);
int rsm_disconnect(rsmseg_t *seg);
void rsmseg_unload(rsmseg_t *);
void rsm_suspend_complete(rsm_node_id_t src_node, int flag);

rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
    rsm_intr_q_op_t opcode, rsm_addr_t src,
    void *data, size_t size, rsm_intr_hand_arg_t arg);

static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);

rsm_node_id_t my_nodeid;

/* cookie, va, offsets and length for the barrier */
static rsm_gnum_t *bar_va;
static ddi_umem_cookie_t bar_cookie;
static off_t barrier_offset;
static size_t barrier_size;
static int max_segs;

/* cookie for the trash memory */
static ddi_umem_cookie_t remap_cookie;

static rsm_memseg_id_t rsm_nextavail_segmentid;

extern taskq_t *work_taskq;
extern char *taskq_name;

static dev_info_t *rsm_dip;	/* private copy of devinfo pointer */

static rsmhash_table_t rsm_export_segs;		/* list of exported segs */
rsmhash_table_t rsm_import_segs;		/* list of imported segs */
static rsmhash_table_t rsm_event_queues;	/* list of event queues */

static rsm_ipc_t rsm_ipc;		/* ipc info */

/* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
static list_head_t rsm_suspend_list;

/* list of descriptors for remote importers */
static importers_table_t importer_list;

kmutex_t rsm_suspend_cvlock;
kcondvar_t rsm_suspend_cv;

static kmutex_t rsm_lock;

adapter_t loopback_adapter;
rsm_controller_attr_t loopback_attr;

int rsmipc_send_controlmsg(path_t *path, int msgtype);

void rsmka_init_loopback();

int rsmka_null_seg_create(
    rsm_controller_handle_t,
    rsm_memseg_export_handle_t *,
    size_t,
    uint_t,
    rsm_memory_local_t *,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_seg_destroy(
    rsm_memseg_export_handle_t);

int rsmka_null_bind(
    rsm_memseg_export_handle_t,
    off_t,
    rsm_memory_local_t *,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_unbind(
    rsm_memseg_export_handle_t,
    off_t,
    size_t);

int rsmka_null_rebind(
    rsm_memseg_export_handle_t,
    off_t,
    rsm_memory_local_t *,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_publish(
    rsm_memseg_export_handle_t,
    rsm_access_entry_t [],
    uint_t,
    rsm_memseg_id_t,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);


int rsmka_null_republish(
    rsm_memseg_export_handle_t,
    rsm_access_entry_t [],
    uint_t,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_unpublish(
    rsm_memseg_export_handle_t);

rsm_ops_t null_rsmpi_ops;

/*
 * data and locks to keep track of total amount of exported memory
 */
static pgcnt_t rsm_pgcnt;
static pgcnt_t rsm_pgcnt_max;	/* max allowed */
static kmutex_t rsm_pgcnt_lock;

static int rsm_enable_dr;

static char loopback_str[] = "loopback";

int rsm_hash_size;

/*
 * The locking model is as follows:
 *
 * Local operations:
 *		find resource - grab reader lock on resource list
 *		insert rc - grab writer lock
 *		delete rc - grab writer lock and resource mutex
 *		read/write - no lock
 *
 * Remote invocations:
 *		find resource - grab read lock and resource mutex
 *
 * State:
 *		resource state - grab resource mutex
 */
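/*
 * For instance, a lookup on behalf of a remote invocation follows the
 * "find resource" rule above; this sketch mirrors what
 * rsmresource_lookup(rnum, RSM_LOCK) does later in this file (the
 * table-walk expression in angle brackets is elided for brevity):
 *
 *	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
 *	p = <resource at rnum in the rsm_resource table>;
 *	if ((p != NULL) && (p != RSMRC_RESERVED))
 *		mutex_enter(&p->rsmrc_lock);	(the resource mutex)
 *	rw_exit(&rsm_resource.rsmrc_lock);
 *
 * The reader lock keeps the table stable during the search, while the
 * per-resource mutex serializes state changes on the resource itself.
 */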
int
_init(void)
{
	int e;

	e = mod_install(&modlinkage);
	if (e != 0) {
		return (e);
	}

	mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);


	rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);

	rsm_hash_size = RSM_HASHSZ;

	rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);

	rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);

	mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);

	mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);

	mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);

	rsm_ipc.count = RSMIPC_SZ;
	rsm_ipc.wanted = 0;
	rsm_ipc.sequence = 0;

	(void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);

	for (e = 0; e < RSMIPC_SZ; e++) {
		rsmipc_slot_t *slot = &rsm_ipc.slots[e];

		RSMIPC_SET(slot, RSMIPC_FREE);
		mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
	}

	/*
	 * Initialize the suspend message list
	 */
	rsm_suspend_list.list_head = NULL;
	mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);

	/*
	 * It is assumed here that configuration data is available
	 * during system boot since _init may be called at that time.
	 */

	rsmka_pathmanager_init();

	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
	    "rsm: _init done\n"));

	return (DDI_SUCCESS);

}

int
_info(struct modinfo *modinfop)
{

	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int e;

	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
	    "rsm: _fini enter\n"));

	/*
	 * The rsmka_modunloadok flag is simply used to help with
	 * the PIT testing. Make this flag 0 to disallow modunload.
	 */
	if (rsmka_modunloadok == 0)
		return (EBUSY);

	/* rsm_detach will be called as a result of mod_remove */
	e = mod_remove(&modlinkage);
	if (e) {
		DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
		    "Unable to fini RSM %x\n", e));
		return (e);
	}

	rsmka_pathmanager_cleanup();

	rw_destroy(&rsm_resource.rsmrc_lock);

	rw_destroy(&rsm_export_segs.rsmhash_rw);
	rw_destroy(&rsm_import_segs.rsmhash_rw);
	rw_destroy(&rsm_event_queues.rsmhash_rw);

	mutex_destroy(&importer_list.lock);

	mutex_destroy(&rsm_ipc.lock);
	cv_destroy(&rsm_ipc.cv);

	(void) mutex_destroy(&rsm_suspend_list.list_lock);

	(void) mutex_destroy(&rsm_pgcnt_lock);

	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));

	return (DDI_SUCCESS);

}

/*ARGSUSED1*/
static int
rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	minor_t rnum;
	int percent;
	int ret;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
	default:
		DBG_PRINTF((category, RSM_ERR,
		    "rsm:rsm_attach - cmd not supported\n"));
		return (DDI_FAILURE);
	}

	if (rsm_dip != NULL) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm:rsm_attach - supports only "
		    "one instance\n"));
		return (DDI_FAILURE);
	}

	rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "enable-dynamic-reconfiguration", 1);

	mutex_enter(&rsm_drv_data.drv_lock);
	rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
	mutex_exit(&rsm_drv_data.drv_lock);

	if (rsm_enable_dr) {
#ifdef RSM_DRTEST
		ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
		    (void *)NULL);
#else
		ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
		    (void *)NULL);
#endif
		if (ret != 0) {
			/* drv_lock is not held here, so don't drop it */
			cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
			    "reconfiguration setup failed\n");
			return (DDI_FAILURE);
		}
	}

	mutex_enter(&rsm_drv_data.drv_lock);
	ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
	rsm_drv_data.drv_state = RSM_DRV_OK;
	cv_broadcast(&rsm_drv_data.drv_cv);
	mutex_exit(&rsm_drv_data.drv_lock);

	/*
	 * page_list_read_lock();
	 * xx_setup();
	 * page_list_read_unlock();
	 */

	rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "segment-hashtable-size", RSM_HASHSZ);
	if (rsm_hash_size == 0) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm: segment-hashtable-size in rsm.conf "
		    "must be greater than 0, defaulting to 128\n"));
		rsm_hash_size = RSM_HASHSZ;
	}

	DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
	    rsm_hash_size));

	rsm_pgcnt = 0;
	percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "max-exported-memory", 0);
	if (percent < 0) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm:rsm_attach not enough memory available to "
		    "export, or max-exported-memory set incorrectly.\n"));
		return (DDI_FAILURE);
	}
	/* 0 indicates no fixed upper limit. maxmem is the max */
	/* available pageable physical mem */
	rsm_pgcnt_max = (percent * maxmem) / 100;

	if (rsm_pgcnt_max > 0) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm: Available physical memory = %lu pages, "
		    "Max exportable memory = %lu pages",
		    maxmem, rsm_pgcnt_max));
	}

	/*
	 * Create minor number
	 */
	if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsm_attach - Unable to get "
		    "minor number\n"));
		return (DDI_FAILURE);
	}

	ASSERT(rnum == RSM_DRIVER_MINOR);

	if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
	    rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsm_attach - unable to allocate "
		    "minor #\n"));
		return (DDI_FAILURE);
	}

	rsm_dip = devi;
	/*
	 * Allocate the hashtables
	 */
	rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
	rsmhash_alloc(&rsm_import_segs, rsm_hash_size);

	importer_list.bucket = (importing_token_t **)
	    kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *), KM_SLEEP);

	/*
	 * Allocate a resource struct
	 */
	{
		rsmresource_t *p;

		p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);

		mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *)NULL);

		rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
	}

	/*
	 * Based on the rsm.conf property max-segments, determine the maximum
	 * number of segments that can be exported/imported. This is then used
	 * to determine the size for barrier failure pages.
	 */

	/* First get the max number of segments from the rsm.conf file */
	max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "max-segments", 0);
	if (max_segs == 0) {
		/* Use default number of segments */
		max_segs = RSM_MAX_NUM_SEG;
	}

	/*
	 * Based on the max number of segments allowed, determine the barrier
	 * page size. Add 1 to max_segs since the barrier page itself uses
	 * a slot.
	 */
	barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
	    PAGESIZE);

	/*
	 * Allocation of the barrier failure page
	 */
	bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
	    DDI_UMEM_SLEEP, &bar_cookie);

	/*
	 * Set the barrier_offset
	 */
	barrier_offset = 0;

	/*
	 * Allocate trash memory and get a cookie for it. This will be used
	 * when remapping segments during force disconnects. Allocate the
	 * trash memory with a large size which is page aligned.
	 */
	(void) ddi_umem_alloc((size_t)TRASHSIZE,
	    DDI_UMEM_TRASH, &remap_cookie);

	/* initialize user segment id allocation variable */
	rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;

	/*
	 * initialize the null_rsmpi_ops vector and the loopback adapter
	 */
	rsmka_init_loopback();


	ddi_report_dev(devi);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));

	return (DDI_SUCCESS);
}

/*
 * The call to mod_remove in the _fini routine will cause the system
 * to call rsm_detach
 */
/*ARGSUSED*/
static int
rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));

	switch (cmd) {
	case DDI_DETACH:
		break;
	default:
		DBG_PRINTF((category, RSM_ERR,
		    "rsm:rsm_detach - cmd %x not supported\n",
		    cmd));
		return (DDI_FAILURE);
	}

	mutex_enter(&rsm_drv_data.drv_lock);
	while (rsm_drv_data.drv_state != RSM_DRV_OK)
		cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
	rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
	mutex_exit(&rsm_drv_data.drv_lock);

	/*
	 * Unregister the DR callback functions
	 */
	if (rsm_enable_dr) {
#ifdef RSM_DRTEST
		rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
		    (void *)NULL);
#else
		kphysm_setup_func_unregister(&rsm_dr_callback_vec,
		    (void *)NULL);
#endif
	}

	mutex_enter(&rsm_drv_data.drv_lock);
	ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
	rsm_drv_data.drv_state = RSM_DRV_NEW;
	mutex_exit(&rsm_drv_data.drv_lock);

	ASSERT(rsm_suspend_list.list_head == NULL);

	/*
	 * Release all resources, seglist, controller, ...
	 */

	/* remove intersend queues */
	/* remove registered services */


	ddi_remove_minor_node(dip, DRIVER_NAME);
	rsm_dip = NULL;

	/*
	 * Free minor zero resource
	 */
	{
		rsmresource_t *p;

		p = rsmresource_free(RSM_DRIVER_MINOR);
		if (p) {
			mutex_destroy(&p->rsmrc_lock);
			kmem_free((void *)p, sizeof (*p));
		}
	}

	/*
	 * Free resource table
	 */

	rsmresource_destroy();

	/*
	 * Free the hash tables
	 */
	rsmhash_free(&rsm_export_segs, rsm_hash_size);
	rsmhash_free(&rsm_import_segs, rsm_hash_size);

	kmem_free((void *)importer_list.bucket,
	    rsm_hash_size * sizeof (importing_token_t *));
	importer_list.bucket = NULL;


	/* free barrier page */
	if (bar_cookie != NULL) {
		ddi_umem_free(bar_cookie);
	}
	bar_va = NULL;
	bar_cookie = NULL;

	/*
	 * Free the memory allocated for the trash
	 */
	if (remap_cookie != NULL) {
		ddi_umem_free(remap_cookie);
	}
	remap_cookie = NULL;

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	register int error;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (rsm_dip == NULL)
			error = DDI_FAILURE;
		else {
			*result = (void *)rsm_dip;
			error = DDI_SUCCESS;
		}
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
	return (error);
}

adapter_t *
rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
{
	adapter_t *adapter;
	char adapter_devname[MAXNAMELEN];
	int instance;
	DBG_DEFINE(category,
	    RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));

	instance = msg->cnum;

	if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
		return (NULL);
	}

	if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
		return (NULL);

	if (strcmp(adapter_devname, "loopback") == 0)
		return (&loopback_adapter);

	adapter = rsmka_lookup_adapter(adapter_devname, instance);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));

	return (adapter);
}


/*
 * *********************** Resource Number Management ********************
 * All resources are stored in a simple hash table. The table is an array
 * of pointers to resource blks. Each blk contains:
 *	base	- base number of this blk
 *	used	- number of used slots in this blk.
 *	blks	- array of pointers to resource items.
 * An entry in a resource blk is empty if it's NULL.
 *
 * We start with no resource array. Each time we run out of slots, we
 * reallocate a new larger array, copy the pointers into it, and
 * allocate a new resource blk which is added to the hash table.
 *
 * The resource control block contains:
 *	root	- array of pointers to resource blks
 *	sz	- current size of array.
 *	len	- last valid entry in array.
 *
 * A search operation based on a resource number is as follows:
 *	index = rnum / RESOURCE_BLKSZ;
 *	ASSERT(index < resource_block.len);
 *	ASSERT(index < resource_block.sz);
 *	offset = rnum % RESOURCE_BLKSZ;
 *	ASSERT(offset >= resource_block.root[index]->base);
 *	ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
 *	return resource_block.root[index]->blks[offset];
 *
 * A resource blk is freed when its used count reaches zero.
 */
static int
rsmresource_alloc(minor_t *rnum)
{

	/* search for available resource slot */
	int i, j, empty = -1;
	rsmresource_blk_t *blk;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_alloc enter\n"));

	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);

	/* Try to find an empty slot */
	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
		blk = rsm_resource.rsmrc_root[i];
		if (blk != NULL && blk->rsmrcblk_avail > 0) {
			/* found an empty slot in this blk */
			for (j = 0; j < RSMRC_BLKSZ; j++) {
				if (blk->rsmrcblk_blks[j] == NULL) {
					*rnum = (minor_t)
					    (j + (i * RSMRC_BLKSZ));
					/*
					 * obey gen page limits
					 */
					if (*rnum >= max_segs + 1) {
						if (empty < 0) {
							rw_exit(&rsm_resource.
							    rsmrc_lock);
							DBG_PRINTF((
							    RSM_KERNEL_ALL,
							    RSM_ERR,
							    "rsmresource"
							    "_alloc failed:"
							    "not enough res"
							    "%d\n", *rnum));
					return (RSMERR_INSUFFICIENT_RESOURCES);
						} else {
							/* use empty slot */
							break;
						}

					}

					blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
					blk->rsmrcblk_avail--;
					rw_exit(&rsm_resource.rsmrc_lock);
					DBG_PRINTF((RSM_KERNEL_ALL,
					    RSM_DEBUG_VERBOSE,
					    "rsmresource_alloc done\n"));
					return (RSM_SUCCESS);
				}
			}
		} else if (blk == NULL && empty < 0) {
			/* remember first empty slot */
			empty = i;
		}
	}

	/* Couldn't find anything, allocate a new blk */
	/*
	 * Do we need to reallocate the root array
	 */
	if (empty < 0) {
		if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
			/*
			 * Allocate new array and copy current stuff into it
			 */
			rsmresource_blk_t **p;
			uint_t newsz = (uint_t)rsm_resource.rsmrc_sz +
			    RSMRC_BLKSZ;
			/*
			 * Don't allocate more than the max valid rnum
			 */
			if (rsm_resource.rsmrc_len * RSMRC_BLKSZ >=
			    max_segs + 1) {
				rw_exit(&rsm_resource.rsmrc_lock);
				return (RSMERR_INSUFFICIENT_RESOURCES);
			}

			p = (rsmresource_blk_t **)kmem_zalloc(
			    newsz * sizeof (*p),
			    KM_SLEEP);

			if (rsm_resource.rsmrc_root) {
				uint_t oldsz;

				oldsz = (uint_t)(rsm_resource.rsmrc_sz *
				    (int)sizeof (*p));

				/*
				 * Copy old data into new space and
				 * free old stuff
				 */
				bcopy(rsm_resource.rsmrc_root, p, oldsz);
				kmem_free(rsm_resource.rsmrc_root, oldsz);
			}

			rsm_resource.rsmrc_root = p;
			rsm_resource.rsmrc_sz = (int)newsz;
		}

		empty = rsm_resource.rsmrc_len;
		rsm_resource.rsmrc_len++;
	}

	/*
	 * Allocate a new blk
	 */
	blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
	ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
	rsm_resource.rsmrc_root[empty] = blk;
	blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;

	/*
	 * Allocate slot
	 */

	*rnum = (minor_t)(empty * RSMRC_BLKSZ);

	/*
	 * watch out not to exceed bounds of barrier page
	 */
	if (*rnum >= max_segs + 1) {
		rw_exit(&rsm_resource.rsmrc_lock);
		DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
		    "rsmresource_alloc failed %d\n", *rnum));

		return (RSMERR_INSUFFICIENT_RESOURCES);
	}
	blk->rsmrcblk_blks[0] = RSMRC_RESERVED;


	rw_exit(&rsm_resource.rsmrc_lock);

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_alloc done\n"));

	return (RSM_SUCCESS);
}

static rsmresource_t *
rsmresource_free(minor_t rnum)
{

	/* search for the resource slot */
	int i, j;
	rsmresource_blk_t *blk;
	rsmresource_t *p;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_free enter\n"));

	i = (int)(rnum / RSMRC_BLKSZ);
	j = (int)(rnum % RSMRC_BLKSZ);

	if (i >= rsm_resource.rsmrc_len) {
		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
		    "rsmresource_free done\n"));
		return (NULL);
	}

	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);

	ASSERT(rsm_resource.rsmrc_root);
	ASSERT(i < rsm_resource.rsmrc_len);
	ASSERT(i < rsm_resource.rsmrc_sz);
	blk = rsm_resource.rsmrc_root[i];
	if (blk == NULL) {
		rw_exit(&rsm_resource.rsmrc_lock);
		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
		    "rsmresource_free done\n"));
		return (NULL);
	}

	ASSERT(blk->rsmrcblk_blks[j]);	/* reserved or full */

	p = blk->rsmrcblk_blks[j];
	if (p == RSMRC_RESERVED) {
		p = NULL;
	}

	blk->rsmrcblk_blks[j] = NULL;
	blk->rsmrcblk_avail++;
	if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
		/* free this blk */
		kmem_free(blk, sizeof (*blk));
		rsm_resource.rsmrc_root[i] = NULL;
	}

	rw_exit(&rsm_resource.rsmrc_lock);

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_free done\n"));

	return (p);
}

static rsmresource_t *
rsmresource_lookup(minor_t rnum, int lock)
{
	int i, j;
	rsmresource_blk_t *blk;
	rsmresource_t *p;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_lookup enter\n"));

	/* Find resource and lock it in READER mode */
	/* search for the resource slot */

	i = (int)(rnum / RSMRC_BLKSZ);
	j = (int)(rnum % RSMRC_BLKSZ);

	if (i >= rsm_resource.rsmrc_len) {
		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
		    "rsmresource_lookup done\n"));
		return (NULL);
	}

	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);

	blk = rsm_resource.rsmrc_root[i];
	if (blk != NULL) {
		ASSERT(i < rsm_resource.rsmrc_len);
		ASSERT(i < rsm_resource.rsmrc_sz);

		p = blk->rsmrcblk_blks[j];
		if (lock == RSM_LOCK) {
			/* guard against empty as well as reserved slots */
			if ((p != NULL) && (p != RSMRC_RESERVED)) {
				mutex_enter(&p->rsmrc_lock);
			} else {
				p = NULL;
			}
		}
	} else {
		p = NULL;
	}
	rw_exit(&rsm_resource.rsmrc_lock);

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_lookup done\n"));

	return (p);
}

static void
rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
{
	/* Fill in the slot that was reserved for this resource number */
	/* The table is read-locked; the slot itself is already reserved */
	int i, j;
	rsmresource_blk_t *blk;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_insert enter\n"));

	i = (int)(rnum / RSMRC_BLKSZ);
	j = (int)(rnum % RSMRC_BLKSZ);

	p->rsmrc_type = type;
	p->rsmrc_num = rnum;

	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);

	ASSERT(rsm_resource.rsmrc_root);
	ASSERT(i < rsm_resource.rsmrc_len);
	ASSERT(i < rsm_resource.rsmrc_sz);

	blk = rsm_resource.rsmrc_root[i];
	ASSERT(blk);

	ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);

	blk->rsmrcblk_blks[j] = p;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_insert done\n"));

	rw_exit(&rsm_resource.rsmrc_lock);
}

static void
rsmresource_destroy()
{
	int i, j;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_destroy enter\n"));

	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);

	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
		rsmresource_blk_t *blk;

		blk = rsm_resource.rsmrc_root[i];
		if (blk == NULL) {
			continue;
		}
		for (j = 0; j < RSMRC_BLKSZ; j++) {
			if (blk->rsmrcblk_blks[j] != NULL) {
				DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
				    "Not null slot %d, %lx\n", j,
				    (size_t)blk->rsmrcblk_blks[j]));
			}
		}
		kmem_free(blk, sizeof (*blk));
		rsm_resource.rsmrc_root[i] = NULL;
	}
	if (rsm_resource.rsmrc_root) {
		i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
		kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
		rsm_resource.rsmrc_root = NULL;
		rsm_resource.rsmrc_len = 0;
		rsm_resource.rsmrc_sz = 0;
	}

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_destroy done\n"));

	rw_exit(&rsm_resource.rsmrc_lock);
}


/* ******************** Generic Key Hash Table Management ********* */
static rsmresource_t *
rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
    rsm_resource_state_t state)
{
	rsmresource_t *p;
	uint_t hashval;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));

	hashval = rsmhash(key);

	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
	    key, hashval));

	rw_enter(&rhash->rsmhash_rw, RW_READER);

	p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);

	for (; p; p = p->rsmrc_next) {
		if (p->rsmrc_key == key) {
			/* acquire resource lock */
			RSMRC_LOCK(p);
			break;
		}
	}

	rw_exit(&rhash->rsmhash_rw);

	if (p != NULL && p->rsmrc_state != state) {
		/* state changed, release lock and return null */
		RSMRC_UNLOCK(p);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmhash_lookup done: state changed\n"));
		return (NULL);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));

	return (p);
}

static void
rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
{
	rsmresource_t *p, **back;
	uint_t hashval;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));

	hashval = rsmhash(rcelm->rsmrc_key);

	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
	    rcelm->rsmrc_key, hashval));

	/*
	 * It's ok not to find the segment.
	 */
	rw_enter(&rhash->rsmhash_rw, RW_WRITER);

	back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);

	for (; (p = *back) != NULL; back = &p->rsmrc_next) {
		if (p == rcelm) {
			*back = rcelm->rsmrc_next;
			break;
		}
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));

	rw_exit(&rhash->rsmhash_rw);
}

static int
rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
    int dup_check, rsm_resource_state_t state)
{
	rsmresource_t *p = NULL, **bktp;
	uint_t hashval;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));

	/* lock table */
	rw_enter(&rhash->rsmhash_rw, RW_WRITER);

	/*
	 * If the current resource state is other than the state passed in
	 * then the resource is (probably) already on the list. e.g. for an
	 * import segment, if the state is not RSM_STATE_NEW then it's on the
	 * list already.
	 */
	RSMRC_LOCK(new);
	if (new->rsmrc_state != state) {
		RSMRC_UNLOCK(new);
		rw_exit(&rhash->rsmhash_rw);
		return (RSMERR_BAD_SEG_HNDL);
	}

	hashval = rsmhash(key);
	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));

	if (dup_check) {
		/*
		 * Used for checking export segments; don't want to have
		 * the same key used for multiple segments.
		 */

		p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);

		for (; p; p = p->rsmrc_next) {
			if (p->rsmrc_key == key) {
				RSMRC_UNLOCK(new);
				break;
			}
		}
	}

	if (p == NULL) {
		/* Key doesn't exist, add it */

		bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);

		new->rsmrc_key = key;
		new->rsmrc_next = *bktp;
		*bktp = new;
	}

	rw_exit(&rhash->rsmhash_rw);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));

	return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
}

/*
 * XOR each byte of the key.
 */
static uint_t
rsmhash(rsm_memseg_id_t key)
{
	uint_t hash = key;

	hash ^= (key >> 8);
	hash ^= (key >> 16);
	hash ^= (key >> 24);

	return (hash % rsm_hash_size);

}

/*
 * generic function to get a specific bucket
 */
static void *
rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
{

	if (rhash->bucket == NULL)
		return (NULL);
	else
		return ((void *)rhash->bucket[hashval]);
}

/*
 * generic function to get a specific bucket's address
 */
static void **
rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
{
	if (rhash->bucket == NULL)
		return (NULL);
	else
		return ((void **)&(rhash->bucket[hashval]));
}

/*
 * generic function to alloc a hash table
 */
static void
rsmhash_alloc(rsmhash_table_t *rhash, int size)
{
	rhash->bucket = (rsmresource_t **)
	    kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
}

/*
 * generic function to free a hash table
 */
static void
rsmhash_free(rsmhash_table_t *rhash, int size)
{

	kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
	rhash->bucket = NULL;

}
/* *********************** Exported Segment Key Management ************ */

#define	rsmexport_add(new, key)	\
	rsmhash_add(&rsm_export_segs, (rsmresource_t *)new, key, 1, \
	    RSM_STATE_BIND)

#define	rsmexport_rm(arg)	\
	rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))

#define	rsmexport_lookup(key)	\
	(rsmseg_t *)rsmhash_lookup(&rsm_export_segs, key, RSM_STATE_EXPORT)

/* ************************** Import Segment List Management ********** */

/*
 * Add segment to import list. This will be useful for paging and loopback
 * segment unloading.
 */
#define	rsmimport_add(arg, key)	\
	rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
	    RSM_STATE_NEW)

#define	rsmimport_rm(arg)	\
	rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))

/*
 * #define	rsmimport_lookup(key) \
 *	(rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
 */

/*
 * Increase the reference count and make the import segment point to the
 * shared data structure. Return a pointer to the shared data structure,
 * which is locked upon return.
 */
static rsm_import_share_t *
rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
    rsmseg_t *segp)
{
	uint_t hash;
	rsmresource_t *p;
	rsm_import_share_t *shdatap;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));

	hash = rsmhash(key);
	/* lock table */
	rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
	    key, hash));

	p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);

	for (; p; p = p->rsmrc_next) {
		/*
		 * Look for an entry that is importing the same exporter
		 * with the share data structure allocated.
		 */
		if ((p->rsmrc_key == key) &&
		    (p->rsmrc_node == node) &&
		    (p->rsmrc_adapter == adapter) &&
		    (((rsmseg_t *)p)->s_share != NULL)) {
			shdatap = ((rsmseg_t *)p)->s_share;
			break;
		}
	}

	if (p == NULL) {
		/* we are the first importer, create the shared data struct */
		shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
		shdatap->rsmsi_state = RSMSI_STATE_NEW;
		shdatap->rsmsi_segid = key;
		shdatap->rsmsi_node = node;
		mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
	}

	rsmseglock_acquire(segp);

	/* we grab the shared lock before returning from this function */
	mutex_enter(&shdatap->rsmsi_lock);

	shdatap->rsmsi_refcnt++;
	segp->s_share = shdatap;

	rsmseglock_release(segp);

	rw_exit(&rsm_import_segs.rsmhash_rw);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));

	return (shdatap);
}

/*
 * The shared data structure should be locked before calling
 * rsmsharecv_signal().
 * Change the state and signal any waiting segments.
 */
void
rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
{
	ASSERT(rsmsharelock_held(seg));

	if (seg->s_share->rsmsi_state == oldstate) {
		seg->s_share->rsmsi_state = newstate;
		cv_broadcast(&seg->s_share->rsmsi_cv);
	}
}

/*
 * Add to the hash table
 */
static void
importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
    void *cookie)
{

	importing_token_t *head;
	importing_token_t *new_token;
	int index;

	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));

	new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
	new_token->importing_node = node;
	new_token->key = key;
	new_token->import_segment_cookie = cookie;
	new_token->importing_adapter_hwaddr = hwaddr;

	index = rsmhash(key);

	mutex_enter(&importer_list.lock);

	head = importer_list.bucket[index];
	importer_list.bucket[index] = new_token;
	new_token->next = head;
	mutex_exit(&importer_list.lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
}

static void
importer_list_rm(rsm_node_id_t node, rsm_memseg_id_t key, void *cookie)
{

	importing_token_t *prev, *token = NULL;
	int index;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));

	index = rsmhash(key);

	mutex_enter(&importer_list.lock);

	token = importer_list.bucket[index];

	prev = token;
	while (token != NULL) {
		if (token->importing_node == node &&
		    token->import_segment_cookie == cookie) {
			if (prev == token)
				importer_list.bucket[index] = token->next;
			else
				prev->next = token->next;
			kmem_free((void *)token, sizeof (*token));
			break;
		} else {
			prev = token;
			token = token->next;
		}
	}

	mutex_exit(&importer_list.lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));


}

/* ************************* Segment Structure Management ************* */

/*
 * Free segment structure
 */
static void
rsmseg_free(rsmseg_t *seg)
{

	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));

	/* need to take seglock here to avoid race with rsmmap_unmap() */
	rsmseglock_acquire(seg);
	if (seg->s_ckl != NULL) {
		/* Segment is still busy */
		seg->s_state = RSM_STATE_END;
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmseg_free done\n"));
		return;
	}

	rsmseglock_release(seg);

	ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);

	/*
	 * If it's an importer, decrement the refcount
	 * and if it's down to zero free the shared data structure.
	 * This is where failures during rsm_connect() are unrefcounted.
	 */
	if (seg->s_share != NULL) {

		ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);

		rsmsharelock_acquire(seg);

		ASSERT(seg->s_share->rsmsi_refcnt > 0);

		seg->s_share->rsmsi_refcnt--;

		if (seg->s_share->rsmsi_refcnt == 0) {
			rsmsharelock_release(seg);
			mutex_destroy(&seg->s_share->rsmsi_lock);
			cv_destroy(&seg->s_share->rsmsi_cv);
			kmem_free((void *)(seg->s_share),
			    sizeof (rsm_import_share_t));
		} else {
			rsmsharelock_release(seg);
		}
		/*
		 * The following needs to be done after any
		 * rsmsharelock calls which use seg->s_share.
		 */
		seg->s_share = NULL;
	}

	cv_destroy(&seg->s_cv);
	mutex_destroy(&seg->s_lock);
	rsmacl_free(seg->s_acl, seg->s_acl_len);
	rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
	if (seg->s_adapter)
		rsmka_release_adapter(seg->s_adapter);

	kmem_free((void *)seg, sizeof (*seg));

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));

}


static rsmseg_t *
rsmseg_alloc(minor_t num, struct cred *cred)
{
	rsmseg_t *new;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
	/*
	 * allocate memory for the new segment. This should be a segkmem cache.
	 */
	new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);

	new->s_state = RSM_STATE_NEW;
	new->s_minor = num;
	new->s_acl_len = 0;
	new->s_cookie = NULL;
	new->s_adapter = NULL;

	new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
	/* we don't have a key yet, will set at export/connect */
	new->s_uid = crgetuid(cred);
	new->s_gid = crgetgid(cred);

	mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
	cv_init(&new->s_cv, NULL, CV_DRIVER, 0);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));

	return (new);
}

/* ******************************** Driver Open/Close/Poll *************** */

/*ARGSUSED1*/
static int
rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
{
	minor_t rnum;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
	/*
	 * Char only
	 */
	if (otyp != OTYP_CHR) {
		DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
		return (EINVAL);
	}

	/*
	 * Only minor zero can be opened; clones are used for resources.
/* ******************************** Driver Open/Close/Poll *************** */

/*ARGSUSED1*/
static int
rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
{
	minor_t rnum;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
	/*
	 * Char only
	 */
	if (otyp != OTYP_CHR) {
		DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
		return (EINVAL);
	}

	/*
	 * Only zero can be opened, clones are used for resources.
	 */
	if (getminor(*devp) != RSM_DRIVER_MINOR) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_open: bad minor %d\n", getminor(*devp)));
		return (ENODEV);
	}

	if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
		DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
		return (EPERM);
	}

	if (!(flag & FWRITE)) {
		/*
		 * The library function _rsm_librsm_init calls open for
		 * /dev/rsm with flag set to O_RDONLY.  We want a valid
		 * file descriptor to be returned for minor device zero.
		 */

		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_open RDONLY done\n"));
		return (DDI_SUCCESS);
	}

	/*
	 * - allocate new minor number and segment.
	 * - add segment to list of all segments.
	 * - set minordev data to segment
	 * - update devp argument to new device
	 * - update s_cred to cred; make sure you do crhold(cred);
	 */

	/* allocate a new resource number */
	if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
		/*
		 * We will bind this minor to a specific resource in the
		 * first ioctl.
		 */
		*devp = makedevice(getmajor(*devp), rnum);
	} else {
		return (EAGAIN);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
	return (DDI_SUCCESS);
}
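/*
 * A user-level sketch of the open semantics above (normally performed
 * by the RSMAPI library; shown for illustration only):
 *
 *	int fd = open("/dev/rsm", O_RDWR);	 // clones a new resource
 *	int initfd = open("/dev/rsm", O_RDONLY); // librsm init, minor 0
 *
 * An O_RDWR open allocates a fresh resource (minor) number; the first
 * ioctl issued on fd binds that number to a specific segment.
 */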
static void
rsmseg_close(rsmseg_t *seg, int force_flag)
{
	int e = RSM_SUCCESS;

	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));

	rsmseglock_acquire(seg);
	if (!force_flag && (seg->s_hdr.rsmrc_type ==
	    RSM_RESOURCE_EXPORT_SEGMENT)) {
		/*
		 * If we are processing rsm_close, wait for force_destroy
		 * processing to complete since force_destroy processing
		 * needs to finish first before we can free the segment.
		 * force_destroy is only for export segments.
		 */
		while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
			cv_wait(&seg->s_cv, &seg->s_lock);
		}
	}
	rsmseglock_release(seg);

	/* It's ok to read the state without a lock */
	switch (seg->s_state) {
	case RSM_STATE_EXPORT:
	case RSM_STATE_EXPORT_QUIESCING:
	case RSM_STATE_EXPORT_QUIESCED:
		e = rsm_unpublish(seg, 1);
		/* FALLTHRU */
	case RSM_STATE_BIND_QUIESCED:
		/* FALLTHRU */
	case RSM_STATE_BIND:
		e = rsm_unbind(seg);
		if (e != RSM_SUCCESS && force_flag == 1)
			return;
		ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
		/* FALLTHRU */
	case RSM_STATE_NEW_QUIESCED:
		rsmseglock_acquire(seg);
		seg->s_state = RSM_STATE_NEW;
		cv_broadcast(&seg->s_cv);
		rsmseglock_release(seg);
		break;
	case RSM_STATE_NEW:
		break;
	case RSM_STATE_ZOMBIE:
		/*
		 * Segments in this state have been removed off the
		 * exported segments list and have been unpublished
		 * and unbound.  These segments were removed during
		 * a callback to rsm_export_force_destroy, which is
		 * called for the purpose of unlocking these exported
		 * memory segments when a process exits but leaves the
		 * segments locked down, since rsm_close is not called
		 * for the segments.  This can happen when a process
		 * calls fork or exec and then exits.  Once the
		 * segments are in the ZOMBIE state, all that remains
		 * is to destroy them when rsm_close is called.  This
		 * is done here.  Thus, for such segments the state is
		 * changed to NEW so that later in this function
		 * rsmseg_free is called.
		 */
		rsmseglock_acquire(seg);
		seg->s_state = RSM_STATE_NEW;
		rsmseglock_release(seg);
		break;
	case RSM_STATE_MAP_QUIESCE:
	case RSM_STATE_ACTIVE:
		/* Disconnect will handle the unmap */
	case RSM_STATE_CONN_QUIESCE:
	case RSM_STATE_CONNECT:
	case RSM_STATE_DISCONNECT:
		ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
		(void) rsm_disconnect(seg);
		break;
	case RSM_STATE_MAPPING:
		/*FALLTHRU*/
	case RSM_STATE_END:
		DBG_PRINTF((category, RSM_ERR,
		    "Invalid segment state %d in rsm_close\n", seg->s_state));
		break;
	default:
		DBG_PRINTF((category, RSM_ERR,
		    "Invalid segment state %d in rsm_close\n", seg->s_state));
		break;
	}

	/*
	 * check state.
	 * - make sure you do crfree(s_cred);
	 * release segment and minor number
	 */
	ASSERT(seg->s_state == RSM_STATE_NEW);

	/*
	 * The export_force_destroy callback is used to unlock the
	 * exported segments of a process when the process does a fork
	 * or exec and then exits.  It calls this function with the
	 * force flag set to 1, which indicates that the segment state
	 * must be converted to ZOMBIE.  This state means that the
	 * segments still exist and have been unlocked, and most
	 * importantly that the only operation allowed is to destroy
	 * them on an rsm_close.
	 */
	if (force_flag) {
		rsmseglock_acquire(seg);
		seg->s_state = RSM_STATE_ZOMBIE;
		rsmseglock_release(seg);
	} else {
		rsmseg_free(seg);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
}

static int
rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
{
	minor_t	rnum = getminor(dev);
	rsmresource_t *res;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));

	flag = flag; cred = cred;

	if (otyp != OTYP_CHR)
		return (EINVAL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));

	/*
	 * At this point we are the last reference to the resource.
	 * Free the resource number from the resource table.
	 * It's ok to remove the number before we free the segment.
	 * We need to lock the resource to protect against remote calls.
	 */
	if (rnum == RSM_DRIVER_MINOR ||
	    (res = rsmresource_free(rnum)) == NULL) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
		return (DDI_SUCCESS);
	}

	switch (res->rsmrc_type) {
	case RSM_RESOURCE_EXPORT_SEGMENT:
	case RSM_RESOURCE_IMPORT_SEGMENT:
		rsmseg_close((rsmseg_t *)res, 0);
		break;
	case RSM_RESOURCE_BAR:
		DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
		break;
	default:
		break;
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));

	return (DDI_SUCCESS);
}
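/*
 * Summary of the two close paths above, both of which end up in
 * rsmseg_close (illustrative):
 *
 *	rsm_close()			rsm_export_force_destroy()
 *	    rsmseg_close(seg, 0)	    rsmseg_close(seg, 1)
 *		unpublish/unbind/	        unpublish/unbind
 *		disconnect as needed	        state -> RSM_STATE_ZOMBIE
 *		rsmseg_free(seg)	        (freed by a later rsm_close)
 */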
/*
 * rsm_inc_pgcnt
 *
 * Description:	increment the rsm page counter.
 *
 * Parameters:	pgcnt_t	pnum;	number of pages to be used
 *
 * Returns:	RSM_SUCCESS		if memory limit not exceeded
 *		RSMERR_INSUFFICIENT_MEM	if the memory limit would be
 *					exceeded.  In this case, the
 *					page counter remains unchanged.
 *
 */
static int
rsm_inc_pgcnt(pgcnt_t pnum)
{
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
	if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
		return (RSM_SUCCESS);
	}

	mutex_enter(&rsm_pgcnt_lock);

	if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
		/* ensure that limits have not been exceeded */
		mutex_exit(&rsm_pgcnt_lock);
		return (RSMERR_INSUFFICIENT_MEM);
	}

	rsm_pgcnt += pnum;
	DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
	    rsm_pgcnt));
	mutex_exit(&rsm_pgcnt_lock);

	return (RSM_SUCCESS);
}

/*
 * rsm_dec_pgcnt
 *
 * Description:	decrement the rsm page counter.
 *
 * Parameters:	pgcnt_t	pnum;	number of pages freed
 *
 */
static void
rsm_dec_pgcnt(pgcnt_t pnum)
{
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

	if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
		return;
	}

	mutex_enter(&rsm_pgcnt_lock);
	ASSERT(rsm_pgcnt >= pnum);
	rsm_pgcnt -= pnum;
	DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
	    rsm_pgcnt));
	mutex_exit(&rsm_pgcnt_lock);
}
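/*
 * rsm_pgcnt_max is a tunable; a value of zero (the default check
 * above) means no limit is enforced.  Callers pair the two routines
 * as follows (illustrative; this is the pattern used by the bind
 * path below):
 *
 *	if (rsm_inc_pgcnt(btopr(len)) != RSM_SUCCESS)
 *		return (RSMERR_INSUFFICIENT_MEM); // counter unchanged
 *	...
 *	rsm_dec_pgcnt(btopr(len));	// on unbind or on bind failure
 */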
static struct umem_callback_ops rsm_as_ops = {
	UMEM_CALLBACK_VERSION, /* version number */
	rsm_export_force_destroy,
};

static int
rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
    proc_t *procp)
{
	int error = RSM_SUCCESS;
	ulong_t pnum;
	struct umem_callback_ops *callbackops = &rsm_as_ops;

	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));

	/*
	 * Make sure vaddr and len are aligned on a page boundary.
	 */
	if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
		return (RSMERR_BAD_ADDR);
	}

	if (len & (PAGESIZE - 1)) {
		return (RSMERR_BAD_LENGTH);
	}

	/*
	 * Find the number of pages.
	 */
	pnum = btopr(len);
	error = rsm_inc_pgcnt(pnum);
	if (error != RSM_SUCCESS) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_bind_pages: mem limit exceeded\n"));
		return (RSMERR_INSUFFICIENT_MEM);
	}

	error = umem_lockmemory(vaddr, len,
	    DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
	    cookie,
	    callbackops, procp);

	if (error) {
		rsm_dec_pgcnt(pnum);
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_bind_pages: ddi_umem_lock failed\n"));
		/*
		 * ddi_umem_lock, in the case of failure, returns one of
		 * the following three errors.  These are translated into
		 * the RSMERR namespace and returned.
		 */
		if (error == EFAULT)
			return (RSMERR_BAD_ADDR);
		else if (error == EACCES)
			return (RSMERR_PERM_DENIED);
		else
			return (RSMERR_INSUFFICIENT_MEM);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n"));

	return (error);
}

static int
rsm_unbind_pages(rsmseg_t *seg)
{
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n"));

	ASSERT(rsmseglock_held(seg));

	if (seg->s_cookie != NULL) {
		/* unlock address range */
		ddi_umem_unlock(seg->s_cookie);
		rsm_dec_pgcnt(btopr(seg->s_len));
		seg->s_cookie = NULL;
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n"));

	return (RSM_SUCCESS);
}
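/*
 * Since rsm_bind_pages rejects unaligned requests, applications must
 * supply a page-aligned region of a page-multiple length.  A
 * user-level sketch of preparing such a region (illustrative only):
 *
 *	long pg = sysconf(_SC_PAGESIZE);
 *	size_t len = (nbytes + pg - 1) & ~(size_t)(pg - 1);
 *	void *va = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE | MAP_ANON, -1, 0);	// page-aligned by mmap
 */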
static int
rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
{
	int e;
	adapter_t *adapter;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n"));

	adapter = rsm_getadapter(msg, mode);
	if (adapter == NULL) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_bind done: no adapter\n"));
		return (RSMERR_CTLR_NOT_PRESENT);
	}

	/* lock address range */
	if (msg->vaddr == NULL) {
		rsmka_release_adapter(adapter);
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsm_bind done: invalid vaddr\n"));
		return (RSMERR_BAD_ADDR);
	}
	if (msg->len <= 0) {
		rsmka_release_adapter(adapter);
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_bind: invalid length\n"));
		return (RSMERR_BAD_LENGTH);
	}

	/* Lock segment */
	rsmseglock_acquire(seg);

	while (seg->s_state == RSM_STATE_NEW_QUIESCED) {
		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_bind done: cv_wait INTERRUPTED"));
			rsmka_release_adapter(adapter);
			rsmseglock_release(seg);
			return (RSMERR_INTERRUPTED);
		}
	}

	ASSERT(seg->s_state == RSM_STATE_NEW);

	ASSERT(seg->s_cookie == NULL);

	e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc);
	if (e == RSM_SUCCESS) {
		seg->s_flags |= RSM_USER_MEMORY;
		if (msg->perm & RSM_ALLOW_REBIND) {
			seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND;
		}
		if (msg->perm & RSM_CREATE_SEG_DONTWAIT) {
			seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT;
		}
		seg->s_region.r_vaddr = msg->vaddr;
		/*
		 * Set the s_pid value in the segment structure.  This is
		 * used to identify exported segments belonging to a
		 * particular process so that when the process exits, these
		 * segments can be unlocked forcefully even if rsm_close is
		 * not called on process exit, since there may be other
		 * processes referencing them (for example on a fork or
		 * exec).  The s_pid value is also used to authenticate the
		 * process doing a publish or unpublish on the export
		 * segment.  Only the creator of the export segment has the
		 * right to do a publish, unpublish or unbind on the
		 * segment.
		 */
		seg->s_pid = ddi_get_pid();
		seg->s_len = msg->len;
		seg->s_state = RSM_STATE_BIND;
		seg->s_adapter = adapter;
		seg->s_proc = curproc;
	} else {
		rsmka_release_adapter(adapter);
		DBG_PRINTF((category, RSM_WARNING,
		    "unable to lock down pages\n"));
	}

	msg->rnum = seg->s_minor;
	/* Unlock segment */
	rsmseglock_release(seg);

	if (e == RSM_SUCCESS) {
		/* copyout the resource number */
#ifdef _MULTI_DATAMODEL
		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
			rsm_ioctlmsg32_t msg32;

			msg32.rnum = msg->rnum;
			if (ddi_copyout((caddr_t)&msg32.rnum,
			    (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
			    sizeof (minor_t), mode)) {
				rsmka_release_adapter(adapter);
				e = RSMERR_BAD_ADDR;
			}
		}
#endif
		if (ddi_copyout((caddr_t)&msg->rnum,
		    (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
		    sizeof (minor_t), mode)) {
			rsmka_release_adapter(adapter);
			e = RSMERR_BAD_ADDR;
		}
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));

	return (e);
}
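/*
 * A user-level sketch of how the bind path above is typically reached
 * through the RSMAPI library (librsm function name; shown for
 * illustration only, error handling omitted):
 *
 *	rsm_memseg_export_handle_t h;
 *	err = rsm_memseg_export_create(ctlr, &h, va, len,
 *	    RSM_ALLOW_REBIND);	// issues the bind ioctl on /dev/rsm
 *
 * Passing RSM_ALLOW_REBIND is what sets RSMKA_ALLOW_UNBIND_REBIND in
 * seg->s_flags above, enabling the rsm_rebind path below.
 */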
static void
rsm_remap_local_importers(rsm_node_id_t src_nodeid,
    rsm_memseg_id_t ex_segid,
    ddi_umem_cookie_t cookie)

{
	rsmresource_t	*p = NULL;
	rsmhash_table_t *rhash = &rsm_import_segs;
	uint_t		index;

	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
	    "rsm_remap_local_importers enter\n"));

	index = rsmhash(ex_segid);

	rw_enter(&rhash->rsmhash_rw, RW_READER);

	p = rsmhash_getbkt(rhash, index);

	for (; p; p = p->rsmrc_next) {
		rsmseg_t *seg = (rsmseg_t *)p;
		rsmseglock_acquire(seg);
		/*
		 * Change the s_cookie value of only the local importers
		 * which have been mapped (in state RSM_STATE_ACTIVE).
		 * Note that there is no need to change the s_cookie value
		 * if the imported segment is in RSM_STATE_MAPPING since
		 * eventually the s_cookie will be updated via the mapping
		 * functionality.
		 */
		if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) &&
		    (seg->s_state == RSM_STATE_ACTIVE)) {
			seg->s_cookie = cookie;
		}
		rsmseglock_release(seg);
	}
	rw_exit(&rhash->rsmhash_rw);

	DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
	    "rsm_remap_local_importers done\n"));
}

static int
rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg)
{
	int e;
	adapter_t *adapter;
	ddi_umem_cookie_t cookie;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n"));

	/* Check for permissions to rebind */
	if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) {
		return (RSMERR_REBIND_NOT_ALLOWED);
	}

	if (seg->s_pid != ddi_get_pid() &&
	    ddi_get_pid() != 0) {
		DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n"));
		return (RSMERR_NOT_CREATOR);
	}

	/*
	 * We will not be allowing partial rebind and hence the length
	 * passed in must be the same as the segment length.
	 */
	if (msg->vaddr == NULL) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_rebind done: null msg->vaddr\n"));
		return (RSMERR_BAD_ADDR);
	}
	if (msg->len != seg->s_len) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_rebind: invalid length\n"));
		return (RSMERR_BAD_LENGTH);
	}

	/* Lock segment */
	rsmseglock_acquire(seg);

	while ((seg->s_state == RSM_STATE_BIND_QUIESCED) ||
	    (seg->s_state == RSM_STATE_EXPORT_QUIESCING) ||
	    (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) {
		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
			rsmseglock_release(seg);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_rebind done: cv_wait INTERRUPTED"));
			return (RSMERR_INTERRUPTED);
		}
	}

	/* verify segment state */
	if ((seg->s_state != RSM_STATE_BIND) &&
	    (seg->s_state != RSM_STATE_EXPORT)) {
		/* Unlock segment */
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_rebind done: invalid state\n"));
		return (RSMERR_BAD_SEG_HNDL);
	}

	ASSERT(seg->s_cookie != NULL);

	if (msg->vaddr == seg->s_region.r_vaddr) {
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));
		return (RSM_SUCCESS);
	}

	e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc);
	if (e == RSM_SUCCESS) {
		struct buf *xbuf;
		dev_t sdev = 0;
		rsm_memory_local_t mem;

		xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE,
		    sdev, 0, NULL, DDI_UMEM_SLEEP);
		ASSERT(xbuf != NULL);

		mem.ms_type = RSM_MEM_BUF;
		mem.ms_bp = xbuf;

		adapter = seg->s_adapter;
		e = adapter->rsmpi_ops->rsm_rebind(
		    seg->s_handle.out, 0, &mem,
		    RSM_RESOURCE_DONTWAIT, NULL);

		if (e == RSM_SUCCESS) {
			/*
			 * unbind the older pages, and unload local importers;
			 * but don't disconnect importers
			 */
			(void) rsm_unbind_pages(seg);
			seg->s_cookie = cookie;
			seg->s_region.r_vaddr = msg->vaddr;
			rsm_remap_local_importers(my_nodeid, seg->s_segid,
			    cookie);
		} else {
			/*
			 * Unbind the pages associated with "cookie" by the
			 * rsm_bind_pages calls prior to this.  This is
			 * similar to what is done in the rsm_unbind_pages
			 * routine for the seg->s_cookie.
			 */
			ddi_umem_unlock(cookie);
			rsm_dec_pgcnt(btopr(msg->len));
			DBG_PRINTF((category, RSM_ERR,
			    "rsm_rebind failed with %d\n", e));
		}
		/*
		 * At present there is no dependency on the existence of xbuf.
		 * So we can free it here.  If in the future this changes, it
		 * can be freed sometime during the segment destroy.
		 */
		freerbuf(xbuf);
	}

	/* Unlock segment */
	rsmseglock_release(seg);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n"));

	return (e);
}

static int
rsm_unbind(rsmseg_t *seg)
{
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n"));

	rsmseglock_acquire(seg);

	/* verify segment state */
	if ((seg->s_state != RSM_STATE_BIND) &&
	    (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_unbind: invalid state\n"));
		return (RSMERR_BAD_SEG_HNDL);
	}

	/* unlock current range */
	(void) rsm_unbind_pages(seg);

	if (seg->s_state == RSM_STATE_BIND) {
		seg->s_state = RSM_STATE_NEW;
	} else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
		seg->s_state = RSM_STATE_NEW_QUIESCED;
	}

	rsmseglock_release(seg);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n"));

	return (RSM_SUCCESS);
}

/* **************************** Exporter Access List Management ******* */
static void
rsmacl_free(rsmapi_access_entry_t *acl, int acl_len)
{
	int acl_sz;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n"));

	/* acl could be NULL */

	if (acl != NULL && acl_len > 0) {
		acl_sz = acl_len * sizeof (rsmapi_access_entry_t);
		kmem_free((void *)acl, acl_sz);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n"));
}

static void
rsmpiacl_free(rsm_access_entry_t *acl, int acl_len)
{
	int acl_sz;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n"));

	if (acl != NULL && acl_len > 0) {
		acl_sz = acl_len * sizeof (rsm_access_entry_t);
		kmem_free((void *)acl, acl_sz);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n"));
}
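/*
 * The access-list routines below operate on arrays supplied by the
 * application.  A minimal sketch of such a list (illustrative values;
 * fields per rsmapi_access_entry_t):
 *
 *	rsmapi_access_entry_t acl[2];
 *
 *	acl[0].ae_node = 2;			// peer node id
 *	acl[0].ae_permission = RSM_PERM_RDWR;	// read/write access
 *	acl[1].ae_node = 5;
 *	acl[1].ae_permission = RSM_PERM_READ;	// read-only access
 *
 * rsmacl_build copies and validates such a list from user space;
 * rsmpiacl_create then translates node ids to hardware addresses for
 * the RSMPI layer.
 */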
2687 } 2688 2689 /* 2690 * Verify access list 2691 */ 2692 for (i = 0; i < acl_len; i++) { 2693 if (acl[i].ae_node > MAX_NODES || 2694 (loopback && (acl[i].ae_node != my_nodeid)) || 2695 acl[i].ae_permission > RSM_ACCESS_TRUSTED) { 2696 /* invalid entry */ 2697 kmem_free((void *) acl, acl_size); 2698 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2699 "rsmacl_build done: EINVAL\n")); 2700 return (RSMERR_BAD_ACL); 2701 } 2702 } 2703 2704 *len = acl_len; 2705 *list = acl; 2706 } 2707 2708 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n")); 2709 2710 return (DDI_SUCCESS); 2711 } 2712 2713 static int 2714 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest, 2715 int acl_len, adapter_t *adapter) 2716 { 2717 rsm_access_entry_t *acl; 2718 rsm_addr_t hwaddr; 2719 int i; 2720 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2721 2722 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n")); 2723 2724 if (src != NULL) { 2725 size_t acl_size = acl_len * sizeof (rsm_access_entry_t); 2726 acl = kmem_alloc(acl_size, KM_SLEEP); 2727 2728 /* 2729 * translate access list 2730 */ 2731 for (i = 0; i < acl_len; i++) { 2732 if (src[i].ae_node == my_nodeid) { 2733 acl[i].ae_addr = adapter->hwaddr; 2734 } else { 2735 hwaddr = get_remote_hwaddr(adapter, 2736 src[i].ae_node); 2737 if ((int64_t)hwaddr < 0) { 2738 /* invalid hwaddr */ 2739 kmem_free((void *) acl, acl_size); 2740 DBG_PRINTF((category, 2741 RSM_DEBUG_VERBOSE, 2742 "rsmpiacl_create done:" 2743 "EINVAL hwaddr\n")); 2744 return (RSMERR_INTERNAL_ERROR); 2745 } 2746 acl[i].ae_addr = hwaddr; 2747 } 2748 /* rsmpi understands only RSM_PERM_XXXX */ 2749 acl[i].ae_permission = 2750 src[i].ae_permission & RSM_PERM_RDWR; 2751 } 2752 *dest = acl; 2753 } else { 2754 *dest = NULL; 2755 } 2756 2757 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n")); 2758 2759 return (RSM_SUCCESS); 2760 } 2761 2762 static int 2763 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode, 2764 rsmipc_reply_t *reply) 2765 { 2766 2767 int i; 2768 rsmseg_t *seg; 2769 rsm_memseg_id_t key = req->rsmipc_key; 2770 rsm_permission_t perm = req->rsmipc_perm; 2771 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2772 2773 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2774 "rsmsegacl_validate enter\n")); 2775 2776 /* 2777 * Find segment and grab its lock. The reason why we grab the segment 2778 * lock in side the search is to avoid the race when the segment is 2779 * being deleted and we already have a pointer to it. 2780 */ 2781 seg = rsmexport_lookup(key); 2782 if (!seg) { 2783 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2784 "rsmsegacl_validate done: %u ENXIO\n", key)); 2785 return (RSMERR_SEG_NOT_PUBLISHED); 2786 } 2787 2788 ASSERT(rsmseglock_held(seg)); 2789 ASSERT(seg->s_state == RSM_STATE_EXPORT); 2790 2791 /* 2792 * We implement a 2-level protection scheme. 2793 * First, we check if local/remote host has access rights. 2794 * Second, we check if the user has access rights. 
2795 * 2796 * This routine only validates the rnode access_list 2797 */ 2798 if (seg->s_acl_len > 0) { 2799 /* 2800 * Check host access list 2801 */ 2802 ASSERT(seg->s_acl != NULL); 2803 for (i = 0; i < seg->s_acl_len; i++) { 2804 if (seg->s_acl[i].ae_node == rnode) { 2805 perm &= seg->s_acl[i].ae_permission; 2806 goto found; 2807 } 2808 } 2809 /* rnode is not found in the list */ 2810 rsmseglock_release(seg); 2811 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2812 "rsmsegacl_validate done: EPERM\n")); 2813 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE); 2814 } else { 2815 /* use default owner creation umask */ 2816 perm &= seg->s_mode; 2817 } 2818 2819 found: 2820 /* update perm for this node */ 2821 reply->rsmipc_mode = perm; 2822 reply->rsmipc_uid = seg->s_uid; 2823 reply->rsmipc_gid = seg->s_gid; 2824 reply->rsmipc_segid = seg->s_segid; 2825 reply->rsmipc_seglen = seg->s_len; 2826 2827 /* 2828 * Perm of requesting node is valid; source will validate user 2829 */ 2830 rsmseglock_release(seg); 2831 2832 /* 2833 * Add the importer to the list right away, if connect fails 2834 * the importer will ask the exporter to remove it. 2835 */ 2836 importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr, 2837 req->rsmipc_segment_cookie); 2838 2839 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n")); 2840 2841 return (RSM_SUCCESS); 2842 } 2843 2844 2845 /* ************************** Exporter Calls ************************* */ 2846 2847 static int 2848 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode) 2849 { 2850 int e; 2851 int acl_len; 2852 rsmapi_access_entry_t *acl; 2853 rsm_access_entry_t *rsmpi_acl; 2854 rsm_memory_local_t mem; 2855 struct buf *xbuf; 2856 dev_t sdev = 0; 2857 adapter_t *adapter; 2858 rsm_memseg_id_t segment_id = 0; 2859 int loopback_flag = 0; 2860 int create_flags = 0; 2861 rsm_resource_callback_t callback_flag; 2862 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2863 2864 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n")); 2865 2866 if (seg->s_adapter == &loopback_adapter) 2867 loopback_flag = 1; 2868 2869 if (seg->s_pid != ddi_get_pid() && 2870 ddi_get_pid() != 0) { 2871 DBG_PRINTF((category, RSM_ERR, 2872 "rsm_publish: Not creator\n")); 2873 return (RSMERR_NOT_CREATOR); 2874 } 2875 2876 /* 2877 * Get per node access list 2878 */ 2879 e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag); 2880 if (e != DDI_SUCCESS) { 2881 DBG_PRINTF((category, RSM_ERR, 2882 "rsm_publish done: rsmacl_build failed\n")); 2883 return (e); 2884 } 2885 2886 /* 2887 * The application provided msg->key is used for resolving a 2888 * segment id according to the following: 2889 * key = 0 Kernel Agent selects the segment id 2890 * key <= RSM_DLPI_ID_END Reserved for system usage except 2891 * RSMLIB range 2892 * key < RSM_USER_APP_ID_BASE segment id = key 2893 * key >= RSM_USER_APP_ID_BASE Reserved for KA selections 2894 * 2895 * rsm_nextavail_segmentid is initialized to 0x80000000 and 2896 * overflows to zero after 0x80000000 allocations. 2897 * An algorithm is needed which allows reinitialization and provides 2898 * for reallocation after overflow. For now, ENOMEM is returned 2899 * once the overflow condition has occurred. 
/* ************************** Exporter Calls ************************* */

static int
rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode)
{
	int e;
	int acl_len;
	rsmapi_access_entry_t *acl;
	rsm_access_entry_t *rsmpi_acl;
	rsm_memory_local_t mem;
	struct buf *xbuf;
	dev_t sdev = 0;
	adapter_t *adapter;
	rsm_memseg_id_t segment_id = 0;
	int	loopback_flag = 0;
	int	create_flags = 0;
	rsm_resource_callback_t callback_flag;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n"));

	if (seg->s_adapter == &loopback_adapter)
		loopback_flag = 1;

	if (seg->s_pid != ddi_get_pid() &&
	    ddi_get_pid() != 0) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_publish: Not creator\n"));
		return (RSMERR_NOT_CREATOR);
	}

	/*
	 * Get per node access list
	 */
	e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag);
	if (e != DDI_SUCCESS) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_publish done: rsmacl_build failed\n"));
		return (e);
	}

	/*
	 * The application provided msg->key is used for resolving a
	 * segment id according to the following:
	 *	key = 0			Kernel Agent selects the segment id
	 *	key <= RSM_DLPI_ID_END	Reserved for system usage except
	 *				RSMLIB range
	 *	key < RSM_USER_APP_ID_BASE	segment id = key
	 *	key >= RSM_USER_APP_ID_BASE	Reserved for KA selections
	 *
	 * rsm_nextavail_segmentid is initialized to 0x80000000 and
	 * overflows to zero after 0x80000000 allocations.
	 * An algorithm is needed which allows reinitialization and provides
	 * for reallocation after overflow.  For now, ENOMEM is returned
	 * once the overflow condition has occurred.
	 */
	if (msg->key == 0) {
		mutex_enter(&rsm_lock);
		segment_id = rsm_nextavail_segmentid;
		if (segment_id != 0) {
			rsm_nextavail_segmentid++;
			mutex_exit(&rsm_lock);
		} else {
			mutex_exit(&rsm_lock);
			DBG_PRINTF((category, RSM_ERR,
			    "rsm_publish done: no more keys avlbl\n"));
			return (RSMERR_INSUFFICIENT_RESOURCES);
		}
	} else if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END)
		/* range reserved for internal use by base/ndi libraries */
		segment_id = msg->key;
	else if (msg->key <= RSM_DLPI_ID_END)
		return (RSMERR_RESERVED_SEGID);
	else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE - 1)
		segment_id = msg->key;
	else {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_publish done: invalid key %u\n", msg->key));
		return (RSMERR_RESERVED_SEGID);
	}

	/* Add key to exportlist; The segment lock is held on success */
	e = rsmexport_add(seg, segment_id);
	if (e) {
		rsmacl_free(acl, acl_len);
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_publish done: export_add failed: %d\n", e));
		return (e);
	}

	seg->s_segid = segment_id;

	if ((seg->s_state != RSM_STATE_BIND) &&
	    (seg->s_state != RSM_STATE_BIND_QUIESCED)) {
		/* state changed since then, free acl and return */
		rsmseglock_release(seg);
		rsmexport_rm(seg);
		rsmacl_free(acl, acl_len);
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_publish done: segment in wrong state: %d\n",
		    seg->s_state));
		return (RSMERR_BAD_SEG_HNDL);
	}

	/*
	 * If this is for a local memory handle and permissions are zero,
	 * then the surrogate segment is very large and we want to skip
	 * allocation of DVMA space.
	 *
	 * Careful! If the user didn't use an ACL list, acl will be a NULL
	 * pointer.  Check that before dereferencing it.
	 */
	if (acl != (rsmapi_access_entry_t *)NULL) {
		if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0)
			goto skipdriver;
	}

	/* create segment */
	xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE,
	    sdev, 0, NULL, DDI_UMEM_SLEEP);
	ASSERT(xbuf != NULL);

	mem.ms_type = RSM_MEM_BUF;
	mem.ms_bp = xbuf;

	/* This call includes a bind operation */

	adapter = seg->s_adapter;
	/*
	 * create an acl list with hwaddr for RSMPI publish
	 */
	e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter);

	if (e != RSM_SUCCESS) {
		rsmseglock_release(seg);
		rsmexport_rm(seg);
		rsmacl_free(acl, acl_len);
		freerbuf(xbuf);
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_publish done: rsmpiacl_create failed: %d\n", e));
		return (e);
	}

	if (seg->s_state == RSM_STATE_BIND) {
		/* create segment */

		/* This call includes a bind operation */

		if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
			create_flags = RSM_ALLOW_UNBIND_REBIND;
		}

		if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
			callback_flag = RSM_RESOURCE_DONTWAIT;
		} else {
			callback_flag = RSM_RESOURCE_SLEEP;
		}

		e = adapter->rsmpi_ops->rsm_seg_create(
		    adapter->rsmpi_handle,
		    &seg->s_handle.out, seg->s_len,
		    create_flags, &mem,
		    callback_flag, NULL);
		/*
		 * At present there is no dependency on the existence of xbuf.
		 * So we can free it here.  If in the future this changes, it
		 * can be freed sometime during the segment destroy.
		 */
		freerbuf(xbuf);

		if (e != RSM_SUCCESS) {
			rsmseglock_release(seg);
			rsmexport_rm(seg);
			rsmacl_free(acl, acl_len);
			rsmpiacl_free(rsmpi_acl, acl_len);
			DBG_PRINTF((category, RSM_ERR,
			    "rsm_publish done: export_create failed: %d\n",
			    e));
			/*
			 * The following assertion ensures that the two
			 * errors related to the length and its alignment
			 * do not occur since they have been checked during
			 * export_create.
			 */
			ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT &&
			    e != RSMERR_BAD_LENGTH);
			if (e == RSMERR_NOT_MEM)
				e = RSMERR_INSUFFICIENT_MEM;

			return (e);
		}
		/* export segment, this should create an IMMU mapping */
		e = adapter->rsmpi_ops->rsm_publish(
		    seg->s_handle.out,
		    rsmpi_acl, acl_len,
		    seg->s_segid,
		    RSM_RESOURCE_DONTWAIT, NULL);

		if (e != RSM_SUCCESS) {
			adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out);
			rsmseglock_release(seg);
			rsmexport_rm(seg);
			rsmacl_free(acl, acl_len);
			rsmpiacl_free(rsmpi_acl, acl_len);
			DBG_PRINTF((category, RSM_ERR,
			    "rsm_publish done: export_publish failed: %d\n",
			    e));
			return (e);
		}
	}

	seg->s_acl_in = rsmpi_acl;

skipdriver:
	/* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */
	seg->s_acl_len = acl_len;
	seg->s_acl = acl;

	if (seg->s_state == RSM_STATE_BIND) {
		seg->s_state = RSM_STATE_EXPORT;
	} else if (seg->s_state == RSM_STATE_BIND_QUIESCED) {
		seg->s_state = RSM_STATE_EXPORT_QUIESCED;
		cv_broadcast(&seg->s_cv);
	}

	rsmseglock_release(seg);

	/*
	 * If the segment id was solicited, then return it in
	 * the original incoming message.
	 */
	if (msg->key == 0) {
		msg->key = segment_id;
#ifdef _MULTI_DATAMODEL
		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
			rsm_ioctlmsg32_t msg32;

			msg32.key = msg->key;
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_publish done\n"));
			return (ddi_copyout((caddr_t)&msg32,
			    (caddr_t)dataptr, sizeof (msg32), mode));
		}
#endif
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_publish done\n"));
		return (ddi_copyout((caddr_t)msg,
		    (caddr_t)dataptr, sizeof (*msg), mode));
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n"));
	return (DDI_SUCCESS);
}
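/*
 * Example of the segment id selection in rsm_publish above: an
 * application passing key == 0 receives a kernel-agent chosen id
 * (starting at 0x80000000) back in the ioctl message, e.g.
 *
 *	msg.key = 0;		// let the kernel agent choose
 *	ioctl(fd, cmd, &msg);	// cmd: the publish ioctl command
 *	id = msg.key;		// assigned segment id
 *
 * Keys in the RSMLIB range or below RSM_USER_APP_ID_BASE are used
 * as-is; the remaining ranges are reserved and rejected.
 */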
3101 */ 3102 static int 3103 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode) 3104 { 3105 rsmapi_access_entry_t *new_acl, *old_acl, *tmp_acl; 3106 rsm_access_entry_t *rsmpi_new_acl, *rsmpi_old_acl; 3107 int new_acl_len, old_acl_len, tmp_acl_len; 3108 int e, i; 3109 adapter_t *adapter; 3110 int loopback_flag = 0; 3111 rsm_memseg_id_t key; 3112 rsm_permission_t permission; 3113 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 3114 3115 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n")); 3116 3117 if ((seg->s_state != RSM_STATE_EXPORT) && 3118 (seg->s_state != RSM_STATE_EXPORT_QUIESCED) && 3119 (seg->s_state != RSM_STATE_EXPORT_QUIESCING)) 3120 return (RSMERR_SEG_NOT_PUBLISHED); 3121 3122 if (seg->s_pid != ddi_get_pid() && 3123 ddi_get_pid() != 0) { 3124 DBG_PRINTF((category, RSM_ERR, 3125 "rsm_republish: Not owner\n")); 3126 return (RSMERR_NOT_CREATOR); 3127 } 3128 3129 if (seg->s_adapter == &loopback_adapter) 3130 loopback_flag = 1; 3131 3132 /* 3133 * Build new list first 3134 */ 3135 e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag); 3136 if (e) { 3137 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3138 "rsm_republish done: rsmacl_build failed %d", e)); 3139 return (e); 3140 } 3141 3142 /* Lock segment */ 3143 rsmseglock_acquire(seg); 3144 /* 3145 * a republish is in progress - REPUBLISH message is being 3146 * sent to the importers so wait for it to complete OR 3147 * wait till DR completes 3148 */ 3149 while (((seg->s_state == RSM_STATE_EXPORT) && 3150 (seg->s_flags & RSM_REPUBLISH_WAIT)) || 3151 (seg->s_state == RSM_STATE_EXPORT_QUIESCED) || 3152 (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) { 3153 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3154 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3155 "rsm_republish done: cv_wait INTERRUPTED")); 3156 rsmseglock_release(seg); 3157 rsmacl_free(new_acl, new_acl_len); 3158 return (RSMERR_INTERRUPTED); 3159 } 3160 } 3161 3162 /* recheck if state is valid */ 3163 if (seg->s_state != RSM_STATE_EXPORT) { 3164 rsmseglock_release(seg); 3165 rsmacl_free(new_acl, new_acl_len); 3166 return (RSMERR_SEG_NOT_PUBLISHED); 3167 } 3168 3169 key = seg->s_key; 3170 old_acl = seg->s_acl; 3171 old_acl_len = seg->s_acl_len; 3172 3173 seg->s_acl = new_acl; 3174 seg->s_acl_len = new_acl_len; 3175 3176 /* 3177 * This call will only be meaningful if and when the interconnect 3178 * layer makes use of the access list 3179 */ 3180 adapter = seg->s_adapter; 3181 /* 3182 * create a acl list with hwaddr for RSMPI publish 3183 */ 3184 e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter); 3185 3186 if (e != RSM_SUCCESS) { 3187 seg->s_acl = old_acl; 3188 seg->s_acl_len = old_acl_len; 3189 rsmseglock_release(seg); 3190 rsmacl_free(new_acl, new_acl_len); 3191 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3192 "rsm_republish done: rsmpiacl_create failed %d", e)); 3193 return (e); 3194 } 3195 rsmpi_old_acl = seg->s_acl_in; 3196 seg->s_acl_in = rsmpi_new_acl; 3197 3198 e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out, 3199 seg->s_acl_in, seg->s_acl_len, 3200 RSM_RESOURCE_DONTWAIT, NULL); 3201 3202 if (e != RSM_SUCCESS) { 3203 seg->s_acl = old_acl; 3204 seg->s_acl_in = rsmpi_old_acl; 3205 seg->s_acl_len = old_acl_len; 3206 rsmseglock_release(seg); 3207 rsmacl_free(new_acl, new_acl_len); 3208 rsmpiacl_free(rsmpi_new_acl, new_acl_len); 3209 3210 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3211 "rsm_republish done: rsmpi republish failed %d\n", e)); 3212 return (e); 3213 } 3214 3215 /* create a tmp copy of the new acl */ 3216 
	tmp_acl_len = new_acl_len;
	if (tmp_acl_len > 0) {
		tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
		for (i = 0; i < tmp_acl_len; i++) {
			tmp_acl[i].ae_node = new_acl[i].ae_node;
			tmp_acl[i].ae_permission = new_acl[i].ae_permission;
		}
		/*
		 * The default permission of a node which was in the old
		 * ACL but not in the new ACL is 0, ie. no access.
		 */
		permission = 0;
	} else {
		/*
		 * NULL acl means all importers can connect and
		 * default permission will be owner creation umask
		 */
		tmp_acl = NULL;
		permission = seg->s_mode;
	}

	/* make other republishers wait for republish to complete */
	seg->s_flags |= RSM_REPUBLISH_WAIT;

	rsmseglock_release(seg);

	/* send the new perms to the importing nodes */
	rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);

	rsmseglock_acquire(seg);
	seg->s_flags &= ~RSM_REPUBLISH_WAIT;
	/* wake up anyone waiting for republish to complete */
	cv_broadcast(&seg->s_cv);
	rsmseglock_release(seg);

	rsmacl_free(tmp_acl, tmp_acl_len);
	rsmacl_free(old_acl, old_acl_len);
	rsmpiacl_free(rsmpi_old_acl, old_acl_len);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
	return (DDI_SUCCESS);
}

static int
rsm_unpublish(rsmseg_t *seg, int mode)
{
	rsmapi_access_entry_t	*acl;
	rsm_access_entry_t	*rsmpi_acl;
	int			acl_len;
	int			e;
	clock_t			ticks;
	adapter_t		*adapter;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));

	if (seg->s_pid != ddi_get_pid() &&
	    ddi_get_pid() != 0) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm_unpublish: Not creator\n"));
		return (RSMERR_NOT_CREATOR);
	}

	rsmseglock_acquire(seg);
	/*
	 * Wait for QUIESCING to complete here before rsmexport_rm
	 * is called because the SUSPEND_COMPLETE message, which changes
	 * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and
	 * signals the cv_wait, needs to find it in the hashtable.
	 */
3285 */ 3286 while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) || 3287 ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) { 3288 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3289 rsmseglock_release(seg); 3290 DBG_PRINTF((category, RSM_ERR, 3291 "rsm_unpublish done: cv_wait INTR qscing" 3292 "getv/putv in progress")); 3293 return (RSMERR_INTERRUPTED); 3294 } 3295 } 3296 3297 /* verify segment state */ 3298 if ((seg->s_state != RSM_STATE_EXPORT) && 3299 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) { 3300 rsmseglock_release(seg); 3301 DBG_PRINTF((category, RSM_ERR, 3302 "rsm_unpublish done: bad state %x\n", seg->s_state)); 3303 return (RSMERR_SEG_NOT_PUBLISHED); 3304 } 3305 3306 rsmseglock_release(seg); 3307 3308 rsmexport_rm(seg); 3309 3310 rsm_send_importer_disconnects(seg->s_segid, my_nodeid); 3311 3312 rsmseglock_acquire(seg); 3313 /* 3314 * wait for republish to complete 3315 */ 3316 while ((seg->s_state == RSM_STATE_EXPORT) && 3317 (seg->s_flags & RSM_REPUBLISH_WAIT)) { 3318 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3319 DBG_PRINTF((category, RSM_ERR, 3320 "rsm_unpublish done: cv_wait INTR repubing")); 3321 rsmseglock_release(seg); 3322 return (RSMERR_INTERRUPTED); 3323 } 3324 } 3325 3326 if ((seg->s_state != RSM_STATE_EXPORT) && 3327 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) { 3328 DBG_PRINTF((category, RSM_ERR, 3329 "rsm_unpublish done: invalid state")); 3330 rsmseglock_release(seg); 3331 return (RSMERR_SEG_NOT_PUBLISHED); 3332 } 3333 3334 /* 3335 * check for putv/get surrogate segment which was not published 3336 * to the driver. 3337 * 3338 * Be certain to see if there is an ACL first! If this segment was 3339 * not published with an ACL, acl will be a null pointer. Check 3340 * that before dereferencing it. 3341 */ 3342 acl = seg->s_acl; 3343 if (acl != (rsmapi_access_entry_t *)NULL) { 3344 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0) 3345 goto bypass; 3346 } 3347 3348 /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */ 3349 if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) 3350 goto bypass; 3351 3352 adapter = seg->s_adapter; 3353 for (;;) { 3354 if (seg->s_state != RSM_STATE_EXPORT) { 3355 rsmseglock_release(seg); 3356 DBG_PRINTF((category, RSM_ERR, 3357 "rsm_unpublish done: bad state %x\n", 3358 seg->s_state)); 3359 return (RSMERR_SEG_NOT_PUBLISHED); 3360 } 3361 3362 /* unpublish from adapter */ 3363 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out); 3364 3365 if (e == RSM_SUCCESS) { 3366 break; 3367 } 3368 3369 if (e == RSMERR_SEG_IN_USE && mode == 1) { 3370 /* 3371 * wait for unpublish to succeed, it's busy. 
3372 */ 3373 seg->s_flags |= RSM_EXPORT_WAIT; 3374 3375 /* wait for a max of 1 ms - this is an empirical */ 3376 /* value that was found by some minimal testing */ 3377 /* can be fine tuned when we have better numbers */ 3378 /* A long term fix would be to send cv_signal */ 3379 /* from the intr callback routine */ 3380 (void) drv_getparm(LBOLT, &ticks); 3381 ticks += drv_usectohz(1000); 3382 /* currently nobody signals this wait */ 3383 (void) cv_timedwait(&seg->s_cv, &seg->s_lock, ticks); 3384 3385 DBG_PRINTF((category, RSM_ERR, 3386 "rsm_unpublish: SEG_IN_USE\n")); 3387 3388 seg->s_flags &= ~RSM_EXPORT_WAIT; 3389 } else { 3390 if (mode == 1) { 3391 DBG_PRINTF((category, RSM_ERR, 3392 "rsm:rsmpi unpublish err %x\n", e)); 3393 seg->s_state = RSM_STATE_BIND; 3394 } 3395 rsmseglock_release(seg); 3396 return (e); 3397 } 3398 } 3399 3400 /* Free segment */ 3401 e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out); 3402 3403 if (e != RSM_SUCCESS) { 3404 DBG_PRINTF((category, RSM_ERR, 3405 "rsm_unpublish: rsmpi destroy key=%x failed %x\n", 3406 seg->s_key, e)); 3407 } 3408 3409 bypass: 3410 acl = seg->s_acl; 3411 rsmpi_acl = seg->s_acl_in; 3412 acl_len = seg->s_acl_len; 3413 3414 seg->s_acl = NULL; 3415 seg->s_acl_in = NULL; 3416 seg->s_acl_len = 0; 3417 3418 if (seg->s_state == RSM_STATE_EXPORT) { 3419 seg->s_state = RSM_STATE_BIND; 3420 } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) { 3421 seg->s_state = RSM_STATE_BIND_QUIESCED; 3422 cv_broadcast(&seg->s_cv); 3423 } 3424 3425 rsmseglock_release(seg); 3426 3427 rsmacl_free(acl, acl_len); 3428 rsmpiacl_free(rsmpi_acl, acl_len); 3429 3430 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n")); 3431 3432 return (DDI_SUCCESS); 3433 } 3434 3435 /* 3436 * Called from rsm_unpublish to force an unload and disconnection of all 3437 * importers of the unpublished segment. 3438 * 3439 * First build the list of segments requiring a force disconnect, then 3440 * send a request for each. 3441 */ 3442 static void 3443 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid, 3444 rsm_node_id_t ex_nodeid) 3445 { 3446 rsmipc_request_t request; 3447 importing_token_t *prev_token, *token, *tmp_token, *tokp; 3448 importing_token_t *force_disconnect_list = NULL; 3449 int index; 3450 3451 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3452 "rsm_send_importer_disconnects enter\n")); 3453 3454 index = rsmhash(ex_segid); 3455 3456 mutex_enter(&importer_list.lock); 3457 3458 prev_token = NULL; 3459 token = importer_list.bucket[index]; 3460 3461 while (token != NULL) { 3462 if (token->key == ex_segid) { 3463 /* 3464 * take it off the importer list and add it 3465 * to the force disconnect list. 3466 */ 3467 if (prev_token == NULL) 3468 importer_list.bucket[index] = token->next; 3469 else 3470 prev_token->next = token->next; 3471 tmp_token = token; 3472 token = token->next; 3473 if (force_disconnect_list == NULL) { 3474 force_disconnect_list = tmp_token; 3475 tmp_token->next = NULL; 3476 } else { 3477 tokp = force_disconnect_list; 3478 /* 3479 * make sure that the tmp_token's node 3480 * is not already on the force disconnect 3481 * list. 
3482 */ 3483 while (tokp != NULL) { 3484 if (tokp->importing_node == 3485 tmp_token->importing_node) { 3486 break; 3487 } 3488 tokp = tokp->next; 3489 } 3490 if (tokp == NULL) { 3491 tmp_token->next = 3492 force_disconnect_list; 3493 force_disconnect_list = tmp_token; 3494 } else { 3495 kmem_free((void *)tmp_token, 3496 sizeof (*token)); 3497 } 3498 } 3499 3500 } else { 3501 prev_token = token; 3502 token = token->next; 3503 } 3504 } 3505 mutex_exit(&importer_list.lock); 3506 3507 token = force_disconnect_list; 3508 while (token != NULL) { 3509 if (token->importing_node == my_nodeid) { 3510 rsm_force_unload(ex_nodeid, ex_segid, 3511 DISCONNECT); 3512 } else { 3513 request.rsmipc_hdr.rsmipc_type = 3514 RSMIPC_MSG_DISCONNECT; 3515 request.rsmipc_key = token->key; 3516 for (;;) { 3517 if (rsmipc_send(token->importing_node, 3518 &request, 3519 RSM_NO_REPLY) == RSM_SUCCESS) { 3520 break; 3521 } else { 3522 delay(drv_usectohz(10000)); 3523 } 3524 } 3525 } 3526 tmp_token = token; 3527 token = token->next; 3528 kmem_free((void *)tmp_token, sizeof (*token)); 3529 } 3530 3531 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3532 "rsm_send_importer_disconnects done\n")); 3533 } 3534 3535 /* 3536 * This function is used as a callback for unlocking the pages locked 3537 * down by a process which then does a fork or an exec. 3538 * It marks the export segments corresponding to umem cookie given by 3539 * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be 3540 * destroyed later when an rsm_close occurs). 3541 */ 3542 static void 3543 rsm_export_force_destroy(ddi_umem_cookie_t *ck) 3544 { 3545 rsmresource_blk_t *blk; 3546 rsmresource_t *p; 3547 rsmseg_t *eseg = NULL; 3548 int i, j; 3549 int found = 0; 3550 3551 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3552 "rsm_export_force_destroy enter\n")); 3553 3554 /* 3555 * Walk the resource list and locate the export segment (either 3556 * in the BIND or the EXPORT state) which corresponds to the 3557 * ddi_umem_cookie_t being freed up, and call rsmseg_close. 3558 * Change the state to ZOMBIE by calling rsmseg_close with the 3559 * force_flag argument (the second argument) set to 1. Also, 3560 * unpublish and unbind the segment, but don't free it. Free it 3561 * only on a rsm_close call for the segment. 3562 */ 3563 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 3564 3565 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 3566 blk = rsm_resource.rsmrc_root[i]; 3567 if (blk == NULL) { 3568 continue; 3569 } 3570 3571 for (j = 0; j < RSMRC_BLKSZ; j++) { 3572 p = blk->rsmrcblk_blks[j]; 3573 if ((p != NULL) && (p != RSMRC_RESERVED) && 3574 (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) { 3575 eseg = (rsmseg_t *)p; 3576 if (eseg->s_cookie != ck) 3577 continue; /* continue searching */ 3578 /* 3579 * Found the segment, set flag to indicate 3580 * force destroy processing is in progress 3581 */ 3582 rsmseglock_acquire(eseg); 3583 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT; 3584 rsmseglock_release(eseg); 3585 found = 1; 3586 break; 3587 } 3588 } 3589 3590 if (found) 3591 break; 3592 } 3593 3594 rw_exit(&rsm_resource.rsmrc_lock); 3595 3596 if (found) { 3597 ASSERT(eseg != NULL); 3598 /* call rsmseg_close with force flag set to 1 */ 3599 rsmseg_close(eseg, 1); 3600 /* 3601 * force destroy processing done, clear flag and signal any 3602 * thread waiting in rsmseg_close. 
3603 */ 3604 rsmseglock_acquire(eseg); 3605 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT; 3606 cv_broadcast(&eseg->s_cv); 3607 rsmseglock_release(eseg); 3608 } 3609 3610 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3611 "rsm_export_force_destroy done\n")); 3612 } 3613 3614 /* ******************************* Remote Calls *********************** */ 3615 static void 3616 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req) 3617 { 3618 rsmipc_reply_t reply; 3619 DBG_DEFINE(category, 3620 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3621 3622 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3623 "rsm_intr_segconnect enter\n")); 3624 3625 reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply); 3626 3627 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY; 3628 reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie; 3629 3630 (void) rsmipc_send(src, NULL, &reply); 3631 3632 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3633 "rsm_intr_segconnect done\n")); 3634 } 3635 3636 3637 /* 3638 * When an exported segment is unpublished the exporter sends an ipc 3639 * message (RSMIPC_MSG_DISCONNECT) to all importers. The recv ipc dispatcher 3640 * calls this function. The import list is scanned; segments which match the 3641 * exported segment id are unloaded and disconnected. 3642 * 3643 * Will also be called from rsm_rebind with disconnect_flag FALSE. 3644 * 3645 */ 3646 static void 3647 rsm_force_unload(rsm_node_id_t src_nodeid, 3648 rsm_memseg_id_t ex_segid, 3649 boolean_t disconnect_flag) 3650 3651 { 3652 rsmresource_t *p = NULL; 3653 rsmhash_table_t *rhash = &rsm_import_segs; 3654 uint_t index; 3655 DBG_DEFINE(category, 3656 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3657 3658 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n")); 3659 3660 index = rsmhash(ex_segid); 3661 3662 rw_enter(&rhash->rsmhash_rw, RW_READER); 3663 3664 p = rsmhash_getbkt(rhash, index); 3665 3666 for (; p; p = p->rsmrc_next) { 3667 rsmseg_t *seg = (rsmseg_t *)p; 3668 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) { 3669 /* 3670 * In order to make rsmseg_unload and rsm_force_unload 3671 * thread safe, acquire the segment lock here. 3672 * rsmseg_unload is responsible for releasing the lock. 3673 * rsmseg_unload releases the lock just before a call 3674 * to rsmipc_send or in case of an early exit which 3675 * occurs if the segment was in the state 3676 * RSM_STATE_CONNECTING or RSM_STATE_NEW. 3677 */ 3678 rsmseglock_acquire(seg); 3679 if (disconnect_flag) 3680 seg->s_flags |= RSM_FORCE_DISCONNECT; 3681 rsmseg_unload(seg); 3682 } 3683 } 3684 rw_exit(&rhash->rsmhash_rw); 3685 3686 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n")); 3687 } 3688 3689 static void 3690 rsm_intr_reply(rsmipc_msghdr_t *msg) 3691 { 3692 /* 3693 * Find slot for cookie in reply. 
static void
rsm_intr_reply(rsmipc_msghdr_t *msg)
{
	/*
	 * Find the slot for the cookie in the reply.
	 * Match the sequence with the sequence in the cookie.
	 * If no match, return.
	 * Try to grab the lock of the slot; if locked, return.
	 * Copy the data into the reply slot area.
	 * Signal the waiter.
	 */
	rsmipc_slot_t	*slot;
	rsmipc_cookie_t	*cookie;
	void *data = (void *) msg;
	size_t size = sizeof (rsmipc_reply_t);
	DBG_DEFINE(category,
	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));

	cookie = &msg->rsmipc_cookie;
	if (cookie->ic.index >= RSMIPC_SZ) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
		return;
	}

	ASSERT(cookie->ic.index < RSMIPC_SZ);
	slot = &rsm_ipc.slots[cookie->ic.index];
	mutex_enter(&slot->rsmipc_lock);
	if (slot->rsmipc_cookie.value == cookie->value) {
		/* found a match */
		if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
			bcopy(data, slot->rsmipc_data, size);
			RSMIPC_CLEAR(slot, RSMIPC_PENDING);
			cv_signal(&slot->rsmipc_cv);
		}
	} else {
		DBG_PRINTF((category, RSM_DEBUG,
		    "rsm: rsm_intr_reply mismatched reply %d\n",
		    cookie->ic.index));
	}
	mutex_exit(&slot->rsmipc_lock);
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
}
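/*
 * The reply path above is one half of a slot-based rendezvous: a
 * sender stamps its request with a cookie (slot index plus sequence),
 * marks the slot RSMIPC_PENDING and waits; rsm_intr_reply matches the
 * full cookie value before copying the reply in and signalling.  A
 * minimal sketch of the waiting side (illustrative only; see the
 * actual send path for the real implementation):
 *
 *	mutex_enter(&slot->rsmipc_lock);
 *	RSMIPC_SET(slot, RSMIPC_PENDING);
 *	... transmit request carrying slot->rsmipc_cookie ...
 *	while (RSMIPC_GET(slot, RSMIPC_PENDING))
 *		cv_wait(&slot->rsmipc_cv, &slot->rsmipc_lock);
 *	mutex_exit(&slot->rsmipc_lock);
 */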
3755 */ 3756 if (path->state != RSMKA_PATH_ACTIVE) { 3757 /* 3758 * decrement the path refcnt incremented in rsm_proc_sqready 3759 */ 3760 PATH_RELE_NOLOCK(path); 3761 mutex_exit(&path->mutex); 3762 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3763 "rsm_sqready_ack_deferred done:!ACTIVE\n")); 3764 return; 3765 } 3766 3767 /* send an SQREADY_ACK message */ 3768 (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK); 3769 3770 /* initialize credits to the max level */ 3771 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES; 3772 3773 /* wake up any send that is waiting for credits */ 3774 cv_broadcast(&path->sendq_token.sendq_cv); 3775 3776 /* 3777 * decrement the path refcnt since we incremented it in 3778 * rsm_proc_sqready 3779 */ 3780 PATH_RELE_NOLOCK(path); 3781 3782 mutex_exit(&path->mutex); 3783 3784 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3785 "rsm_sqready_ack_deferred done\n")); 3786 } 3787 3788 /* 3789 * Process the SQREADY message 3790 */ 3791 static void 3792 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3793 rsm_intr_hand_arg_t arg) 3794 { 3795 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3796 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3797 path_t *path; 3798 DBG_DEFINE(category, 3799 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3800 3801 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n")); 3802 3803 /* look up the path - incr the path refcnt */ 3804 path = rsm_find_path(hdlr_argp->adapter_name, 3805 hdlr_argp->adapter_instance, src_hwaddr); 3806 3807 /* 3808 * No path exists or path is not active - drop the message 3809 */ 3810 if (path == NULL) { 3811 DBG_PRINTF((category, RSM_DEBUG, 3812 "rsm_proc_sqready done: msg dropped no path\n")); 3813 return; 3814 } 3815 3816 mutex_exit(&path->mutex); 3817 3818 /* drain any tasks from the previous incarnation */ 3819 taskq_wait(path->recv_taskq); 3820 3821 mutex_enter(&path->mutex); 3822 /* 3823 * If we'd sent an SQREADY message and were waiting for SQREADY_ACK 3824 * in the meanwhile we received an SQREADY message, blindly reset 3825 * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK 3826 * and forget about the SQREADY that we sent. 3827 */ 3828 path->flags &= ~RSMKA_WAIT_FOR_SQACK; 3829 3830 if (path->state != RSMKA_PATH_ACTIVE) { 3831 /* decr refcnt and drop the mutex */ 3832 PATH_RELE_NOLOCK(path); 3833 mutex_exit(&path->mutex); 3834 DBG_PRINTF((category, RSM_DEBUG, 3835 "rsm_proc_sqready done: msg dropped path !ACTIVE\n")); 3836 return; 3837 } 3838 3839 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx " 3840 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr)); 3841 3842 /* 3843 * The sender's local incarnation number is our remote incarnation 3844 * number save it in the path data structure 3845 */ 3846 path->remote_incn = msg->rsmipc_local_incn; 3847 path->sendq_token.msgbuf_avail = 0; 3848 path->procmsg_cnt = 0; 3849 3850 /* 3851 * path is active - dispatch task to send SQREADY_ACK - remember 3852 * RSMPI calls can't be done in interrupt context 3853 * 3854 * We can use the recv_taskq to send because the remote endpoint 3855 * cannot start sending messages till it receives SQREADY_ACK hence 3856 * at this point there are no tasks on recv_taskq. 3857 * 3858 * The path refcnt will be decremented in rsm_sqready_ack_deferred. 
3859 */
3860 (void) taskq_dispatch(path->recv_taskq,
3861 rsm_sqready_ack_deferred, path, KM_NOSLEEP);
3862
3863 mutex_exit(&path->mutex);
3864
3865
3866 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n"));
3867 }
3868
3869 /*
3870 * Process the SQREADY_ACK message
3871 */
3872 static void
3873 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3874 rsm_intr_hand_arg_t arg)
3875 {
3876 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3877 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3878 path_t *path;
3879 DBG_DEFINE(category,
3880 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3881
3882 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3883 "rsm_proc_sqready_ack enter\n"));
3884
3885 /* look up the path - incr the path refcnt */
3886 path = rsm_find_path(hdlr_argp->adapter_name,
3887 hdlr_argp->adapter_instance, src_hwaddr);
3888
3889 /*
3890 * drop the message if no path exists, the path is not active,
3891 * or if it's not waiting for an SQREADY_ACK message
3892 */
3893 if (path == NULL) {
3894 DBG_PRINTF((category, RSM_DEBUG,
3895 "rsm_proc_sqready_ack done: msg dropped no path\n"));
3896 return;
3897 }
3898
3899 if ((path->state != RSMKA_PATH_ACTIVE) ||
3900 !(path->flags & RSMKA_WAIT_FOR_SQACK)) {
3901 /* decrement the refcnt */
3902 PATH_RELE_NOLOCK(path);
3903 mutex_exit(&path->mutex);
3904 DBG_PRINTF((category, RSM_DEBUG,
3905 "rsm_proc_sqready_ack done: msg dropped\n"));
3906 return;
3907 }
3908
3909 /*
3910 * Check if this message is in response to the last RSMIPC_MSG_SQREADY
3911 * sent; if not, drop it.
3912 */
3913 if (path->local_incn != msghdr->rsmipc_incn) {
3914 /* decrement the refcnt */
3915 PATH_RELE_NOLOCK(path);
3916 mutex_exit(&path->mutex);
3917 DBG_PRINTF((category, RSM_DEBUG,
3918 "rsm_proc_sqready_ack done: msg old incn %lld\n",
3919 msghdr->rsmipc_incn));
3920 return;
3921 }
3922
3923 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx "
3924 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr));
3925
3926 /*
3927 * clear the WAIT_FOR_SQACK flag since we have recvd the ack
3928 */
3929 path->flags &= ~RSMKA_WAIT_FOR_SQACK;
3930
3931 /* save the remote sendq incn number */
3932 path->remote_incn = msg->rsmipc_local_incn;
3933
3934 /* initialize credits to the max level */
3935 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES;
3936
3937 /* wake up any send that is waiting for credits */
3938 cv_broadcast(&path->sendq_token.sendq_cv);
3939
3940 /* decrement the refcnt */
3941 PATH_RELE_NOLOCK(path);
3942
3943 mutex_exit(&path->mutex);
3944
3945 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3946 "rsm_proc_sqready_ack done\n"));
3947 }
3948
3949 /*
3950 * process the RSMIPC_MSG_CREDIT message
3951 */
3952 static void
3953 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr,
3954 rsm_intr_hand_arg_t arg)
3955 {
3956 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg;
3957 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
3958 path_t *path;
3959 DBG_DEFINE(category,
3960 RSM_KERNEL_AGENT | RSM_FUNC_ALL |
3961 RSM_INTR_CALLBACK | RSM_FLOWCONTROL);
3962
3963 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n"));
3964
3965 /* look up the path - incr the path refcnt */
3966 path = rsm_find_path(hdlr_argp->adapter_name,
3967 hdlr_argp->adapter_instance, src_hwaddr);
3968
3969 if (path == NULL) {
3970 DBG_PRINTF((category, RSM_DEBUG,
3971 "rsm_add_credits enter: path not found\n"));
3972 return;
3973 }
3974
3975 /* the path is not active - discard credits */
3976 if (path->state !=
RSMKA_PATH_ACTIVE) { 3977 PATH_RELE_NOLOCK(path); 3978 mutex_exit(&path->mutex); 3979 DBG_PRINTF((category, RSM_DEBUG, 3980 "rsm_add_credits enter:path=%lx !ACTIVE\n", path)); 3981 return; 3982 } 3983 3984 /* 3985 * Check if these credits are for current incarnation of the path. 3986 */ 3987 if (path->local_incn != msghdr->rsmipc_incn) { 3988 /* decrement the refcnt */ 3989 PATH_RELE_NOLOCK(path); 3990 mutex_exit(&path->mutex); 3991 DBG_PRINTF((category, RSM_DEBUG, 3992 "rsm_add_credits enter: old incn %lld\n", 3993 msghdr->rsmipc_incn)); 3994 return; 3995 } 3996 3997 DBG_PRINTF((category, RSM_DEBUG, 3998 "rsm_add_credits:path=%lx new-creds=%d " 3999 "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits, 4000 path->sendq_token.msgbuf_avail, msghdr->rsmipc_src, 4001 src_hwaddr)); 4002 4003 4004 /* add credits to the path's sendq */ 4005 path->sendq_token.msgbuf_avail += msg->rsmipc_credits; 4006 4007 ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES); 4008 4009 /* wake up any send that is waiting for credits */ 4010 cv_broadcast(&path->sendq_token.sendq_cv); 4011 4012 /* decrement the refcnt */ 4013 PATH_RELE_NOLOCK(path); 4014 4015 mutex_exit(&path->mutex); 4016 4017 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n")); 4018 } 4019 4020 static void 4021 rsm_intr_event(rsmipc_request_t *msg) 4022 { 4023 rsmseg_t *seg; 4024 rsmresource_t *p; 4025 rsm_node_id_t src_node; 4026 DBG_DEFINE(category, 4027 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4028 4029 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n")); 4030 4031 src_node = msg->rsmipc_hdr.rsmipc_src; 4032 4033 if ((seg = msg->rsmipc_segment_cookie) != NULL) { 4034 /* This is for an import segment */ 4035 uint_t hashval = rsmhash(msg->rsmipc_key); 4036 4037 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER); 4038 4039 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval); 4040 4041 for (; p; p = p->rsmrc_next) { 4042 if ((p->rsmrc_key == msg->rsmipc_key) && 4043 (p->rsmrc_node == src_node)) { 4044 seg = (rsmseg_t *)p; 4045 rsmseglock_acquire(seg); 4046 4047 atomic_add_32(&seg->s_pollevent, 1); 4048 4049 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4050 pollwakeup(&seg->s_poll, POLLRDNORM); 4051 4052 rsmseglock_release(seg); 4053 } 4054 } 4055 4056 rw_exit(&rsm_import_segs.rsmhash_rw); 4057 } else { 4058 /* This is for an export segment */ 4059 seg = rsmexport_lookup(msg->rsmipc_key); 4060 if (!seg) { 4061 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4062 "rsm_intr_event done: exp seg not found\n")); 4063 return; 4064 } 4065 4066 ASSERT(rsmseglock_held(seg)); 4067 4068 atomic_add_32(&seg->s_pollevent, 1); 4069 4070 /* 4071 * We must hold the segment lock here, or else the segment 4072 * can be freed while pollwakeup is using it. This implies 4073 * that we MUST NOT grab the segment lock during rsm_chpoll, 4074 * as outlined in the chpoll(2) man page. 4075 */ 4076 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4077 pollwakeup(&seg->s_poll, POLLRDNORM); 4078 4079 rsmseglock_release(seg); 4080 } 4081 4082 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n")); 4083 } 4084 4085 /* 4086 * The exporter did a republish and changed the ACL - this change is only 4087 * visible to new importers. 
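 *
 * (Explanatory note: existing importers only get the permission cached
 * in the shared structure, rsmsi_mode, refreshed below; their current
 * mappings are left untouched. A new importer's connect is validated
 * against the republished ACL on the exporting node.)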
4088 */
4089 static void
4090 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key,
4091 rsm_permission_t perm)
4092 {
4093
4094 rsmresource_t *p;
4095 rsmseg_t *seg;
4096 uint_t hashval = rsmhash(key);
4097 DBG_DEFINE(category,
4098 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4099
4100 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n"));
4101
4102 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER);
4103
4104 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval);
4105
4106 for (; p; p = p->rsmrc_next) {
4107 /*
4108 * find the importer and update the permission in the shared
4109 * data structure. Any new importers will use the new perms
4110 */
4111 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) {
4112 seg = (rsmseg_t *)p;
4113
4114 rsmseglock_acquire(seg);
4115 rsmsharelock_acquire(seg);
4116 seg->s_share->rsmsi_mode = perm;
4117 rsmsharelock_release(seg);
4118 rsmseglock_release(seg);
4119
4120 break;
4121 }
4122 }
4123
4124 rw_exit(&rsm_import_segs.rsmhash_rw);
4125
4126 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n"));
4127 }
4128
4129 void
4130 rsm_suspend_complete(rsm_node_id_t src_node, int flag)
4131 {
4132 int done = 1; /* indicate all SUSPENDS have been acked */
4133 list_element_t *elem;
4134 DBG_DEFINE(category,
4135 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4136
4137 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4138 "rsm_suspend_complete enter\n"));
4139
4140 mutex_enter(&rsm_suspend_list.list_lock);
4141
4142 if (rsm_suspend_list.list_head == NULL) {
4143 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4144 "rsm_suspend_complete done: suspend_list is empty\n"));
4145 mutex_exit(&rsm_suspend_list.list_lock);
4146 return;
4147 }
4148
4149 elem = rsm_suspend_list.list_head;
4150 while (elem != NULL) {
4151 if (elem->nodeid == src_node) {
4152 /* clear the pending flag for the node */
4153 elem->flags &= ~RSM_SUSPEND_ACKPENDING;
4154 elem->flags |= flag;
4155 }
4156
4157 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING))
4158 done = 0; /* still some nodes have not yet ACKED */
4159
4160 elem = elem->next;
4161 }
4162
4163 mutex_exit(&rsm_suspend_list.list_lock);
4164
4165 if (!done) {
4166 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4167 "rsm_suspend_complete done: acks pending\n"));
4168 return;
4169 }
4170 /*
4171 * Now that we are done with suspending all the remote importers,
4172 * it is time to quiesce the local exporters
4173 */
4174 exporter_quiesce();
4175
4176 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4177 "rsm_suspend_complete done\n"));
4178 }
4179
4180 static void
4181 exporter_quiesce()
4182 {
4183 int i, e;
4184 rsmresource_t *current;
4185 rsmseg_t *seg;
4186 adapter_t *adapter;
4187 DBG_DEFINE(category,
4188 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4189
4190 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n"));
4191 /*
4192 * The importers have sent a SUSPEND_COMPLETE to the exporter node.
4193 * Unpublish and unbind the export segments and
4194 * move them to the EXPORT_QUIESCED state
4195 */
4196
4197 rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER);
4198
4199 for (i = 0; i < rsm_hash_size; i++) {
4200 current = rsm_export_segs.bucket[i];
4201 while (current != NULL) {
4202 seg = (rsmseg_t *)current;
4203 rsmseglock_acquire(seg);
4204 if (current->rsmrc_state ==
4205 RSM_STATE_EXPORT_QUIESCING) {
4206 adapter = seg->s_adapter;
4207 /*
4208 * some local memory handles are not published;
4209 * check if this one was published
4210 */
4211 if ((seg->s_acl == NULL) ||
4212
(seg->s_acl[0].ae_node != my_nodeid) ||
4213 (seg->s_acl[0].ae_permission != 0)) {
4214
4215 e = adapter->rsmpi_ops->rsm_unpublish(
4216 seg->s_handle.out);
4217 DBG_PRINTF((category, RSM_DEBUG,
4218 "exporter_quiesce:unpub %d\n", e));
4219
4220 e = adapter->rsmpi_ops->rsm_seg_destroy(
4221 seg->s_handle.out);
4222
4223 DBG_PRINTF((category, RSM_DEBUG,
4224 "exporter_quiesce:destroy %d\n",
4225 e));
4226 }
4227
4228 (void) rsm_unbind_pages(seg);
4229 seg->s_state = RSM_STATE_EXPORT_QUIESCED;
4230 cv_broadcast(&seg->s_cv);
4231 }
4232 rsmseglock_release(seg);
4233 current = current->rsmrc_next;
4234 }
4235 }
4236 rw_exit(&rsm_export_segs.rsmhash_rw);
4237
4238 /*
4239 * We are done with the pre-del processing for all the local
4240 * segments - time to move to PREDEL_COMPLETED.
4241 */
4242
4243 mutex_enter(&rsm_drv_data.drv_lock);
4244
4245 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED);
4246
4247 rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED;
4248
4249 cv_broadcast(&rsm_drv_data.drv_cv);
4250
4251 mutex_exit(&rsm_drv_data.drv_lock);
4252
4253 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n"));
4254 }
4255
4256 static void
4257 importer_suspend(rsm_node_id_t src_node)
4258 {
4259 int i;
4260 int susp_flg; /* true means already suspended */
4261 int num_importers;
4262 rsmresource_t *p = NULL, *curp;
4263 rsmhash_table_t *rhash = &rsm_import_segs;
4264 rsmseg_t *seg;
4265 rsmipc_request_t request;
4266 DBG_DEFINE(category,
4267 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4268
4269 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n"));
4270
4271 rw_enter(&rhash->rsmhash_rw, RW_READER);
4272 for (i = 0; i < rsm_hash_size; i++) {
4273 p = rhash->bucket[i];
4274
4275 /*
4276 * Suspend all importers with same <node, key> pair.
4277 * After the last one of the shared importers has been
4278 * suspended - suspend the shared mappings/connection.
4279 */
4280 for (; p; p = p->rsmrc_next) {
4281 rsmseg_t *first = (rsmseg_t *)p;
4282 if ((first->s_node != src_node) ||
4283 (first->s_state == RSM_STATE_DISCONNECT))
4284 continue; /* go to next entry */
4285 /*
4286 * search the rest of the bucket for
4287 * other siblings (importers with the same key)
4288 * of "first" and suspend them.
4289 * All importers with same key fall in
4290 * the same bucket.
4291 */
4292 num_importers = 0;
4293 for (curp = p; curp; curp = curp->rsmrc_next) {
4294 seg = (rsmseg_t *)curp;
4295
4296 rsmseglock_acquire(seg);
4297
4298 if ((seg->s_node != first->s_node) ||
4299 (seg->s_key != first->s_key) ||
4300 (seg->s_state == RSM_STATE_DISCONNECT)) {
4301 /*
4302 * either not a peer segment or it's a
4303 * disconnected segment - skip it
4304 */
4305 rsmseglock_release(seg);
4306 continue;
4307 }
4308
4309 rsmseg_suspend(seg, &susp_flg);
4310
4311 if (susp_flg) { /* seg already suspended */
4312 rsmseglock_release(seg);
4313 break; /* the inner for loop */
4314 }
4315
4316 num_importers++;
4317 rsmsharelock_acquire(seg);
4318 /*
4319 * we've processed all importers that are
4320 * siblings of "first"
4321 */
4322 if (num_importers ==
4323 seg->s_share->rsmsi_refcnt) {
4324 rsmsharelock_release(seg);
4325 rsmseglock_release(seg);
4326 break;
4327 }
4328 rsmsharelock_release(seg);
4329 rsmseglock_release(seg);
4330 }
4331
4332 /*
4333 * All the importers with the same key and
4334 * nodeid as "first" have been suspended.
4335 * Now suspend the shared connect/mapping.
4336 * This is done only once.
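 *
 * (Explanatory note: if rsmseg_suspend set susp_flg, some
 * sibling had already been suspended by an earlier pass, so
 * the shared state is already in a xxx_QUIESCE state and
 * rsmsegshare_suspend is skipped below.)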
4337 */ 4338 if (!susp_flg) { 4339 rsmsegshare_suspend(seg); 4340 } 4341 } 4342 } 4343 4344 rw_exit(&rhash->rsmhash_rw); 4345 4346 /* send an ACK for SUSPEND message */ 4347 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE; 4348 (void) rsmipc_send(src_node, &request, RSM_NO_REPLY); 4349 4350 4351 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n")); 4352 4353 } 4354 4355 static void 4356 rsmseg_suspend(rsmseg_t *seg, int *susp_flg) 4357 { 4358 int recheck_state; 4359 rsmcookie_t *hdl; 4360 DBG_DEFINE(category, 4361 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4362 4363 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4364 "rsmseg_suspend enter: key=%u\n", seg->s_key)); 4365 4366 *susp_flg = 0; 4367 4368 ASSERT(rsmseglock_held(seg)); 4369 /* wait if putv/getv is in progress */ 4370 while (seg->s_rdmacnt > 0) 4371 cv_wait(&seg->s_cv, &seg->s_lock); 4372 4373 do { 4374 recheck_state = 0; 4375 4376 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4377 "rsmseg_suspend:segment %x state=%d\n", 4378 seg->s_key, seg->s_state)); 4379 4380 switch (seg->s_state) { 4381 case RSM_STATE_NEW: 4382 /* not a valid state */ 4383 break; 4384 case RSM_STATE_CONNECTING: 4385 seg->s_state = RSM_STATE_ABORT_CONNECT; 4386 break; 4387 case RSM_STATE_ABORT_CONNECT: 4388 break; 4389 case RSM_STATE_CONNECT: 4390 seg->s_handle.in = NULL; 4391 seg->s_state = RSM_STATE_CONN_QUIESCE; 4392 break; 4393 case RSM_STATE_MAPPING: 4394 /* wait until segment leaves the mapping state */ 4395 while (seg->s_state == RSM_STATE_MAPPING) 4396 cv_wait(&seg->s_cv, &seg->s_lock); 4397 recheck_state = 1; 4398 break; 4399 case RSM_STATE_ACTIVE: 4400 /* unload the mappings */ 4401 if (seg->s_ckl != NULL) { 4402 hdl = seg->s_ckl; 4403 for (; hdl != NULL; hdl = hdl->c_next) { 4404 (void) devmap_unload(hdl->c_dhp, 4405 hdl->c_off, hdl->c_len); 4406 } 4407 } 4408 seg->s_mapinfo = NULL; 4409 seg->s_state = RSM_STATE_MAP_QUIESCE; 4410 break; 4411 case RSM_STATE_CONN_QUIESCE: 4412 /* FALLTHRU */ 4413 case RSM_STATE_MAP_QUIESCE: 4414 /* rsmseg_suspend already done for seg */ 4415 *susp_flg = 1; 4416 break; 4417 case RSM_STATE_DISCONNECT: 4418 break; 4419 default: 4420 ASSERT(0); /* invalid state */ 4421 } 4422 } while (recheck_state); 4423 4424 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n")); 4425 } 4426 4427 static void 4428 rsmsegshare_suspend(rsmseg_t *seg) 4429 { 4430 int e; 4431 adapter_t *adapter; 4432 rsm_import_share_t *sharedp; 4433 DBG_DEFINE(category, 4434 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4435 4436 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4437 "rsmsegshare_suspend enter\n")); 4438 4439 rsmseglock_acquire(seg); 4440 rsmsharelock_acquire(seg); 4441 4442 sharedp = seg->s_share; 4443 adapter = seg->s_adapter; 4444 switch (sharedp->rsmsi_state) { 4445 case RSMSI_STATE_NEW: 4446 break; 4447 case RSMSI_STATE_CONNECTING: 4448 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT; 4449 break; 4450 case RSMSI_STATE_ABORT_CONNECT: 4451 break; 4452 case RSMSI_STATE_CONNECTED: 4453 /* do the rsmpi disconnect */ 4454 if (sharedp->rsmsi_node != my_nodeid) { 4455 e = adapter->rsmpi_ops-> 4456 rsm_disconnect(sharedp->rsmsi_handle); 4457 4458 DBG_PRINTF((category, RSM_DEBUG, 4459 "rsm:rsmpi disconnect seg=%x:err=%d\n", 4460 sharedp->rsmsi_segid, e)); 4461 } 4462 4463 sharedp->rsmsi_handle = NULL; 4464 4465 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE; 4466 break; 4467 case RSMSI_STATE_CONN_QUIESCE: 4468 break; 4469 case RSMSI_STATE_MAPPED: 4470 /* do the rsmpi unmap and disconnect */ 4471 if 
(sharedp->rsmsi_node != my_nodeid) {
4472 e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in);
4473
4474 DBG_PRINTF((category, RSM_DEBUG,
4475 "rsmshare_suspend: rsmpi unmap %d\n", e));
4476
4477 e = adapter->rsmpi_ops->
4478 rsm_disconnect(sharedp->rsmsi_handle);
4479 DBG_PRINTF((category, RSM_DEBUG,
4480 "rsm:rsmpi disconnect seg=%x:err=%d\n",
4481 sharedp->rsmsi_segid, e));
4482 }
4483
4484 sharedp->rsmsi_handle = NULL;
4485
4486 sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE;
4487 break;
4488 case RSMSI_STATE_MAP_QUIESCE:
4489 break;
4490 case RSMSI_STATE_DISCONNECTED:
4491 break;
4492 default:
4493 ASSERT(0); /* invalid state */
4494 }
4495
4496 rsmsharelock_release(seg);
4497 rsmseglock_release(seg);
4498
4499 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4500 "rsmsegshare_suspend done\n"));
4501 }
4502
4503 /*
4504 * This should get called on receiving a RESUME message or from
4505 * the pathmanager if the node undergoing DR dies.
4506 */
4507 static void
4508 importer_resume(rsm_node_id_t src_node)
4509 {
4510 int i;
4511 rsmresource_t *p = NULL;
4512 rsmhash_table_t *rhash = &rsm_import_segs;
4513 void *cookie;
4514 DBG_DEFINE(category,
4515 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4516
4517 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n"));
4518
4519 rw_enter(&rhash->rsmhash_rw, RW_READER);
4520
4521 for (i = 0; i < rsm_hash_size; i++) {
4522 p = rhash->bucket[i];
4523
4524 for (; p; p = p->rsmrc_next) {
4525 rsmseg_t *seg = (rsmseg_t *)p;
4526
4527 rsmseglock_acquire(seg);
4528
4529 /* process only importers of node undergoing DR */
4530 if (seg->s_node != src_node) {
4531 rsmseglock_release(seg);
4532 continue;
4533 }
4534
4535 if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) {
4536 rsmipc_request_t request;
4537 /*
4538 * rsmpi map/connect failed;
4539 * inform the exporter so that it can
4540 * remove the importer.
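 * (The NOTIMPORTING request below carries the segment id
 * and the shared cookie, which the exporting node uses to
 * drop this importer from its importer list - see
 * importer_list_rm.)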
4541 */
4542 request.rsmipc_hdr.rsmipc_type =
4543 RSMIPC_MSG_NOTIMPORTING;
4544 request.rsmipc_key = seg->s_segid;
4545 request.rsmipc_segment_cookie = cookie;
4546 rsmseglock_release(seg);
4547 (void) rsmipc_send(seg->s_node, &request,
4548 RSM_NO_REPLY);
4549 } else {
4550 rsmseglock_release(seg);
4551 }
4552 }
4553 }
4554
4555 rw_exit(&rhash->rsmhash_rw);
4556
4557 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n"));
4558 }
4559
4560 static int
4561 rsmseg_resume(rsmseg_t *seg, void **cookie)
4562 {
4563 int e;
4564 int retc;
4565 off_t dev_offset;
4566 size_t maplen;
4567 uint_t maxprot;
4568 rsm_mapinfo_t *p;
4569 rsmcookie_t *hdl;
4570 rsm_import_share_t *sharedp;
4571 DBG_DEFINE(category,
4572 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4573
4574 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4575 "rsmseg_resume enter: key=%u\n", seg->s_key));
4576
4577 *cookie = NULL;
4578
4579 ASSERT(rsmseglock_held(seg));
4580
4581 if ((seg->s_state != RSM_STATE_CONN_QUIESCE) &&
4582 (seg->s_state != RSM_STATE_MAP_QUIESCE)) {
4583 return (RSM_SUCCESS);
4584 }
4585
4586 sharedp = seg->s_share;
4587
4588 rsmsharelock_acquire(seg);
4589
4590 /* resume the shared connection and/or mapping */
4591 retc = rsmsegshare_resume(seg);
4592
4593 if (seg->s_state == RSM_STATE_CONN_QUIESCE) {
4594 /* shared state can either be connected or mapped */
4595 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) ||
4596 (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) {
4597 ASSERT(retc == RSM_SUCCESS);
4598 seg->s_handle.in = sharedp->rsmsi_handle;
4599 rsmsharelock_release(seg);
4600 seg->s_state = RSM_STATE_CONNECT;
4601
4602 } else { /* error in rsmpi connect during resume */
4603 seg->s_handle.in = NULL;
4604 seg->s_state = RSM_STATE_DISCONNECT;
4605
4606 sharedp->rsmsi_refcnt--;
4607 *cookie = (void *)sharedp->rsmsi_cookie;
4608
4609 if (sharedp->rsmsi_refcnt == 0) {
4610 ASSERT(sharedp->rsmsi_mapcnt == 0);
4611 rsmsharelock_release(seg);
4612
4613 /* clean up the shared data structure */
4614 mutex_destroy(&sharedp->rsmsi_lock);
4615 cv_destroy(&sharedp->rsmsi_cv);
4616 kmem_free((void *)(sharedp),
4617 sizeof (rsm_import_share_t));
4618
4619 } else {
4620 rsmsharelock_release(seg);
4621 }
4622 /*
4623 * The following needs to be done after any
4624 * rsmsharelock calls which use seg->s_share.
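 * (Presumably the rsmsharelock_acquire/release macros
 * reach the rsmsi_lock through seg->s_share, so clearing
 * s_share any earlier would leave them dereferencing a
 * NULL pointer.)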
4625 */ 4626 seg->s_share = NULL; 4627 } 4628 4629 /* signal any waiting segment */ 4630 cv_broadcast(&seg->s_cv); 4631 4632 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4633 "rsmseg_resume done:state=%d\n", seg->s_state)); 4634 return (retc); 4635 } 4636 4637 ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE); 4638 4639 /* Setup protections for remap */ 4640 maxprot = PROT_USER; 4641 if (seg->s_mode & RSM_PERM_READ) { 4642 maxprot |= PROT_READ; 4643 } 4644 if (seg->s_mode & RSM_PERM_WRITE) { 4645 maxprot |= PROT_WRITE; 4646 } 4647 4648 if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) { 4649 /* error in rsmpi connect or map during resume */ 4650 4651 /* remap to trash page */ 4652 ASSERT(seg->s_ckl != NULL); 4653 4654 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4655 e = devmap_umem_remap(hdl->c_dhp, rsm_dip, 4656 remap_cookie, hdl->c_off, hdl->c_len, 4657 maxprot, 0, NULL); 4658 4659 DBG_PRINTF((category, RSM_ERR, 4660 "rsmseg_resume:remap=%d\n", e)); 4661 } 4662 4663 seg->s_handle.in = NULL; 4664 seg->s_state = RSM_STATE_DISCONNECT; 4665 4666 sharedp->rsmsi_refcnt--; 4667 4668 sharedp->rsmsi_mapcnt--; 4669 seg->s_mapinfo = NULL; 4670 4671 if (sharedp->rsmsi_refcnt == 0) { 4672 ASSERT(sharedp->rsmsi_mapcnt == 0); 4673 rsmsharelock_release(seg); 4674 4675 /* clean up the shared data structure */ 4676 mutex_destroy(&sharedp->rsmsi_lock); 4677 cv_destroy(&sharedp->rsmsi_cv); 4678 kmem_free((void *)(sharedp), 4679 sizeof (rsm_import_share_t)); 4680 4681 } else { 4682 rsmsharelock_release(seg); 4683 } 4684 /* 4685 * The following needs to be done after any 4686 * rsmsharelock calls which use seg->s_share. 4687 */ 4688 seg->s_share = NULL; 4689 4690 /* signal any waiting segment */ 4691 cv_broadcast(&seg->s_cv); 4692 4693 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4694 "rsmseg_resume done:seg=%x,err=%d\n", 4695 seg->s_key, retc)); 4696 return (retc); 4697 4698 } 4699 4700 seg->s_handle.in = sharedp->rsmsi_handle; 4701 4702 if (seg->s_node == my_nodeid) { /* loopback */ 4703 ASSERT(seg->s_mapinfo == NULL); 4704 4705 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4706 e = devmap_umem_remap(hdl->c_dhp, 4707 rsm_dip, seg->s_cookie, 4708 hdl->c_off, hdl->c_len, 4709 maxprot, 0, NULL); 4710 4711 DBG_PRINTF((category, RSM_ERR, 4712 "rsmseg_resume:remap=%d\n", e)); 4713 } 4714 } else { /* remote exporter */ 4715 /* remap to the new rsmpi maps */ 4716 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 4717 4718 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4719 p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len, 4720 &dev_offset, &maplen); 4721 e = devmap_devmem_remap(hdl->c_dhp, 4722 p->dip, p->dev_register, dev_offset, 4723 maplen, maxprot, 0, NULL); 4724 4725 DBG_PRINTF((category, RSM_ERR, 4726 "rsmseg_resume:remap=%d\n", e)); 4727 } 4728 } 4729 4730 rsmsharelock_release(seg); 4731 4732 seg->s_state = RSM_STATE_ACTIVE; 4733 cv_broadcast(&seg->s_cv); 4734 4735 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n")); 4736 4737 return (retc); 4738 } 4739 4740 static int 4741 rsmsegshare_resume(rsmseg_t *seg) 4742 { 4743 int e = RSM_SUCCESS; 4744 adapter_t *adapter; 4745 rsm_import_share_t *sharedp; 4746 DBG_DEFINE(category, 4747 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4748 4749 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n")); 4750 4751 ASSERT(rsmseglock_held(seg)); 4752 ASSERT(rsmsharelock_held(seg)); 4753 4754 sharedp = seg->s_share; 4755 4756 /* 4757 * If we are not in a xxxx_QUIESCE state that means shared 4758 * connect/mapping processing has 
already been done,
4759 * so return success.
4760 */
4761 if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) &&
4762 (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) {
4763 return (RSM_SUCCESS);
4764 }
4765
4766 adapter = seg->s_adapter;
4767
4768 if (sharedp->rsmsi_node != my_nodeid) {
4769 rsm_addr_t hwaddr;
4770 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node);
4771
4772 e = adapter->rsmpi_ops->rsm_connect(
4773 adapter->rsmpi_handle, hwaddr,
4774 sharedp->rsmsi_segid, &sharedp->rsmsi_handle);
4775
4776 DBG_PRINTF((category, RSM_DEBUG,
4777 "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n",
4778 sharedp->rsmsi_segid, e));
4779
4780 if (e != RSM_SUCCESS) {
4781 /* when do we send the NOT_IMPORTING message */
4782 sharedp->rsmsi_handle = NULL;
4783 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED;
4784 /* signal any waiting segment */
4785 cv_broadcast(&sharedp->rsmsi_cv);
4786 return (e);
4787 }
4788 }
4789
4790 if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) {
4791 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
4792 /* signal any waiting segment */
4793 cv_broadcast(&sharedp->rsmsi_cv);
4794 return (e);
4795 }
4796
4797 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE);
4798
4799 /* do the rsmpi map of the whole segment here */
4800 if (sharedp->rsmsi_node != my_nodeid) {
4801 size_t mapped_len;
4802 rsm_mapinfo_t *p;
4803
4804 /*
4805 * We need to do rsmpi maps with <off, lens> identical to
4806 * the old mapinfo list because the segment mapping handles
4807 * (dhp and such) need the fragmentation of the rsmpi maps
4808 * to be identical to what it was during the mmap of the
4809 * segment.
4810 */
4811 p = sharedp->rsmsi_mapinfo;
4812
4813 while (p != NULL) {
4814 mapped_len = 0;
4815
4816 e = adapter->rsmpi_ops->rsm_map(
4817 sharedp->rsmsi_handle, p->start_offset,
4818 p->individual_len, &mapped_len,
4819 &p->dip, &p->dev_register, &p->dev_offset,
4820 NULL, NULL);
4821
4822 if (e != 0) {
4823 DBG_PRINTF((category, RSM_ERR,
4824 "rsmsegshare_resume: rsmpi map err=%d\n",
4825 e));
4826 break;
4827 }
4828
4829 if (mapped_len != p->individual_len) {
4830 DBG_PRINTF((category, RSM_ERR,
4831 "rsmsegshare_resume: rsmpi maplen"
4832 "< reqlen=%lx\n", mapped_len));
4833 e = RSMERR_BAD_LENGTH;
4834 break;
4835 }
4836
4837 p = p->next;
4838
4839 }
4840
4841
4842 if (e != RSM_SUCCESS) { /* rsmpi map failed */
4843 int err;
4844 /* Check if this is the first rsm_map */
4845 if (p != sharedp->rsmsi_mapinfo) {
4846 /*
4847 * A single rsm_unmap undoes multiple rsm_maps.
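 * That is, if the loop above failed partway down the
 * mapinfo list, a single rsm_unmap of the handle is
 * enough to undo the earlier successful rsm_maps; the
 * (p != sharedp->rsmsi_mapinfo) check merely detects
 * whether at least one rsm_map succeeded.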
4847 */ 4848 (void) seg->s_adapter->rsmpi_ops-> 4849 rsm_unmap(sharedp->rsmsi_handle); 4850 } 4851 4852 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 4853 sharedp->rsmsi_mapinfo = NULL; 4854 4855 err = adapter->rsmpi_ops-> 4856 rsm_disconnect(sharedp->rsmsi_handle); 4857 4858 DBG_PRINTF((category, RSM_DEBUG, 4859 "rsmsegshare_resume:disconn seg=%x:err=%d\n", 4860 sharedp->rsmsi_segid, err)); 4861 4862 sharedp->rsmsi_handle = NULL; 4863 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 4864 4865 /* signal the waiting segments */ 4866 cv_broadcast(&sharedp->rsmsi_cv); 4867 DBG_PRINTF((category, RSM_DEBUG, 4868 "rsmsegshare_resume done: rsmpi map err\n")); 4869 return (e); 4870 } 4871 } 4872 4873 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 4874 4875 /* signal any waiting segment */ 4876 cv_broadcast(&sharedp->rsmsi_cv); 4877 4878 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n")); 4879 4880 return (e); 4881 } 4882 4883 /* 4884 * this is the routine that gets called by recv_taskq which is the 4885 * thread that processes messages that are flow-controlled. 4886 */ 4887 static void 4888 rsm_intr_proc_deferred(void *arg) 4889 { 4890 path_t *path = (path_t *)arg; 4891 rsmipc_request_t *msg; 4892 rsmipc_msghdr_t *msghdr; 4893 rsm_node_id_t src_node; 4894 msgbuf_elem_t *head; 4895 int e; 4896 DBG_DEFINE(category, 4897 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4898 4899 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4900 "rsm_intr_proc_deferred enter\n")); 4901 4902 mutex_enter(&path->mutex); 4903 4904 /* use the head of the msgbuf_queue */ 4905 head = rsmka_gethead_msgbuf(path); 4906 4907 mutex_exit(&path->mutex); 4908 4909 msg = (rsmipc_request_t *)&(head->msg); 4910 msghdr = (rsmipc_msghdr_t *)msg; 4911 4912 src_node = msghdr->rsmipc_src; 4913 4914 /* 4915 * messages that need to send a reply should check the message version 4916 * before processing the message. And all messages that need to 4917 * send a reply should be processed here by the worker thread. 
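 * (At present the worker handles SEGCONNECT, DISCONNECT, SUSPEND,
 * SUSPEND_DONE and RESUME - see the switch below.)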
4918 */
4919 switch (msghdr->rsmipc_type) {
4920 case RSMIPC_MSG_SEGCONNECT:
4921 if (msghdr->rsmipc_version != RSM_VERSION) {
4922 rsmipc_reply_t reply;
4923 reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION;
4924 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY;
4925 reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie;
4926 (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply);
4927 } else {
4928 rsm_intr_segconnect(src_node, msg);
4929 }
4930 break;
4931 case RSMIPC_MSG_DISCONNECT:
4932 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT);
4933 break;
4934 case RSMIPC_MSG_SUSPEND:
4935 importer_suspend(src_node);
4936 break;
4937 case RSMIPC_MSG_SUSPEND_DONE:
4938 rsm_suspend_complete(src_node, 0);
4939 break;
4940 case RSMIPC_MSG_RESUME:
4941 importer_resume(src_node);
4942 break;
4943 default:
4944 ASSERT(0);
4945 }
4946
4947 mutex_enter(&path->mutex);
4948
4949 rsmka_dequeue_msgbuf(path);
4950
4951 /* incr procmsg_cnt; it can be at most RSMIPC_MAX_MESSAGES */
4952 if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES)
4953 path->procmsg_cnt++;
4954
4955 ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES);
4956
4957 /* No need to send credits if path is going down */
4958 if ((path->state == RSMKA_PATH_ACTIVE) &&
4959 (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) {
4960 /*
4961 * send credits and reset procmsg_cnt on success; otherwise
4962 * credits will be sent after processing the next message
4963 */
4964 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);
4965 if (e == 0)
4966 path->procmsg_cnt = 0;
4967 else
4968 DBG_PRINTF((category, RSM_ERR,
4969 "rsm_intr_proc_deferred:send credits err=%d\n", e));
4970 }
4971
4972 /*
4973 * decrement the path refcnt since we incremented it in
4974 * rsm_intr_callback_dispatch
4975 */
4976 PATH_RELE_NOLOCK(path);
4977
4978 mutex_exit(&path->mutex);
4979
4980 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4981 "rsm_intr_proc_deferred done\n"));
4982 }
4983
4984 /*
4985 * Flow-controlled messages are enqueued and dispatched onto a taskq here
4986 */
4987 static void
4988 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr,
4989 rsm_intr_hand_arg_t arg)
4990 {
4991 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
4992 path_t *path;
4993 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
4994 DBG_DEFINE(category,
4995 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
4996
4997 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
4998 "rsm_intr_callback_dispatch enter\n"));
4999 ASSERT(data && hdlr_argp);
5000
5001 /* look up the path - incr the path refcnt */
5002 path = rsm_find_path(hdlr_argp->adapter_name,
5003 hdlr_argp->adapter_instance, src_hwaddr);
5004
5005 /* the path has been removed - drop this message */
5006 if (path == NULL) {
5007 DBG_PRINTF((category, RSM_DEBUG,
5008 "rsm_intr_callback_dispatch done: msg dropped\n"));
5009 return;
5010 }
5011 /* the path is not active - don't accept new messages */
5012 if (path->state != RSMKA_PATH_ACTIVE) {
5013 PATH_RELE_NOLOCK(path);
5014 mutex_exit(&path->mutex);
5015 DBG_PRINTF((category, RSM_DEBUG,
5016 "rsm_intr_callback_dispatch done: msg dropped"
5017 " path=%lx !ACTIVE\n", path));
5018 return;
5019 }
5020
5021 /*
5022 * Check if this message was sent to an older incarnation
5023 * of the path/sendq.
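 * (A sketch of the incn scheme: each time the path/sendq is
 * re-established its incarnation number is bumped, and the remote
 * side stamps every message with the incn it learned during the
 * SQREADY exchange; a message carrying a stale incn is therefore
 * dropped here instead of being processed against the new
 * incarnation.)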
5024 */
5025 if (path->local_incn != msghdr->rsmipc_incn) {
5026 /* decrement the refcnt */
5027 PATH_RELE_NOLOCK(path);
5028 mutex_exit(&path->mutex);
5029 DBG_PRINTF((category, RSM_DEBUG,
5030 "rsm_intr_callback_dispatch done: old incn %lld\n",
5031 msghdr->rsmipc_incn));
5032 return;
5033 }
5034
5035 /* copy and enqueue msg on the path's msgbuf queue */
5036 rsmka_enqueue_msgbuf(path, data);
5037
5038 /*
5039 * schedule task to process messages - ignore retval from
5040 * taskq_dispatch because the sender cannot send more than
5041 * what the receiver can handle.
5042 */
5043 (void) taskq_dispatch(path->recv_taskq,
5044 rsm_intr_proc_deferred, path, KM_NOSLEEP);
5045
5046 mutex_exit(&path->mutex);
5047
5048 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5049 "rsm_intr_callback_dispatch done\n"));
5050 }
5051
5052 /*
5053 * This procedure is called from rsm_srv_func when a remote node creates
5054 * a send queue. This event is used as a hint that an earlier failed
5055 * attempt to create a send queue to that remote node may now succeed and
5056 * should be retried. Indication of an earlier failed attempt is provided
5057 * by the RSMKA_SQCREATE_PENDING flag.
5058 */
5059 static void
5060 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5061 {
5062 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg;
5063 path_t *path;
5064 DBG_DEFINE(category,
5065 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5066
5067 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5068 "rsm_sqcreateop_callback enter\n"));
5069
5070 /* look up the path - incr the path refcnt */
5071 path = rsm_find_path(hdlr_argp->adapter_name,
5072 hdlr_argp->adapter_instance, src_hwaddr);
5073
5074 if (path == NULL) {
5075 DBG_PRINTF((category, RSM_DEBUG,
5076 "rsm_sqcreateop_callback done: no path\n"));
5077 return;
5078 }
5079
5080 if ((path->state == RSMKA_PATH_UP) &&
5081 (path->flags & RSMKA_SQCREATE_PENDING)) {
5082 /*
5083 * previous attempt to create sendq had failed, retry
5084 * it and move to RSMKA_PATH_ACTIVE state if successful.
5085 * the refcnt will be decremented in the do_deferred_work
5086 */
5087 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP);
5088 } else {
5089 /* decrement the refcnt */
5090 PATH_RELE_NOLOCK(path);
5091 }
5092 mutex_exit(&path->mutex);
5093
5094 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5095 "rsm_sqcreateop_callback done\n"));
5096 }
5097
5098 static void
5099 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg)
5100 {
5101 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data;
5102 rsmipc_request_t *msg = (rsmipc_request_t *)data;
5103 rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data;
5104 rsm_node_id_t src_node;
5105 DBG_DEFINE(category,
5106 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
5107
5108 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:"
5109 "src=%d, type=%d\n", msghdr->rsmipc_src,
5110 msghdr->rsmipc_type));
5111
5112 /*
5113 * Check for the version number in the msg header. If it is not
5114 * RSM_VERSION, drop the message. In the future, we need to manage
5115 * incompatible version numbers in some way
5116 */
5117 if (msghdr->rsmipc_version != RSM_VERSION) {
5118 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n"));
5119 /*
5120 * Drop requests that don't have a reply right here.
5121 * Requests with reply will send a BAD_VERSION reply
5122 * when they get processed by the worker thread.
5123 */ 5124 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) { 5125 return; 5126 } 5127 5128 } 5129 5130 src_node = msghdr->rsmipc_src; 5131 5132 switch (msghdr->rsmipc_type) { 5133 case RSMIPC_MSG_SEGCONNECT: 5134 case RSMIPC_MSG_DISCONNECT: 5135 case RSMIPC_MSG_SUSPEND: 5136 case RSMIPC_MSG_SUSPEND_DONE: 5137 case RSMIPC_MSG_RESUME: 5138 /* 5139 * These message types are handled by a worker thread using 5140 * the flow-control algorithm. 5141 * Any message processing that does one or more of the 5142 * following should be handled in a worker thread. 5143 * - allocates resources and might sleep 5144 * - makes RSMPI calls down to the interconnect driver 5145 * this by defn include requests with reply. 5146 * - takes a long duration of time 5147 */ 5148 rsm_intr_callback_dispatch(data, src_hwaddr, arg); 5149 break; 5150 case RSMIPC_MSG_NOTIMPORTING: 5151 importer_list_rm(src_node, msg->rsmipc_key, 5152 msg->rsmipc_segment_cookie); 5153 break; 5154 case RSMIPC_MSG_SQREADY: 5155 rsm_proc_sqready(data, src_hwaddr, arg); 5156 break; 5157 case RSMIPC_MSG_SQREADY_ACK: 5158 rsm_proc_sqready_ack(data, src_hwaddr, arg); 5159 break; 5160 case RSMIPC_MSG_CREDIT: 5161 rsm_add_credits(ctrlmsg, src_hwaddr, arg); 5162 break; 5163 case RSMIPC_MSG_REPLY: 5164 rsm_intr_reply(msghdr); 5165 break; 5166 case RSMIPC_MSG_BELL: 5167 rsm_intr_event(msg); 5168 break; 5169 case RSMIPC_MSG_IMPORTING: 5170 importer_list_add(src_node, msg->rsmipc_key, 5171 msg->rsmipc_adapter_hwaddr, 5172 msg->rsmipc_segment_cookie); 5173 break; 5174 case RSMIPC_MSG_REPUBLISH: 5175 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm); 5176 break; 5177 default: 5178 DBG_PRINTF((category, RSM_DEBUG, 5179 "rsm_intr_callback: bad msg %lx type %d data %lx\n", 5180 (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data)); 5181 } 5182 5183 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n")); 5184 5185 } 5186 5187 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd, 5188 rsm_intr_q_op_t opcode, rsm_addr_t src, 5189 void *data, size_t size, rsm_intr_hand_arg_t arg) 5190 { 5191 DBG_DEFINE(category, 5192 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5193 5194 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n")); 5195 5196 switch (opcode) { 5197 case RSM_INTR_Q_OP_CREATE: 5198 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n")); 5199 rsm_sqcreateop_callback(src, arg); 5200 break; 5201 case RSM_INTR_Q_OP_DESTROY: 5202 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n")); 5203 break; 5204 case RSM_INTR_Q_OP_RECEIVE: 5205 rsm_intr_callback(data, src, arg); 5206 break; 5207 default: 5208 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5209 "rsm_srv_func: unknown opcode = %x\n", opcode)); 5210 } 5211 5212 chd = chd; 5213 size = size; 5214 5215 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n")); 5216 5217 return (RSM_INTR_HAND_CLAIMED); 5218 } 5219 5220 /* *************************** IPC slots ************************* */ 5221 static rsmipc_slot_t * 5222 rsmipc_alloc() 5223 { 5224 int i; 5225 rsmipc_slot_t *slot; 5226 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5227 5228 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n")); 5229 5230 /* try to find a free slot, if not wait */ 5231 mutex_enter(&rsm_ipc.lock); 5232 5233 while (rsm_ipc.count == 0) { 5234 rsm_ipc.wanted = 1; 5235 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock); 5236 } 5237 5238 /* An empty slot is available, find it */ 5239 slot = &rsm_ipc.slots[0]; 5240 for (i = 0; i < RSMIPC_SZ; i++, slot++) { 
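/* take the first slot that is still marked FREE */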
5241 if (RSMIPC_GET(slot, RSMIPC_FREE)) { 5242 RSMIPC_CLEAR(slot, RSMIPC_FREE); 5243 break; 5244 } 5245 } 5246 5247 ASSERT(i < RSMIPC_SZ); 5248 rsm_ipc.count--; /* one less is available */ 5249 rsm_ipc.sequence++; /* new sequence */ 5250 5251 slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence; 5252 slot->rsmipc_cookie.ic.index = (uint_t)i; 5253 5254 mutex_exit(&rsm_ipc.lock); 5255 5256 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n")); 5257 5258 return (slot); 5259 } 5260 5261 static void 5262 rsmipc_free(rsmipc_slot_t *slot) 5263 { 5264 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5265 5266 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n")); 5267 5268 ASSERT(MUTEX_HELD(&slot->rsmipc_lock)); 5269 ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot); 5270 5271 mutex_enter(&rsm_ipc.lock); 5272 5273 RSMIPC_SET(slot, RSMIPC_FREE); 5274 5275 slot->rsmipc_cookie.ic.sequence = 0; 5276 5277 mutex_exit(&slot->rsmipc_lock); 5278 rsm_ipc.count++; 5279 ASSERT(rsm_ipc.count <= RSMIPC_SZ); 5280 if (rsm_ipc.wanted) { 5281 rsm_ipc.wanted = 0; 5282 cv_broadcast(&rsm_ipc.cv); 5283 } 5284 5285 mutex_exit(&rsm_ipc.lock); 5286 5287 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n")); 5288 } 5289 5290 static int 5291 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply) 5292 { 5293 int e = 0; 5294 int credit_check = 0; 5295 int retry_cnt = 0; 5296 int min_retry_cnt = 10; 5297 clock_t ticks; 5298 rsm_send_t is; 5299 rsmipc_slot_t *rslot; 5300 adapter_t *adapter; 5301 path_t *path; 5302 sendq_token_t *sendq_token; 5303 sendq_token_t *used_sendq_token = NULL; 5304 rsm_send_q_handle_t ipc_handle; 5305 DBG_DEFINE(category, 5306 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5307 5308 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d", 5309 dest)); 5310 5311 /* 5312 * Check if this is a local case 5313 */ 5314 if (dest == my_nodeid) { 5315 switch (req->rsmipc_hdr.rsmipc_type) { 5316 case RSMIPC_MSG_SEGCONNECT: 5317 reply->rsmipc_status = (short)rsmsegacl_validate( 5318 req, dest, reply); 5319 break; 5320 case RSMIPC_MSG_BELL: 5321 req->rsmipc_hdr.rsmipc_src = dest; 5322 rsm_intr_event(req); 5323 break; 5324 case RSMIPC_MSG_IMPORTING: 5325 importer_list_add(dest, req->rsmipc_key, 5326 req->rsmipc_adapter_hwaddr, 5327 req->rsmipc_segment_cookie); 5328 break; 5329 case RSMIPC_MSG_NOTIMPORTING: 5330 importer_list_rm(dest, req->rsmipc_key, 5331 req->rsmipc_segment_cookie); 5332 break; 5333 case RSMIPC_MSG_REPUBLISH: 5334 importer_update(dest, req->rsmipc_key, 5335 req->rsmipc_perm); 5336 break; 5337 case RSMIPC_MSG_SUSPEND: 5338 importer_suspend(dest); 5339 break; 5340 case RSMIPC_MSG_SUSPEND_DONE: 5341 rsm_suspend_complete(dest, 0); 5342 break; 5343 case RSMIPC_MSG_RESUME: 5344 importer_resume(dest); 5345 break; 5346 default: 5347 ASSERT(0); 5348 } 5349 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5350 "rsmipc_send done\n")); 5351 return (0); 5352 } 5353 5354 if (dest >= MAX_NODES) { 5355 DBG_PRINTF((category, RSM_ERR, 5356 "rsm: rsmipc_send bad node number %x\n", dest)); 5357 return (RSMERR_REMOTE_NODE_UNREACHABLE); 5358 } 5359 5360 /* 5361 * Oh boy! we are going remote. 
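 *
 * A sketch of the three remote cases handled below (summarizing
 * this function, illustrative only):
 *   reply == NULL          - fire-and-forget request, flow
 *                            controlled for selected message types
 *   req == NULL            - a reply message, never flow controlled
 *   req and reply non-NULL - request that parks on an IPC slot until
 *                            the matching reply arrives, a signal
 *                            interrupts it, or the 5 second timeout
 *                            expires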
5362 */
5363
5364 /*
5365 * identify if we need to have credits to send this message
5366 * - only selected requests are flow controlled
5367 */
5368 if (req != NULL) {
5369 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5370 "rsmipc_send:request type=%d\n",
5371 req->rsmipc_hdr.rsmipc_type));
5372
5373 switch (req->rsmipc_hdr.rsmipc_type) {
5374 case RSMIPC_MSG_SEGCONNECT:
5375 case RSMIPC_MSG_DISCONNECT:
5376 case RSMIPC_MSG_IMPORTING:
5377 case RSMIPC_MSG_SUSPEND:
5378 case RSMIPC_MSG_SUSPEND_DONE:
5379 case RSMIPC_MSG_RESUME:
5380 credit_check = 1;
5381 break;
5382 default:
5383 credit_check = 0;
5384 }
5385 }
5386
5387 again:
5388 if (retry_cnt++ == min_retry_cnt) {
5389 /* backoff before further retries for 10ms */
5390 delay(drv_usectohz(10000));
5391 retry_cnt = 0; /* reset retry_cnt */
5392 }
5393 sendq_token = rsmka_get_sendq_token(dest, used_sendq_token);
5394 if (sendq_token == NULL) {
5395 DBG_PRINTF((category, RSM_ERR,
5396 "rsm: rsmipc_send no device to reach node %d\n", dest));
5397 return (RSMERR_REMOTE_NODE_UNREACHABLE);
5398 }
5399
5400 if ((sendq_token == used_sendq_token) &&
5401 ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) ||
5402 (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) {
5403 rele_sendq_token(sendq_token);
5404 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e));
5405 return (RSMERR_CONN_ABORTED);
5406 } else
5407 used_sendq_token = sendq_token;
5408
5409 /* lint -save -e413 */
5410 path = SQ_TOKEN_TO_PATH(sendq_token);
5411 adapter = path->local_adapter;
5412 /* lint -restore */
5413 ipc_handle = sendq_token->rsmpi_sendq_handle;
5414
5415 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5416 "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle));
5417
5418 if (reply == NULL) {
5419 /* Send request without ack */
5420 /*
5421 * Set the rsmipc_version number in the msghdr for KA
5422 * communication versioning
5423 */
5424 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5425 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5426 /*
5427 * remote endpoint's incn should match the value in our
5428 * path's remote_incn field. No need to grab any lock
5429 * since we have refcnted the path in rsmka_get_sendq_token
5430 */
5431 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5432
5433 is.is_data = (void *)req;
5434 is.is_size = sizeof (*req);
5435 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5436 is.is_wait = 0;
5437
5438 if (credit_check) {
5439 mutex_enter(&path->mutex);
5440 /*
5441 * wait till we recv credits or the path goes down. If
5442 * the path goes down, rsm_send will fail and we handle
5443 * the error then
5444 */
5445 while ((sendq_token->msgbuf_avail == 0) &&
5446 (path->state == RSMKA_PATH_ACTIVE)) {
5447 e = cv_wait_sig(&sendq_token->sendq_cv,
5448 &path->mutex);
5449 if (e == 0) {
5450 mutex_exit(&path->mutex);
5451 no_reply_cnt++;
5452 rele_sendq_token(sendq_token);
5453 DBG_PRINTF((category, RSM_DEBUG,
5454 "rsmipc_send done: "
5455 "cv_wait INTERRUPTED"));
5456 return (RSMERR_INTERRUPTED);
5457 }
5458 }
5459
5460 /*
5461 * path is not active - retry on another path.
5462 */
5463 if (path->state != RSMKA_PATH_ACTIVE) {
5464 mutex_exit(&path->mutex);
5465 rele_sendq_token(sendq_token);
5466 e = RSMERR_CONN_ABORTED;
5467 DBG_PRINTF((category, RSM_ERR,
5468 "rsm: rsmipc_send: path !ACTIVE"));
5469 goto again;
5470 }
5471
5472 ASSERT(sendq_token->msgbuf_avail > 0);
5473
5474 /*
5475 * reserve a msgbuf
5476 */
5477 sendq_token->msgbuf_avail--;
5478
5479 mutex_exit(&path->mutex);
5480
5481 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5482 NULL);
5483
5484 if (e != RSM_SUCCESS) {
5485 mutex_enter(&path->mutex);
5486 /*
5487 * release the reserved msgbuf since
5488 * the send failed
5489 */
5490 sendq_token->msgbuf_avail++;
5491 cv_broadcast(&sendq_token->sendq_cv);
5492 mutex_exit(&path->mutex);
5493 }
5494 } else
5495 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5496 NULL);
5497
5498 no_reply_cnt++;
5499 rele_sendq_token(sendq_token);
5500 if (e != RSM_SUCCESS) {
5501 DBG_PRINTF((category, RSM_ERR,
5502 "rsm: rsmipc_send no reply send"
5503 " err = %d no reply count = %d\n",
5504 e, no_reply_cnt));
5505 ASSERT(e != RSMERR_QUEUE_FENCE_UP &&
5506 e != RSMERR_BAD_BARRIER_HNDL);
5507 atomic_add_64(&rsm_ipcsend_errcnt, 1);
5508 goto again;
5509 } else {
5510 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5511 "rsmipc_send done\n"));
5512 return (e);
5513 }
5514
5515 }
5516
5517 if (req == NULL) {
5518 /* Send reply - No flow control is done for reply */
5519 /*
5520 * Set the version in the msg header for KA communication
5521 * versioning
5522 */
5523 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5524 reply->rsmipc_hdr.rsmipc_src = my_nodeid;
5525 /* incn number is not used for reply msgs currently */
5526 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5527
5528 is.is_data = (void *)reply;
5529 is.is_size = sizeof (*reply);
5530 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5531 is.is_wait = 0;
5532 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL);
5533 rele_sendq_token(sendq_token);
5534 if (e != RSM_SUCCESS) {
5535 DBG_PRINTF((category, RSM_ERR,
5536 "rsm: rsmipc_send reply send"
5537 " err = %d\n", e));
5538 atomic_add_64(&rsm_ipcsend_errcnt, 1);
5539 goto again;
5540 } else {
5541 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5542 "rsmipc_send done\n"));
5543 return (e);
5544 }
5545 }
5546
5547 /* Reply needed */
5548 rslot = rsmipc_alloc(); /* allocate a new ipc slot */
5549
5550 mutex_enter(&rslot->rsmipc_lock);
5551
5552 rslot->rsmipc_data = (void *)reply;
5553 RSMIPC_SET(rslot, RSMIPC_PENDING);
5554
5555 while (RSMIPC_GET(rslot, RSMIPC_PENDING)) {
5556 /*
5557 * Set the rsmipc_version number in the msghdr for KA
5558 * communication versioning
5559 */
5560 req->rsmipc_hdr.rsmipc_version = RSM_VERSION;
5561 req->rsmipc_hdr.rsmipc_src = my_nodeid;
5562 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie;
5563 /*
5564 * remote endpoint's incn should match the value in our
5565 * path's remote_incn field. No need to grab any lock
5566 * since we have refcnted the path in rsmka_get_sendq_token
5567 */
5568 req->rsmipc_hdr.rsmipc_incn = path->remote_incn;
5569
5570 is.is_data = (void *)req;
5571 is.is_size = sizeof (*req);
5572 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP;
5573 is.is_wait = 0;
5574 if (credit_check) {
5575
5576 mutex_enter(&path->mutex);
5577 /*
5578 * wait till we recv credits or the path goes down. If
5579 * the path goes down, rsm_send will fail and we handle
5580 * the error then.
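 *
 * (A sketch of the credit scheme: msgbuf_avail is
 * decremented for every flow-controlled send and
 * replenished by CREDIT messages, which the receiver
 * emits once it has processed RSMIPC_LOTSFREE_MSGBUFS
 * messages - see rsm_intr_proc_deferred and
 * rsm_add_credits.)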
5581 */
5582 while ((sendq_token->msgbuf_avail == 0) &&
5583 (path->state == RSMKA_PATH_ACTIVE)) {
5584 e = cv_wait_sig(&sendq_token->sendq_cv,
5585 &path->mutex);
5586 if (e == 0) {
5587 mutex_exit(&path->mutex);
5588 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5589 rsmipc_free(rslot);
5590 rele_sendq_token(sendq_token);
5591 DBG_PRINTF((category, RSM_DEBUG,
5592 "rsmipc_send done: "
5593 "cv_wait INTERRUPTED"));
5594 return (RSMERR_INTERRUPTED);
5595 }
5596 }
5597
5598 /*
5599 * path is not active - retry on another path.
5600 */
5601 if (path->state != RSMKA_PATH_ACTIVE) {
5602 mutex_exit(&path->mutex);
5603 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5604 rsmipc_free(rslot);
5605 rele_sendq_token(sendq_token);
5606 e = RSMERR_CONN_ABORTED;
5607 DBG_PRINTF((category, RSM_ERR,
5608 "rsm: rsmipc_send: path !ACTIVE"));
5609 goto again;
5610 }
5611
5612 ASSERT(sendq_token->msgbuf_avail > 0);
5613
5614 /*
5615 * reserve a msgbuf
5616 */
5617 sendq_token->msgbuf_avail--;
5618
5619 mutex_exit(&path->mutex);
5620
5621 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5622 NULL);
5623
5624 if (e != RSM_SUCCESS) {
5625 mutex_enter(&path->mutex);
5626 /*
5627 * release the reserved msgbuf since
5628 * the send failed
5629 */
5630 sendq_token->msgbuf_avail++;
5631 cv_broadcast(&sendq_token->sendq_cv);
5632 mutex_exit(&path->mutex);
5633 }
5634 } else
5635 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is,
5636 NULL);
5637
5638 if (e != RSM_SUCCESS) {
5639 DBG_PRINTF((category, RSM_ERR,
5640 "rsm: rsmipc_send rsmpi send err = %d\n", e));
5641 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5642 rsmipc_free(rslot);
5643 rele_sendq_token(sendq_token);
5644 atomic_add_64(&rsm_ipcsend_errcnt, 1);
5645 goto again;
5646 }
5647
5648 /* wait for a reply signal, a SIGINT, or 5 sec. timeout */
5649 (void) drv_getparm(LBOLT, &ticks);
5650 ticks += drv_usectohz(5000000);
5651 e = cv_timedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock,
5652 ticks);
5653 if (e < 0) {
5654 /* timed out - retry */
5655 e = RSMERR_TIMEOUT;
5656 } else if (e == 0) {
5657 /* signalled - return error */
5658 e = RSMERR_INTERRUPTED;
5659 break;
5660 } else {
5661 e = RSM_SUCCESS;
5662 }
5663 }
5664
5665 RSMIPC_CLEAR(rslot, RSMIPC_PENDING);
5666 rsmipc_free(rslot);
5667 rele_sendq_token(sendq_token);
5668
5669 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e));
5670 return (e);
5671 }
5672
5673 static int
5674 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, void *cookie)
5675 {
5676 rsmipc_request_t request;
5677
5678 /*
5679 * inform the exporter to delete this importer
5680 */
5681 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING;
5682 request.rsmipc_key = segid;
5683 request.rsmipc_segment_cookie = cookie;
5684 return (rsmipc_send(dest, &request, RSM_NO_REPLY));
5685 }
5686
5687 static void
5688 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl,
5689 int acl_len, rsm_permission_t default_permission)
5690 {
5691 int i;
5692 importing_token_t *token;
5693 rsmipc_request_t request;
5694 republish_token_t *republish_list = NULL;
5695 republish_token_t *rp;
5696 rsm_permission_t permission;
5697 int index;
5698
5699 /*
5700 * send the new access mode to all the nodes that have imported
5701 * this segment.
5702 * If the new acl does not have a node that was present in
5703 * the old acl, an access permission of 0 is sent.
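 * (For example, if node 3 was in the old acl but is absent from
 * the new one, node 3 is sent a REPUBLISH carrying
 * default_permission - 0 in that case, per the note above - which
 * importer_update then records in the shared structure on node 3.)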
5704 */
5705
5706 index = rsmhash(segid);
5707
5708 /*
5709 * create a list of node/permissions to send the republish message
5710 */
5711 mutex_enter(&importer_list.lock);
5712
5713 token = importer_list.bucket[index];
5714 while (token != NULL) {
5715 if (segid == token->key) {
5716 permission = default_permission;
5717
5718 for (i = 0; i < acl_len; i++) {
5719 if (token->importing_node == acl[i].ae_node) {
5720 permission = acl[i].ae_permission;
5721 break;
5722 }
5723 }
5724 rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP);
5725
5726 rp->key = segid;
5727 rp->importing_node = token->importing_node;
5728 rp->permission = permission;
5729 rp->next = republish_list;
5730 republish_list = rp;
5731 }
5732 token = token->next;
5733 }
5734
5735 mutex_exit(&importer_list.lock);
5736
5737 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH;
5738 request.rsmipc_key = segid;
5739
5740 while (republish_list != NULL) {
5741 request.rsmipc_perm = republish_list->permission;
5742 (void) rsmipc_send(republish_list->importing_node,
5743 &request, RSM_NO_REPLY);
5744 rp = republish_list;
5745 republish_list = republish_list->next;
5746 kmem_free(rp, sizeof (republish_token_t));
5747 }
5748 }
5749
5750 static void
5751 rsm_send_suspend()
5752 {
5753 int i, e;
5754 rsmipc_request_t request;
5755 list_element_t *tokp;
5756 list_element_t *head = NULL;
5757 importing_token_t *token;
5758 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5759 "rsm_send_suspend enter\n"));
5760
5761 /*
5762 * create a list of nodes to send the suspend message to
5763 *
5764 * Currently the whole importer list is scanned and we obtain
5765 * all the nodes - this basically gets all nodes that at least
5766 * import one segment from the local node.
5767 *
5768 * no need to grab the rsm_suspend_list lock here since we are
5769 * single threaded when suspend is called.
5770 */
5771
5772 mutex_enter(&importer_list.lock);
5773 for (i = 0; i < rsm_hash_size; i++) {
5774
5775 token = importer_list.bucket[i];
5776
5777 while (token != NULL) {
5778
5779 tokp = head;
5780
5781 /*
5782 * make sure that the token's node
5783 * is not already on the suspend list
5784 */
5785 while (tokp != NULL) {
5786 if (tokp->nodeid == token->importing_node) {
5787 break;
5788 }
5789 tokp = tokp->next;
5790 }
5791
5792 if (tokp == NULL) { /* not in suspend list */
5793 tokp = kmem_zalloc(sizeof (list_element_t),
5794 KM_SLEEP);
5795 tokp->nodeid = token->importing_node;
5796 tokp->next = head;
5797 head = tokp;
5798 }
5799
5800 token = token->next;
5801 }
5802 }
5803 mutex_exit(&importer_list.lock);
5804
5805 if (head == NULL) { /* no importers so go ahead and quiesce segments */
5806 exporter_quiesce();
5807 return;
5808 }
5809
5810 mutex_enter(&rsm_suspend_list.list_lock);
5811 ASSERT(rsm_suspend_list.list_head == NULL);
5812 /*
5813 * update the suspend list right away so that if a node dies the
5814 * pathmanager can set the NODE dead flag
5815 */
5816 rsm_suspend_list.list_head = head;
5817 mutex_exit(&rsm_suspend_list.list_lock);
5818
5819 tokp = head;
5820
5821 while (tokp != NULL) {
5822 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND;
5823 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY);
5824 /*
5825 * Error in rsmipc_send currently happens due to inaccessibility
5826 * of the remote node.
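 * A failed send is thus treated like an unreachable node: the
 * ACKPENDING flag below is only set when the send succeeded, so
 * rsm_suspend_complete will not wait for an ack from that node.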
5827 */
5828 if (e == RSM_SUCCESS) { /* send succeeded - wait for ack */
5829 tokp->flags |= RSM_SUSPEND_ACKPENDING;
5830 }
5831
5832 tokp = tokp->next;
5833 }
5834
5835 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE,
5836 "rsm_send_suspend done\n"));
5837
5838 }
5839
5840 static void
5841 rsm_send_resume()
5842 {
5843 rsmipc_request_t request;
5844 list_element_t *elem, *head;
5845
5846 /*
5847 * save the suspend list so that we know where to send
5848 * the resume messages and make the suspend list head
5849 * NULL.
5850 */
5851 mutex_enter(&rsm_suspend_list.list_lock);
5852 head = rsm_suspend_list.list_head;
5853 rsm_suspend_list.list_head = NULL;
5854 mutex_exit(&rsm_suspend_list.list_lock);
5855
5856 while (head != NULL) {
5857 elem = head;
5858 head = head->next;
5859
5860 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME;
5861
5862 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY);
5863
5864 kmem_free((void *)elem, sizeof (list_element_t));
5865
5866 }
5867
5868 }
5869
5870 /*
5871 * This function takes a path and sends a message using the sendq
5872 * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK
5873 * and RSMIPC_MSG_CREDIT are sent using this function.
5874 */
5875 int
5876 rsmipc_send_controlmsg(path_t *path, int msgtype)
5877 {
5878 int e;
5879 int retry_cnt = 0;
5880 int min_retry_cnt = 10;
5881 clock_t timeout;
5882 adapter_t *adapter;
5883 rsm_send_t is;
5884 rsm_send_q_handle_t ipc_handle;
5885 rsmipc_controlmsg_t msg;
5886 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL);
5887
5888 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5889 "rsmipc_send_controlmsg enter\n"));
5890
5891 ASSERT(MUTEX_HELD(&path->mutex));
5892
5893 adapter = path->local_adapter;
5894
5895 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx "
5896 "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype,
5897 my_nodeid, adapter->hwaddr, path->remote_node,
5898 path->remote_hwaddr, path->procmsg_cnt));
5899
5900 if (path->state != RSMKA_PATH_ACTIVE) {
5901 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
5902 "rsmipc_send_controlmsg done: !
RSMKA_PATH_ACTIVE")); 5903 return (1); 5904 } 5905 5906 ipc_handle = path->sendq_token.rsmpi_sendq_handle; 5907 5908 msg.rsmipc_hdr.rsmipc_version = RSM_VERSION; 5909 msg.rsmipc_hdr.rsmipc_src = my_nodeid; 5910 msg.rsmipc_hdr.rsmipc_type = msgtype; 5911 msg.rsmipc_hdr.rsmipc_incn = path->remote_incn; 5912 5913 if (msgtype == RSMIPC_MSG_CREDIT) 5914 msg.rsmipc_credits = path->procmsg_cnt; 5915 5916 msg.rsmipc_local_incn = path->local_incn; 5917 5918 msg.rsmipc_adapter_hwaddr = adapter->hwaddr; 5919 /* incr the sendq, path refcnt */ 5920 PATH_HOLD_NOLOCK(path); 5921 SENDQ_TOKEN_HOLD(path); 5922 5923 do { 5924 /* drop the path lock before doing the rsm_send */ 5925 mutex_exit(&path->mutex); 5926 5927 is.is_data = (void *)&msg; 5928 is.is_size = sizeof (msg); 5929 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5930 is.is_wait = 0; 5931 5932 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL); 5933 5934 ASSERT(e != RSMERR_QUEUE_FENCE_UP && 5935 e != RSMERR_BAD_BARRIER_HNDL); 5936 5937 mutex_enter(&path->mutex); 5938 5939 if (e == RSM_SUCCESS) { 5940 break; 5941 } 5942 /* error counter for statistics */ 5943 atomic_add_64(&rsm_ctrlmsg_errcnt, 1); 5944 5945 DBG_PRINTF((category, RSM_ERR, 5946 "rsmipc_send_controlmsg:rsm_send error=%d", e)); 5947 5948 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */ 5949 timeout = ddi_get_lbolt() + drv_usectohz(10000); 5950 (void) cv_timedwait(&path->sendq_token.sendq_cv, 5951 &path->mutex, timeout); 5952 retry_cnt = 0; 5953 } 5954 } while (path->state == RSMKA_PATH_ACTIVE); 5955 5956 /* decrement the sendq,path refcnt that we incr before rsm_send */ 5957 SENDQ_TOKEN_RELE(path); 5958 PATH_RELE_NOLOCK(path); 5959 5960 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5961 "rsmipc_send_controlmsg done=%d", e)); 5962 return (e); 5963 } 5964 5965 /* 5966 * Called from rsm_force_unload and path_importer_disconnect. The memory 5967 * mapping for the imported segment is removed and the segment is 5968 * disconnected at the interconnect layer if disconnect_flag is TRUE. 5969 * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback 5970 * and FALSE from rsm_rebind. 5971 * 5972 * When subsequent accesses cause page faulting, the dummy page is mapped 5973 * to resolve the fault, and the mapping generation number is incremented 5974 * so that the application can be notified on a close barrier operation. 5975 * 5976 * The caller of rsmseg_unload is responsible for acquiring the segment 5977 * lock before making a call to rsmseg_unload; this is what makes the 5978 * caller and rsmseg_unload thread safe. The segment lock 5979 * will be released by the rsmseg_unload function. 5980 */ 5981 void 5982 rsmseg_unload(rsmseg_t *im_seg) 5983 { 5984 rsmcookie_t *hdl; 5985 void *shared_cookie; 5986 rsmipc_request_t request; 5987 uint_t maxprot; 5988 5989 DBG_DEFINE(category, 5990 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5991 5992 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n")); 5993 5994 ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 5995 5996 /* wait until segment leaves the mapping state */ 5997 while (im_seg->s_state == RSM_STATE_MAPPING) 5998 cv_wait(&im_seg->s_cv, &im_seg->s_lock); 5999 /* 6000 * An unload is only necessary if the segment is connected. However, 6001 * if the segment was on the import list in state RSM_STATE_CONNECTING 6002 * then a connection was in progress. Change to RSM_STATE_ABORT_CONNECT 6003 * here to cause an early exit from the connection process.
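 *
 * The caller contract, in sketch form (rsm_force_unload and
 * path_importer_disconnect are the real callers):
 *
 *	rsmseglock_acquire(im_seg);	caller takes the segment lock
 *	rsmseg_unload(im_seg);		every return path below releases it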
6004 */ 6005 if (im_seg->s_state == RSM_STATE_NEW) { 6006 rsmseglock_release(im_seg); 6007 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6008 "rsmseg_unload done: RSM_STATE_NEW\n")); 6009 return; 6010 } else if (im_seg->s_state == RSM_STATE_CONNECTING) { 6011 im_seg->s_state = RSM_STATE_ABORT_CONNECT; 6012 rsmsharelock_acquire(im_seg); 6013 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT; 6014 rsmsharelock_release(im_seg); 6015 rsmseglock_release(im_seg); 6016 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6017 "rsmseg_unload done: RSM_STATE_CONNECTING\n")); 6018 return; 6019 } 6020 6021 if (im_seg->s_flags & RSM_FORCE_DISCONNECT) { 6022 if (im_seg->s_ckl != NULL) { 6023 int e; 6024 /* Setup protections for remap */ 6025 maxprot = PROT_USER; 6026 if (im_seg->s_mode & RSM_PERM_READ) { 6027 maxprot |= PROT_READ; 6028 } 6029 if (im_seg->s_mode & RSM_PERM_WRITE) { 6030 maxprot |= PROT_WRITE; 6031 } 6032 hdl = im_seg->s_ckl; 6033 for (; hdl != NULL; hdl = hdl->c_next) { 6034 e = devmap_umem_remap(hdl->c_dhp, rsm_dip, 6035 remap_cookie, 6036 hdl->c_off, hdl->c_len, 6037 maxprot, 0, NULL); 6038 6039 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6040 "remap returns %d\n", e)); 6041 } 6042 } 6043 6044 (void) rsm_closeconnection(im_seg, &shared_cookie); 6045 6046 if (shared_cookie != NULL) { 6047 /* 6048 * inform the exporting node so this import 6049 * can be deleted from the list of importers. 6050 */ 6051 request.rsmipc_hdr.rsmipc_type = 6052 RSMIPC_MSG_NOTIMPORTING; 6053 request.rsmipc_key = im_seg->s_segid; 6054 request.rsmipc_segment_cookie = shared_cookie; 6055 rsmseglock_release(im_seg); 6056 (void) rsmipc_send(im_seg->s_node, &request, 6057 RSM_NO_REPLY); 6058 } else { 6059 rsmseglock_release(im_seg); 6060 } 6061 } 6062 else 6063 rsmseglock_release(im_seg); 6064 6065 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n")); 6066 6067 } 6068 6069 /* ****************************** Importer Calls ************************ */ 6070 6071 static int 6072 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr) 6073 { 6074 int shifts = 0; 6075 6076 if (crgetuid(cr) != owner) { 6077 shifts += 3; 6078 if (!groupmember(group, cr)) 6079 shifts += 3; 6080 } 6081 6082 mode &= ~(perm << shifts); 6083 6084 if (mode == 0) 6085 return (0); 6086 6087 return (secpolicy_rsm_access(cr, owner, mode)); 6088 } 6089 6090 6091 static int 6092 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred, 6093 intptr_t dataptr, int mode) 6094 { 6095 int e; 6096 int recheck_state = 0; 6097 void *shared_cookie; 6098 rsmipc_request_t request; 6099 rsmipc_reply_t reply; 6100 rsm_permission_t access; 6101 adapter_t *adapter; 6102 rsm_addr_t addr = 0; 6103 rsm_import_share_t *sharedp; 6104 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6105 6106 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n")); 6107 6108 adapter = rsm_getadapter(msg, mode); 6109 if (adapter == NULL) { 6110 DBG_PRINTF((category, RSM_ERR, 6111 "rsm_connect done:ENODEV adapter=NULL\n")); 6112 return (RSMERR_CTLR_NOT_PRESENT); 6113 } 6114 6115 if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) { 6116 rsmka_release_adapter(adapter); 6117 DBG_PRINTF((category, RSM_ERR, 6118 "rsm_connect done:ENODEV loopback\n")); 6119 return (RSMERR_CTLR_NOT_PRESENT); 6120 } 6121 6122 6123 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6124 ASSERT(seg->s_state == RSM_STATE_NEW); 6125 6126 /* 6127 * Translate perm to access 6128 */ 6129 if (msg->perm & ~RSM_PERM_RDWR) { 6130 rsmka_release_adapter(adapter); 
6131 DBG_PRINTF((category, RSM_ERR, 6132 "rsm_connect done:EINVAL invalid perms\n")); 6133 return (RSMERR_BAD_PERMS); 6134 } 6135 access = 0; 6136 if (msg->perm & RSM_PERM_READ) 6137 access |= RSM_ACCESS_READ; 6138 if (msg->perm & RSM_PERM_WRITE) 6139 access |= RSM_ACCESS_WRITE; 6140 6141 seg->s_node = msg->nodeid; 6142 6143 /* 6144 * Adding to the import list locks the segment; release the segment 6145 * lock so we can get the reply for the send. 6146 */ 6147 e = rsmimport_add(seg, msg->key); 6148 if (e) { 6149 rsmka_release_adapter(adapter); 6150 DBG_PRINTF((category, RSM_ERR, 6151 "rsm_connect done:rsmimport_add failed %d\n", e)); 6152 return (e); 6153 } 6154 seg->s_state = RSM_STATE_CONNECTING; 6155 6156 /* 6157 * Set the s_adapter field here so as to have a valid comparison of 6158 * the adapter and the s_adapter value during rsmshare_get. For 6159 * any error, set s_adapter to NULL before doing a release_adapter 6160 */ 6161 seg->s_adapter = adapter; 6162 6163 rsmseglock_release(seg); 6164 6165 /* 6166 * get the pointer to the shared data structure; the 6167 * shared data is locked and refcount has been incremented 6168 */ 6169 sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg); 6170 6171 ASSERT(rsmsharelock_held(seg)); 6172 6173 do { 6174 /* flag indicates whether we need to recheck the state */ 6175 recheck_state = 0; 6176 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6177 "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state)); 6178 switch (sharedp->rsmsi_state) { 6179 case RSMSI_STATE_NEW: 6180 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING; 6181 break; 6182 case RSMSI_STATE_CONNECTING: 6183 /* FALLTHRU */ 6184 case RSMSI_STATE_CONN_QUIESCE: 6185 /* FALLTHRU */ 6186 case RSMSI_STATE_MAP_QUIESCE: 6187 /* wait for the state to change */ 6188 while ((sharedp->rsmsi_state == 6189 RSMSI_STATE_CONNECTING) || 6190 (sharedp->rsmsi_state == 6191 RSMSI_STATE_CONN_QUIESCE) || 6192 (sharedp->rsmsi_state == 6193 RSMSI_STATE_MAP_QUIESCE)) { 6194 if (cv_wait_sig(&sharedp->rsmsi_cv, 6195 &sharedp->rsmsi_lock) == 0) { 6196 /* signalled - clean up and return */ 6197 rsmsharelock_release(seg); 6198 rsmimport_rm(seg); 6199 seg->s_adapter = NULL; 6200 rsmka_release_adapter(adapter); 6201 seg->s_state = RSM_STATE_NEW; 6202 DBG_PRINTF((category, RSM_ERR, 6203 "rsm_connect done: INTERRUPTED\n")); 6204 return (RSMERR_INTERRUPTED); 6205 } 6206 } 6207 /* 6208 * the state changed, loop back and check what it is 6209 */ 6210 recheck_state = 1; 6211 break; 6212 case RSMSI_STATE_ABORT_CONNECT: 6213 /* exit the loop and clean up further down */ 6214 break; 6215 case RSMSI_STATE_CONNECTED: 6216 /* already connected, good - fall through */ 6217 case RSMSI_STATE_MAPPED: 6218 /* already mapped, wow - fall through */ 6219 /* access validation etc is done further down */ 6220 break; 6221 case RSMSI_STATE_DISCONNECTED: 6222 /* disconnected - so reconnect now */ 6223 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING; 6224 break; 6225 default: 6226 ASSERT(0); /* Invalid State */ 6227 } 6228 } while (recheck_state); 6229 6230 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6231 /* we are the first to connect */ 6232 rsmsharelock_release(seg); 6233 6234 if (msg->nodeid != my_nodeid) { 6235 addr = get_remote_hwaddr(adapter, msg->nodeid); 6236 6237 if ((int64_t)addr < 0) { 6238 rsmsharelock_acquire(seg); 6239 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6240 RSMSI_STATE_NEW); 6241 rsmsharelock_release(seg); 6242 rsmimport_rm(seg); 6243 seg->s_adapter = NULL; 6244 rsmka_release_adapter(adapter); 6245 seg->s_state = 
RSM_STATE_NEW; 6246 DBG_PRINTF((category, RSM_ERR, 6247 "rsm_connect done: hwaddr<0\n")); 6248 return (RSMERR_INTERNAL_ERROR); 6249 } 6250 } else { 6251 addr = adapter->hwaddr; 6252 } 6253 6254 /* 6255 * send request to node [src, dest, key, msgid] and get back 6256 * [status, msgid, cookie] 6257 */ 6258 request.rsmipc_key = msg->key; 6259 /* 6260 * we need the s_mode of the exporter so pass 6261 * RSM_ACCESS_TRUSTED 6262 */ 6263 request.rsmipc_perm = RSM_ACCESS_TRUSTED; 6264 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT; 6265 request.rsmipc_adapter_hwaddr = addr; 6266 request.rsmipc_segment_cookie = sharedp; 6267 6268 e = (int)rsmipc_send(msg->nodeid, &request, &reply); 6269 if (e) { 6270 rsmsharelock_acquire(seg); 6271 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6272 RSMSI_STATE_NEW); 6273 rsmsharelock_release(seg); 6274 rsmimport_rm(seg); 6275 seg->s_adapter = NULL; 6276 rsmka_release_adapter(adapter); 6277 seg->s_state = RSM_STATE_NEW; 6278 DBG_PRINTF((category, RSM_ERR, 6279 "rsm_connect done:rsmipc_send failed %d\n", e)); 6280 return (e); 6281 } 6282 6283 if (reply.rsmipc_status != RSM_SUCCESS) { 6284 rsmsharelock_acquire(seg); 6285 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6286 RSMSI_STATE_NEW); 6287 rsmsharelock_release(seg); 6288 rsmimport_rm(seg); 6289 seg->s_adapter = NULL; 6290 rsmka_release_adapter(adapter); 6291 seg->s_state = RSM_STATE_NEW; 6292 DBG_PRINTF((category, RSM_ERR, 6293 "rsm_connect done:rsmipc_send reply err %d\n", 6294 reply.rsmipc_status)); 6295 return (reply.rsmipc_status); 6296 } 6297 6298 rsmsharelock_acquire(seg); 6299 /* store the information recvd into the shared data struct */ 6300 sharedp->rsmsi_mode = reply.rsmipc_mode; 6301 sharedp->rsmsi_uid = reply.rsmipc_uid; 6302 sharedp->rsmsi_gid = reply.rsmipc_gid; 6303 sharedp->rsmsi_seglen = reply.rsmipc_seglen; 6304 sharedp->rsmsi_cookie = sharedp; 6305 } 6306 6307 rsmsharelock_release(seg); 6308 6309 /* 6310 * Get the segment lock and check for a force disconnect 6311 * from the export side which would have changed the state 6312 * back to RSM_STATE_NEW. Once the segment lock is acquired a 6313 * force disconnect will be held off until the connection 6314 * has completed. 
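 *
 * The ordering below is always segment lock first, then the shared-data
 * lock, released in the opposite order; a sketch of the pattern:
 *
 *	rsmseglock_acquire(seg);	s_state is now stable
 *	rsmsharelock_acquire(seg);	rsmsi_state is now stable
 *	...
 *	rsmsharelock_release(seg);
 *	rsmseglock_release(seg);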
6315 */ 6316 rsmseglock_acquire(seg); 6317 rsmsharelock_acquire(seg); 6318 ASSERT(seg->s_state == RSM_STATE_CONNECTING || 6319 seg->s_state == RSM_STATE_ABORT_CONNECT); 6320 6321 shared_cookie = sharedp->rsmsi_cookie; 6322 6323 if ((seg->s_state == RSM_STATE_ABORT_CONNECT) || 6324 (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) { 6325 seg->s_state = RSM_STATE_NEW; 6326 seg->s_adapter = NULL; 6327 rsmsharelock_release(seg); 6328 rsmseglock_release(seg); 6329 rsmimport_rm(seg); 6330 rsmka_release_adapter(adapter); 6331 6332 rsmsharelock_acquire(seg); 6333 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) { 6334 /* 6335 * set a flag indicating abort handling has been 6336 * done 6337 */ 6338 sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE; 6339 rsmsharelock_release(seg); 6340 /* send a message to exporter - only once */ 6341 (void) rsm_send_notimporting(msg->nodeid, 6342 msg->key, shared_cookie); 6343 rsmsharelock_acquire(seg); 6344 /* 6345 * wake up any waiting importers and inform that 6346 * connection has been aborted 6347 */ 6348 cv_broadcast(&sharedp->rsmsi_cv); 6349 } 6350 rsmsharelock_release(seg); 6351 6352 DBG_PRINTF((category, RSM_ERR, 6353 "rsm_connect done: RSM_STATE_ABORT_CONNECT\n")); 6354 return (RSMERR_INTERRUPTED); 6355 } 6356 6357 6358 /* 6359 * We need to verify that this process has access 6360 */ 6361 e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid, 6362 access & sharedp->rsmsi_mode, 6363 (int)(msg->perm & RSM_PERM_RDWR), cred); 6364 if (e) { 6365 rsmsharelock_release(seg); 6366 seg->s_state = RSM_STATE_NEW; 6367 seg->s_adapter = NULL; 6368 rsmseglock_release(seg); 6369 rsmimport_rm(seg); 6370 rsmka_release_adapter(adapter); 6371 /* 6372 * No need to lock segment it has been removed 6373 * from the hash table 6374 */ 6375 rsmsharelock_acquire(seg); 6376 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6377 rsmsharelock_release(seg); 6378 /* this is the first importer */ 6379 6380 (void) rsm_send_notimporting(msg->nodeid, msg->key, 6381 shared_cookie); 6382 rsmsharelock_acquire(seg); 6383 sharedp->rsmsi_state = RSMSI_STATE_NEW; 6384 cv_broadcast(&sharedp->rsmsi_cv); 6385 } 6386 rsmsharelock_release(seg); 6387 6388 DBG_PRINTF((category, RSM_ERR, 6389 "rsm_connect done: ipcaccess failed\n")); 6390 return (RSMERR_PERM_DENIED); 6391 } 6392 6393 /* update state and cookie */ 6394 seg->s_segid = sharedp->rsmsi_segid; 6395 seg->s_len = sharedp->rsmsi_seglen; 6396 seg->s_mode = access & sharedp->rsmsi_mode; 6397 seg->s_pid = ddi_get_pid(); 6398 seg->s_mapinfo = NULL; 6399 6400 if (seg->s_node != my_nodeid) { 6401 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6402 e = adapter->rsmpi_ops->rsm_connect( 6403 adapter->rsmpi_handle, 6404 addr, seg->s_segid, &sharedp->rsmsi_handle); 6405 6406 if (e != RSM_SUCCESS) { 6407 seg->s_state = RSM_STATE_NEW; 6408 seg->s_adapter = NULL; 6409 rsmsharelock_release(seg); 6410 rsmseglock_release(seg); 6411 rsmimport_rm(seg); 6412 rsmka_release_adapter(adapter); 6413 /* 6414 * inform the exporter to delete this importer 6415 */ 6416 (void) rsm_send_notimporting(msg->nodeid, 6417 msg->key, shared_cookie); 6418 6419 /* 6420 * Now inform any waiting importers to 6421 * retry connect. 
This needs to be done 6422 * after sending notimporting so that 6423 * the notimporting is sent before a waiting 6424 * importer sends a segconnect while retrying 6425 * 6426 * No need to lock segment it has been removed 6427 * from the hash table 6428 */ 6429 6430 rsmsharelock_acquire(seg); 6431 sharedp->rsmsi_state = RSMSI_STATE_NEW; 6432 cv_broadcast(&sharedp->rsmsi_cv); 6433 rsmsharelock_release(seg); 6434 6435 DBG_PRINTF((category, RSM_ERR, 6436 "rsm_connect error %d\n", e)); 6437 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR) 6438 return ( 6439 RSMERR_SEG_NOT_PUBLISHED_TO_NODE); 6440 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) || 6441 (e == RSMERR_UNKNOWN_RSM_ADDR)) 6442 return (RSMERR_REMOTE_NODE_UNREACHABLE); 6443 else 6444 return (e); 6445 } 6446 6447 } 6448 seg->s_handle.in = sharedp->rsmsi_handle; 6449 6450 } 6451 6452 seg->s_state = RSM_STATE_CONNECT; 6453 6454 6455 seg->s_flags &= ~RSM_IMPORT_DUMMY; /* clear dummy flag */ 6456 if (bar_va) { 6457 /* increment generation number on barrier page */ 6458 atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1); 6459 /* return user off into barrier page where status will be */ 6460 msg->off = (int)seg->s_hdr.rsmrc_num; 6461 msg->gnum = bar_va[msg->off]; /* gnum race */ 6462 } else { 6463 msg->off = 0; 6464 msg->gnum = 0; /* gnum race */ 6465 } 6466 6467 msg->len = (int)sharedp->rsmsi_seglen; 6468 msg->rnum = seg->s_minor; 6469 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED); 6470 rsmsharelock_release(seg); 6471 rsmseglock_release(seg); 6472 6473 /* Return back to user the segment size & perm in case it's needed */ 6474 6475 #ifdef _MULTI_DATAMODEL 6476 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 6477 rsm_ioctlmsg32_t msg32; 6478 6479 if (msg->len > UINT_MAX) 6480 msg32.len = RSM_MAXSZ_PAGE_ALIGNED; 6481 else 6482 msg32.len = msg->len; 6483 msg32.off = msg->off; 6484 msg32.perm = msg->perm; 6485 msg32.gnum = msg->gnum; 6486 msg32.rnum = msg->rnum; 6487 6488 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6489 "rsm_connect done\n")); 6490 6491 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr, 6492 sizeof (msg32), mode)) 6493 return (RSMERR_BAD_ADDR); 6494 else 6495 return (RSM_SUCCESS); 6496 } 6497 #endif 6498 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n")); 6499 6500 if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg), 6501 mode)) 6502 return (RSMERR_BAD_ADDR); 6503 else 6504 return (RSM_SUCCESS); 6505 } 6506 6507 static int 6508 rsm_unmap(rsmseg_t *seg) 6509 { 6510 int err; 6511 adapter_t *adapter; 6512 rsm_import_share_t *sharedp; 6513 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6514 6515 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6516 "rsm_unmap enter %u\n", seg->s_segid)); 6517 6518 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6519 6520 /* assert seg is locked */ 6521 ASSERT(rsmseglock_held(seg)); 6522 ASSERT(seg->s_state != RSM_STATE_MAPPING); 6523 6524 if ((seg->s_state != RSM_STATE_ACTIVE) && 6525 (seg->s_state != RSM_STATE_MAP_QUIESCE)) { 6526 /* segment unmap has already been done */ 6527 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n")); 6528 return (RSM_SUCCESS); 6529 } 6530 6531 sharedp = seg->s_share; 6532 6533 rsmsharelock_acquire(seg); 6534 6535 /* 6536 * - shared data struct is in MAPPED or MAP_QUIESCE state 6537 */ 6538 6539 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED || 6540 sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE); 6541 6542 /* 6543 * Unmap pages - previously rsm_memseg_import_unmap was called only if 6544 * the segment cookie 
list was NULL; but it is always NULL when 6545 * called from rsmmap_unmap and won't be NULL when called for 6546 * a force disconnect - so the check for NULL cookie list was removed 6547 */ 6548 6549 ASSERT(sharedp->rsmsi_mapcnt > 0); 6550 6551 sharedp->rsmsi_mapcnt--; 6552 6553 if (sharedp->rsmsi_mapcnt == 0) { 6554 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) { 6555 /* unmap the shared RSMPI mapping */ 6556 adapter = seg->s_adapter; 6557 if (seg->s_node != my_nodeid) { 6558 ASSERT(sharedp->rsmsi_handle != NULL); 6559 err = adapter->rsmpi_ops-> 6560 rsm_unmap(sharedp->rsmsi_handle); 6561 DBG_PRINTF((category, RSM_DEBUG, 6562 "rsm_unmap: rsmpi unmap %d\n", err)); 6563 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 6564 sharedp->rsmsi_mapinfo = NULL; 6565 } 6566 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 6567 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */ 6568 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE; 6569 } 6570 } 6571 6572 rsmsharelock_release(seg); 6573 6574 /* 6575 * The s_cookie field is used to store the cookie returned from the 6576 * ddi_umem_lock when binding the pages for an export segment. This 6577 * is the primary use of the s_cookie field and does not normally 6578 * pertain to any importing segment except in the loopback case. 6579 * For the loopback case, the import segment and export segment are 6580 * on the same node, the s_cookie field of the segment structure for 6581 * the importer is initialized to the s_cookie field in the exported 6582 * segment during the map operation and is used during the call to 6583 * devmap_umem_setup for the import mapping. 6584 * Thus, during unmap, we simply need to set s_cookie to NULL to 6585 * indicate that the mapping no longer exists. 6586 */ 6587 seg->s_cookie = NULL; 6588 6589 seg->s_mapinfo = NULL; 6590 6591 if (seg->s_state == RSM_STATE_ACTIVE) 6592 seg->s_state = RSM_STATE_CONNECT; 6593 else 6594 seg->s_state = RSM_STATE_CONN_QUIESCE; 6595 6596 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n")); 6597 6598 return (RSM_SUCCESS); 6599 } 6600 6601 /* 6602 * cookie returned here if not null indicates that it is 6603 * the last importer and it can be used in the RSMIPC_NOT_IMPORTING 6604 * message. 6605 */ 6606 static int 6607 rsm_closeconnection(rsmseg_t *seg, void **cookie) 6608 { 6609 int e; 6610 adapter_t *adapter; 6611 rsm_import_share_t *sharedp; 6612 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6613 6614 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6615 "rsm_closeconnection enter\n")); 6616 6617 *cookie = (void *)NULL; 6618 6619 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6620 6621 /* assert seg is locked */ 6622 ASSERT(rsmseglock_held(seg)); 6623 6624 if (seg->s_state == RSM_STATE_DISCONNECT) { 6625 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6626 "rsm_closeconnection done: already disconnected\n")); 6627 return (RSM_SUCCESS); 6628 } 6629 6630 /* wait for all putv/getv ops to get done */ 6631 while (seg->s_rdmacnt > 0) { 6632 cv_wait(&seg->s_cv, &seg->s_lock); 6633 } 6634 6635 (void) rsm_unmap(seg); 6636 6637 ASSERT(seg->s_state == RSM_STATE_CONNECT || 6638 seg->s_state == RSM_STATE_CONN_QUIESCE); 6639 6640 adapter = seg->s_adapter; 6641 sharedp = seg->s_share; 6642 6643 ASSERT(sharedp != NULL); 6644 6645 rsmsharelock_acquire(seg); 6646 6647 /* 6648 * Disconnect on adapter 6649 * 6650 * The current algorithm is stateless, I don't have to contact 6651 * server when I go away. He only gives me permissions. Of course, 6652 * the adapters will talk to terminate the connect. 
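 *
 * The caller-side pattern, as used by rsm_disconnect and rsmseg_unload
 * (sketch):
 *
 *	void *cookie;
 *
 *	(void) rsm_closeconnection(seg, &cookie);
 *	if (cookie != NULL) {
 *		last importer: send RSMIPC_MSG_NOTIMPORTING to the
 *		exporter with cookie as rsmipc_segment_cookie
 *	}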
6653 * 6654 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE 6655 */ 6656 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) && 6657 (sharedp->rsmsi_node != my_nodeid)) { 6658 6659 if (sharedp->rsmsi_refcnt == 1) { 6660 /* this is the last importer */ 6661 ASSERT(sharedp->rsmsi_mapcnt == 0); 6662 6663 e = adapter->rsmpi_ops-> 6664 rsm_disconnect(sharedp->rsmsi_handle); 6665 if (e != RSM_SUCCESS) { 6666 DBG_PRINTF((category, RSM_DEBUG, 6667 "rsm:disconnect failed seg=%x:err=%d\n", 6668 seg->s_key, e)); 6669 } 6670 } 6671 } 6672 6673 seg->s_handle.in = NULL; 6674 6675 sharedp->rsmsi_refcnt--; 6676 6677 if (sharedp->rsmsi_refcnt == 0) { 6678 *cookie = (void *)sharedp->rsmsi_cookie; 6679 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 6680 sharedp->rsmsi_handle = NULL; 6681 rsmsharelock_release(seg); 6682 6683 /* clean up the shared data structure */ 6684 mutex_destroy(&sharedp->rsmsi_lock); 6685 cv_destroy(&sharedp->rsmsi_cv); 6686 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t)); 6687 6688 } else { 6689 rsmsharelock_release(seg); 6690 } 6691 6692 /* increment generation number on barrier page */ 6693 if (bar_va) { 6694 atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1); 6695 } 6696 6697 /* 6698 * The following needs to be done after any 6699 * rsmsharelock calls which use seg->s_share. 6700 */ 6701 seg->s_share = NULL; 6702 6703 seg->s_state = RSM_STATE_DISCONNECT; 6704 /* signal anyone waiting in the CONN_QUIESCE state */ 6705 cv_broadcast(&seg->s_cv); 6706 6707 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6708 "rsm_closeconnection done\n")); 6709 6710 return (RSM_SUCCESS); 6711 } 6712 6713 int 6714 rsm_disconnect(rsmseg_t *seg) 6715 { 6716 rsmipc_request_t request; 6717 void *shared_cookie; 6718 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6719 6720 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n")); 6721 6722 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6723 6724 /* assert seg isn't locked */ 6725 ASSERT(!rsmseglock_held(seg)); 6726 6727 6728 /* Remove segment from imported list */ 6729 rsmimport_rm(seg); 6730 6731 /* acquire the segment */ 6732 rsmseglock_acquire(seg); 6733 6734 /* wait until segment leaves the mapping state */ 6735 while (seg->s_state == RSM_STATE_MAPPING) 6736 cv_wait(&seg->s_cv, &seg->s_lock); 6737 6738 if (seg->s_state == RSM_STATE_DISCONNECT) { 6739 seg->s_state = RSM_STATE_NEW; 6740 rsmseglock_release(seg); 6741 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6742 "rsm_disconnect done: already disconnected\n")); 6743 return (RSM_SUCCESS); 6744 } 6745 6746 (void) rsm_closeconnection(seg, &shared_cookie); 6747 6748 /* update state */ 6749 seg->s_state = RSM_STATE_NEW; 6750 6751 if (shared_cookie != NULL) { 6752 /* 6753 * This is the last importer so inform the exporting node 6754 * so this import can be deleted from the list of importers. 
6755 */ 6756 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING; 6757 request.rsmipc_key = seg->s_segid; 6758 request.rsmipc_segment_cookie = shared_cookie; 6759 rsmseglock_release(seg); 6760 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 6761 } else { 6762 rsmseglock_release(seg); 6763 } 6764 6765 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n")); 6766 6767 return (DDI_SUCCESS); 6768 } 6769 6770 /*ARGSUSED*/ 6771 static int 6772 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 6773 struct pollhead **phpp) 6774 { 6775 minor_t rnum; 6776 rsmresource_t *res; 6777 rsmseg_t *seg; 6778 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 6779 6780 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n")); 6781 6782 /* find minor, no lock */ 6783 rnum = getminor(dev); 6784 res = rsmresource_lookup(rnum, RSM_NOLOCK); 6785 6786 /* poll is supported only for export/import segments */ 6787 if ((res == NULL) || (res == RSMRC_RESERVED) || 6788 (res->rsmrc_type == RSM_RESOURCE_BAR)) { 6789 return (ENXIO); 6790 } 6791 6792 *reventsp = 0; 6793 6794 /* 6795 * An exported segment must be in state RSM_STATE_EXPORT; an 6796 * imported segment must be in state RSM_STATE_ACTIVE. 6797 */ 6798 seg = (rsmseg_t *)res; 6799 6800 if (seg->s_pollevent) { 6801 *reventsp = POLLRDNORM; 6802 } else if (!anyyet) { 6803 /* cannot take segment lock here */ 6804 *phpp = &seg->s_poll; 6805 seg->s_pollflag |= RSM_SEGMENT_POLL; 6806 } 6807 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n")); 6808 return (0); 6809 } 6810 6811 6812 6813 /* ************************* IOCTL Commands ********************* */ 6814 6815 static rsmseg_t * 6816 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp, 6817 rsm_resource_type_t type) 6818 { 6819 /* get segment from resource handle */ 6820 rsmseg_t *seg; 6821 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 6822 6823 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n")); 6824 6825 6826 if (res != RSMRC_RESERVED) { 6827 seg = (rsmseg_t *)res; 6828 } else { 6829 /* Allocate segment now and bind it */ 6830 seg = rsmseg_alloc(rnum, credp); 6831 6832 /* 6833 * if DR pre-processing is going on or DR is in progress 6834 * then the new export segments should be in the NEW_QSCD state 6835 */ 6836 if (type == RSM_RESOURCE_EXPORT_SEGMENT) { 6837 mutex_enter(&rsm_drv_data.drv_lock); 6838 if ((rsm_drv_data.drv_state == 6839 RSM_DRV_PREDEL_STARTED) || 6840 (rsm_drv_data.drv_state == 6841 RSM_DRV_PREDEL_COMPLETED) || 6842 (rsm_drv_data.drv_state == 6843 RSM_DRV_DR_IN_PROGRESS)) { 6844 seg->s_state = RSM_STATE_NEW_QUIESCED; 6845 } 6846 mutex_exit(&rsm_drv_data.drv_lock); 6847 } 6848 6849 rsmresource_insert(rnum, (rsmresource_t *)seg, type); 6850 } 6851 6852 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n")); 6853 6854 return (seg); 6855 } 6856 6857 static int 6858 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6859 int mode, cred_t *credp) 6860 { 6861 int error; 6862 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 6863 6864 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n")); 6865 6866 arg = arg; 6867 credp = credp; 6868 6869 ASSERT(seg != NULL); 6870 6871 switch (cmd) { 6872 case RSM_IOCTL_BIND: 6873 error = rsm_bind(seg, msg, arg, mode); 6874 break; 6875 case RSM_IOCTL_REBIND: 6876 error = rsm_rebind(seg, msg); 6877 break; 6878 case RSM_IOCTL_UNBIND: 6879 error = ENOTSUP; 6880 break; 6881 case 
RSM_IOCTL_PUBLISH: 6882 error = rsm_publish(seg, msg, arg, mode); 6883 break; 6884 case RSM_IOCTL_REPUBLISH: 6885 error = rsm_republish(seg, msg, mode); 6886 break; 6887 case RSM_IOCTL_UNPUBLISH: 6888 error = rsm_unpublish(seg, 1); 6889 break; 6890 default: 6891 error = EINVAL; 6892 break; 6893 } 6894 6895 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n", 6896 error)); 6897 6898 return (error); 6899 } 6900 static int 6901 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6902 int mode, cred_t *credp) 6903 { 6904 int error; 6905 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6906 6907 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n")); 6908 6909 ASSERT(seg); 6910 6911 switch (cmd) { 6912 case RSM_IOCTL_CONNECT: 6913 error = rsm_connect(seg, msg, credp, arg, mode); 6914 break; 6915 default: 6916 error = EINVAL; 6917 break; 6918 } 6919 6920 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n", 6921 error)); 6922 return (error); 6923 } 6924 6925 static int 6926 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6927 int mode) 6928 { 6929 int e; 6930 adapter_t *adapter; 6931 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6932 6933 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n")); 6934 6935 6936 if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) { 6937 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6938 "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n")); 6939 return (RSMERR_CONN_ABORTED); 6940 } else if (seg->s_node == my_nodeid) { 6941 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6942 "rsmbar_ioctl done: loopback\n")); 6943 return (RSM_SUCCESS); 6944 } 6945 6946 adapter = seg->s_adapter; 6947 6948 switch (cmd) { 6949 case RSM_IOCTL_BAR_CHECK: 6950 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6951 "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va)); 6952 return (bar_va ? RSM_SUCCESS : EINVAL); 6953 case RSM_IOCTL_BAR_OPEN: 6954 e = adapter->rsmpi_ops-> 6955 rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar); 6956 break; 6957 case RSM_IOCTL_BAR_ORDER: 6958 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar); 6959 break; 6960 case RSM_IOCTL_BAR_CLOSE: 6961 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar); 6962 break; 6963 default: 6964 e = EINVAL; 6965 break; 6966 } 6967 6968 if (e == RSM_SUCCESS) { 6969 #ifdef _MULTI_DATAMODEL 6970 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 6971 rsm_ioctlmsg32_t msg32; 6972 int i; 6973 6974 for (i = 0; i < 4; i++) { 6975 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64; 6976 } 6977 6978 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6979 "rsmbar_ioctl done\n")); 6980 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 6981 sizeof (msg32), mode)) 6982 return (RSMERR_BAD_ADDR); 6983 else 6984 return (RSM_SUCCESS); 6985 } 6986 #endif 6987 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6988 "rsmbar_ioctl done\n")); 6989 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg, 6990 sizeof (*msg), mode)) 6991 return (RSMERR_BAD_ADDR); 6992 else 6993 return (RSM_SUCCESS); 6994 } 6995 6996 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6997 "rsmbar_ioctl done: error=%d\n", e)); 6998 6999 return (e); 7000 } 7001 7002 /* 7003 * Ring the doorbell of the export segment to which this segment is 7004 * connected. 
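 *
 * On the exporting node the RSMIPC_MSG_BELL handler (not shown in this
 * section) is what rsm_chpoll and rsm_consumeevent_ioctl later observe;
 * conceptually it does the converse of the consume path:
 *
 *	atomic_add_32(&seg->s_pollevent, 1);
 *	pollwakeup(&seg->s_poll, POLLRDNORM);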
7005 */ 7006 static int 7007 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 7008 { 7009 int e = 0; 7010 rsmipc_request_t request; 7011 7012 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7013 7014 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n")); 7015 7016 request.rsmipc_key = seg->s_segid; 7017 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7018 request.rsmipc_segment_cookie = NULL; 7019 e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 7020 7021 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7022 "exportbell_ioctl done: %d\n", e)); 7023 7024 return (e); 7025 } 7026 7027 /* 7028 * Ring the doorbells of all segments importing this segment 7029 */ 7030 static int 7031 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 7032 { 7033 importing_token_t *token = NULL; 7034 rsmipc_request_t request; 7035 int index; 7036 7037 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 7038 7039 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n")); 7040 7041 ASSERT(seg->s_state != RSM_STATE_NEW && 7042 seg->s_state != RSM_STATE_NEW_QUIESCED); 7043 7044 request.rsmipc_key = seg->s_segid; 7045 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7046 7047 index = rsmhash(seg->s_segid); 7048 7049 token = importer_list.bucket[index]; 7050 7051 while (token != NULL) { 7052 if (seg->s_key == token->key) { 7053 request.rsmipc_segment_cookie = 7054 token->import_segment_cookie; 7055 (void) rsmipc_send(token->importing_node, 7056 &request, RSM_NO_REPLY); 7057 } 7058 token = token->next; 7059 } 7060 7061 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7062 "importbell_ioctl done\n")); 7063 return (RSM_SUCCESS); 7064 } 7065 7066 static int 7067 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp, 7068 rsm_poll_event_t **eventspp, int mode) 7069 { 7070 rsm_poll_event_t *evlist = NULL; 7071 size_t evlistsz; 7072 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7073 7074 #ifdef _MULTI_DATAMODEL 7075 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7076 int i; 7077 rsm_consume_event_msg32_t cemsg32 = {0}; 7078 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7079 rsm_poll_event32_t *evlist32; 7080 size_t evlistsz32; 7081 7082 /* copyin the ioctl message */ 7083 if (ddi_copyin(arg, (caddr_t)&cemsg32, 7084 sizeof (rsm_consume_event_msg32_t), mode)) { 7085 DBG_PRINTF((category, RSM_ERR, 7086 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7087 return (RSMERR_BAD_ADDR); 7088 } 7089 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist; 7090 msgp->numents = (int)cemsg32.numents; 7091 7092 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents; 7093 /* 7094 * If numents is large alloc events list on heap otherwise 7095 * use the address of array that was passed in. 
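 *
 * A sketch of the user-side call that reaches this copyin (librsm is
 * the real caller; fd and the local names are illustrative):
 *
 *	rsm_consume_event_msg_t cem;
 *	rsm_poll_event_t ev[2];
 *
 *	cem.seglist = (caddr_t)ev;
 *	cem.numents = 2;
 *	(void) ioctl(fd, RSM_IOCTL_CONSUMEEVENT, &cem);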
7096 */ 7097 if (msgp->numents > RSM_MAX_POLLFDS) { 7098 if (msgp->numents > max_segs) { /* validate numents */ 7099 DBG_PRINTF((category, RSM_ERR, 7100 "consumeevent_copyin: " 7101 "RSMERR_BAD_ARGS_ERRORS\n")); 7102 return (RSMERR_BAD_ARGS_ERRORS); 7103 } 7104 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7105 } else { 7106 evlist32 = event32; 7107 } 7108 7109 /* copyin the seglist into the rsm_poll_event32_t array */ 7110 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32, 7111 evlistsz32, mode)) { 7112 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7113 kmem_free(evlist32, evlistsz32); 7114 } 7115 DBG_PRINTF((category, RSM_ERR, 7116 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7117 return (RSMERR_BAD_ADDR); 7118 } 7119 7120 /* evlist and evlistsz are based on rsm_poll_event_t type */ 7121 evlistsz = sizeof (rsm_poll_event_t)* msgp->numents; 7122 7123 if (msgp->numents > RSM_MAX_POLLFDS) { 7124 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7125 *eventspp = evlist; 7126 } else { 7127 evlist = *eventspp; 7128 } 7129 /* 7130 * copy the rsm_poll_event32_t array to the rsm_poll_event_t 7131 * array 7132 */ 7133 for (i = 0; i < msgp->numents; i++) { 7134 evlist[i].rnum = evlist32[i].rnum; 7135 evlist[i].fdsidx = evlist32[i].fdsidx; 7136 evlist[i].revent = evlist32[i].revent; 7137 } 7138 /* free the temp 32-bit event list */ 7139 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7140 kmem_free(evlist32, evlistsz32); 7141 } 7142 7143 return (RSM_SUCCESS); 7144 } 7145 #endif 7146 /* copyin the ioctl message */ 7147 if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t), 7148 mode)) { 7149 DBG_PRINTF((category, RSM_ERR, 7150 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7151 return (RSMERR_BAD_ADDR); 7152 } 7153 /* 7154 * If numents is large alloc events list on heap otherwise 7155 * use the address of array that was passed in. 
7156 */ 7157 if (msgp->numents > RSM_MAX_POLLFDS) { 7158 if (msgp->numents > max_segs) { /* validate numents */ 7159 DBG_PRINTF((category, RSM_ERR, 7160 "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n")); 7161 return (RSMERR_BAD_ARGS_ERRORS); 7162 } 7163 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7164 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7165 *eventspp = evlist; 7166 } 7167 7168 /* copyin the seglist */ 7169 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp), 7170 sizeof (rsm_poll_event_t)*msgp->numents, mode)) { 7171 if (evlist) { 7172 kmem_free(evlist, evlistsz); 7173 *eventspp = NULL; 7174 } 7175 DBG_PRINTF((category, RSM_ERR, 7176 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7177 return (RSMERR_BAD_ADDR); 7178 } 7179 7180 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7181 "consumeevent_copyin done\n")); 7182 return (RSM_SUCCESS); 7183 } 7184 7185 static int 7186 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp, 7187 rsm_poll_event_t *eventsp, int mode) 7188 { 7189 size_t evlistsz; 7190 int err = RSM_SUCCESS; 7191 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7192 7193 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7194 "consumeevent_copyout enter: numents(%d) eventsp(%p)\n", 7195 msgp->numents, eventsp)); 7196 7197 #ifdef _MULTI_DATAMODEL 7198 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7199 int i; 7200 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7201 rsm_poll_event32_t *evlist32; 7202 size_t evlistsz32; 7203 7204 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents; 7205 if (msgp->numents > RSM_MAX_POLLFDS) { 7206 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7207 } else { 7208 evlist32 = event32; 7209 } 7210 7211 /* 7212 * copy the rsm_poll_event_t array to the rsm_poll_event32_t 7213 * array 7214 */ 7215 for (i = 0; i < msgp->numents; i++) { 7216 evlist32[i].rnum = eventsp[i].rnum; 7217 evlist32[i].fdsidx = eventsp[i].fdsidx; 7218 evlist32[i].revent = eventsp[i].revent; 7219 } 7220 7221 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist, 7222 evlistsz32, mode)) { 7223 err = RSMERR_BAD_ADDR; 7224 } 7225 7226 if (msgp->numents > RSM_MAX_POLLFDS) { 7227 if (evlist32) { /* free the temp 32-bit event list */ 7228 kmem_free(evlist32, evlistsz32); 7229 } 7230 /* 7231 * eventsp and evlistsz are based on rsm_poll_event_t 7232 * type 7233 */ 7234 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7235 /* event list on the heap and needs to be freed here */ 7236 if (eventsp) { 7237 kmem_free(eventsp, evlistsz); 7238 } 7239 } 7240 7241 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7242 "consumeevent_copyout done: err=%d\n", err)); 7243 return (err); 7244 } 7245 #endif 7246 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7247 7248 if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz, 7249 mode)) { 7250 err = RSMERR_BAD_ADDR; 7251 } 7252 7253 if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) { 7254 /* event list on the heap and needs to be freed here */ 7255 kmem_free(eventsp, evlistsz); 7256 } 7257 7258 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7259 "consumeevent_copyout done: err=%d\n", err)); 7260 return (err); 7261 } 7262 7263 static int 7264 rsm_consumeevent_ioctl(caddr_t arg, int mode) 7265 { 7266 int rc; 7267 int i; 7268 minor_t rnum; 7269 rsm_consume_event_msg_t msg = {0}; 7270 rsmseg_t *seg; 7271 rsm_poll_event_t *event_list; 7272 rsm_poll_event_t events[RSM_MAX_POLLFDS]; 7273 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7274 7275 event_list = events; 7276 7277 if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, 
mode)) != 7278 RSM_SUCCESS) { 7279 return (rc); 7280 } 7281 7282 for (i = 0; i < msg.numents; i++) { 7283 rnum = event_list[i].rnum; 7284 event_list[i].revent = 0; 7285 /* get the segment structure */ 7286 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK); 7287 if (seg) { 7288 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7289 "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum, 7290 seg)); 7291 if (seg->s_pollevent) { 7292 /* consume the event */ 7293 atomic_add_32(&seg->s_pollevent, -1); 7294 event_list[i].revent = POLLRDNORM; 7295 } 7296 rsmseglock_release(seg); 7297 } 7298 } 7299 7300 if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) != 7301 RSM_SUCCESS) { 7302 return (rc); 7303 } 7304 7305 return (RSM_SUCCESS); 7306 } 7307 7308 static int 7309 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode) 7310 { 7311 int size; 7312 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7313 7314 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n")); 7315 7316 #ifdef _MULTI_DATAMODEL 7317 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7318 rsmka_iovec32_t *iovec32, *iovec32_base; 7319 int i; 7320 7321 size = count * sizeof (rsmka_iovec32_t); 7322 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP); 7323 if (ddi_copyin((caddr_t)user_vec, 7324 (caddr_t)iovec32, size, mode)) { 7325 kmem_free(iovec32, size); 7326 DBG_PRINTF((category, RSM_DEBUG, 7327 "iovec_copyin: returning RSMERR_BAD_ADDR\n")); 7328 return (RSMERR_BAD_ADDR); 7329 } 7330 7331 for (i = 0; i < count; i++, iovec++, iovec32++) { 7332 iovec->io_type = (int)iovec32->io_type; 7333 if (iovec->io_type == RSM_HANDLE_TYPE) 7334 iovec->local.segid = (rsm_memseg_id_t) 7335 iovec32->local; 7336 else 7337 iovec->local.vaddr = 7338 (caddr_t)(uintptr_t)iovec32->local; 7339 iovec->local_offset = (size_t)iovec32->local_offset; 7340 iovec->remote_offset = (size_t)iovec32->remote_offset; 7341 iovec->transfer_len = (size_t)iovec32->transfer_len; 7342 7343 } 7344 kmem_free(iovec32_base, size); 7345 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7346 "iovec_copyin done\n")); 7347 return (DDI_SUCCESS); 7348 } 7349 #endif 7350 7351 size = count * sizeof (rsmka_iovec_t); 7352 if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) { 7353 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7354 "iovec_copyin done: RSMERR_BAD_ADDR\n")); 7355 return (RSMERR_BAD_ADDR); 7356 } 7357 7358 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n")); 7359 7360 return (DDI_SUCCESS); 7361 } 7362 7363 7364 static int 7365 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7366 { 7367 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7368 7369 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n")); 7370 7371 #ifdef _MULTI_DATAMODEL 7372 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7373 rsmka_scat_gath32_t sg_io32; 7374 7375 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32), 7376 mode)) { 7377 DBG_PRINTF((category, RSM_DEBUG, 7378 "sgio_copyin done: returning EFAULT\n")); 7379 return (RSMERR_BAD_ADDR); 7380 } 7381 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid; 7382 sg_io->io_request_count = (size_t)sg_io32.io_request_count; 7383 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count; 7384 sg_io->flags = (size_t)sg_io32.flags; 7385 sg_io->remote_handle = (rsm_memseg_import_handle_t) 7386 (uintptr_t)sg_io32.remote_handle; 7387 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec; 7388 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7389 "sgio_copyin done\n")); 7390 
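		/*
		 * Every pointer-sized field above is widened through
		 * (uintptr_t) first, so a 32-bit handle or iovec address
		 * converts cleanly to the kernel's 64-bit pointer types.
		 */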
return (DDI_SUCCESS); 7391 } 7392 #endif 7393 if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t), 7394 mode)) { 7395 DBG_PRINTF((category, RSM_DEBUG, 7396 "sgio_copyin done: returning EFAULT\n")); 7397 return (RSMERR_BAD_ADDR); 7398 } 7399 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n")); 7400 return (DDI_SUCCESS); 7401 } 7402 7403 static int 7404 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7405 { 7406 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7407 7408 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7409 "sgio_resid_copyout enter\n")); 7410 7411 #ifdef _MULTI_DATAMODEL 7412 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7413 rsmka_scat_gath32_t sg_io32; 7414 7415 sg_io32.io_residual_count = sg_io->io_residual_count; 7416 sg_io32.flags = sg_io->flags; 7417 7418 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count, 7419 (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count, 7420 sizeof (uint32_t), mode)) { 7421 7422 DBG_PRINTF((category, RSM_ERR, 7423 "sgio_resid_copyout error: rescnt\n")); 7424 return (RSMERR_BAD_ADDR); 7425 } 7426 7427 if (ddi_copyout((caddr_t)&sg_io32.flags, 7428 (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags, 7429 sizeof (uint32_t), mode)) { 7430 7431 DBG_PRINTF((category, RSM_ERR, 7432 "sgio_resid_copyout error: flags\n")); 7433 return (RSMERR_BAD_ADDR); 7434 } 7435 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7436 "sgio_resid_copyout done\n")); 7437 return (DDI_SUCCESS); 7438 } 7439 #endif 7440 if (ddi_copyout((caddr_t)&sg_io->io_residual_count, 7441 (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count, 7442 sizeof (ulong_t), mode)) { 7443 7444 DBG_PRINTF((category, RSM_ERR, 7445 "sgio_resid_copyout error:rescnt\n")); 7446 return (RSMERR_BAD_ADDR); 7447 } 7448 7449 if (ddi_copyout((caddr_t)&sg_io->flags, 7450 (caddr_t)&((rsmka_scat_gath_t *)arg)->flags, 7451 sizeof (uint_t), mode)) { 7452 7453 DBG_PRINTF((category, RSM_ERR, 7454 "sgio_resid_copyout error:flags\n")); 7455 return (RSMERR_BAD_ADDR); 7456 } 7457 7458 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n")); 7459 return (DDI_SUCCESS); 7460 } 7461 7462 7463 static int 7464 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp) 7465 { 7466 rsmka_scat_gath_t sg_io; 7467 rsmka_iovec_t ka_iovec_arr[RSM_MAX_IOVLEN]; 7468 rsmka_iovec_t *ka_iovec; 7469 rsmka_iovec_t *ka_iovec_start; 7470 rsmpi_scat_gath_t rsmpi_sg_io; 7471 rsmpi_iovec_t iovec_arr[RSM_MAX_IOVLEN]; 7472 rsmpi_iovec_t *iovec; 7473 rsmpi_iovec_t *iovec_start = NULL; 7474 rsmapi_access_entry_t *acl; 7475 rsmresource_t *res; 7476 minor_t rnum; 7477 rsmseg_t *im_seg, *ex_seg; 7478 int e; 7479 int error = 0; 7480 uint_t i; 7481 uint_t iov_proc = 0; /* num of iovecs processed */ 7482 size_t size = 0; 7483 size_t ka_size; 7484 7485 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7486 7487 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n")); 7488 7489 credp = credp; 7490 7491 /* 7492 * Copyin the scatter/gather structure and build new structure 7493 * for rsmpi. 
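 *
 * A sketch of the request this path consumes, using only field names
 * that appear below (values and fd are illustrative):
 *
 *	rsmka_iovec_t iov;
 *	rsmka_scat_gath_t sg;
 *
 *	iov.io_type = RSM_HANDLE_TYPE;	local side is an export segment
 *	iov.local.segid = segid;
 *	iov.local_offset = 0;
 *	iov.remote_offset = 0;
 *	iov.transfer_len = len;
 *
 *	sg.io_request_count = 1;
 *	sg.iovec = &iov;
 *	(void) ioctl(fd, RSM_IOCTL_PUTV, &sg);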
7494 */ 7495 e = sgio_copyin(arg, &sg_io, mode); 7496 if (e != DDI_SUCCESS) { 7497 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7498 "rsm_iovec_ioctl done: sgio_copyin %d\n", e)); 7499 return (e); 7500 } 7501 7502 if (sg_io.io_request_count > RSM_MAX_SGIOREQS) { 7503 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7504 "rsm_iovec_ioctl done: request_count(%d) too large\n", 7505 sg_io.io_request_count)); 7506 return (RSMERR_BAD_SGIO); 7507 } 7508 7509 rsmpi_sg_io.io_request_count = sg_io.io_request_count; 7510 rsmpi_sg_io.io_residual_count = sg_io.io_request_count; 7511 rsmpi_sg_io.io_segflg = 0; 7512 7513 /* Allocate memory and copyin io vector array */ 7514 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7515 ka_size = sg_io.io_request_count * sizeof (rsmka_iovec_t); 7516 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP); 7517 } else { 7518 ka_iovec_start = ka_iovec = ka_iovec_arr; 7519 } 7520 e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec, 7521 sg_io.io_request_count, mode); 7522 if (e != DDI_SUCCESS) { 7523 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7524 kmem_free(ka_iovec, ka_size); 7525 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7526 "rsm_iovec_ioctl done: iovec_copyin %d\n", e)); 7527 return (e); 7528 } 7529 7530 /* get the import segment descriptor */ 7531 rnum = getminor(dev); 7532 res = rsmresource_lookup(rnum, RSM_LOCK); 7533 7534 /* 7535 * The following sequence of locking may (or MAY NOT) cause a 7536 * deadlock but this is currently not addressed here since the 7537 * implementation will be changed to incorporate the use of 7538 * reference counting for both the import and the export segments. 7539 */ 7540 7541 /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */ 7542 7543 im_seg = (rsmseg_t *)res; 7544 7545 if (im_seg == NULL) { 7546 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7547 kmem_free(ka_iovec, ka_size); 7548 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7549 "rsm_iovec_ioctl done: rsmresource_lookup failed\n")); 7550 return (EINVAL); 7551 } 7552 /* putv/getv is supported only on import segments */ 7553 if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) { 7554 rsmseglock_release(im_seg); 7555 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7556 kmem_free(ka_iovec, ka_size); 7557 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7558 "rsm_iovec_ioctl done: not an import segment\n")); 7559 return (EINVAL); 7560 } 7561 7562 /* 7563 * wait for a remote DR to complete, i.e. for segments to get UNQUIESCED, 7564 * as well as wait for a local DR to complete.
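 *
 * This is the usual interruptible-wait pattern: cv_wait_sig returns 0
 * when a signal is pending, and the operation then backs out with
 * RSMERR_INTERRUPTED instead of holding the caller across an entire
 * DR operation:
 *
 *	while (segment quiesced or DR in progress)
 *		if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0)
 *			return (RSMERR_INTERRUPTED);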
7565 */ 7566 while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) || 7567 (im_seg->s_state == RSM_STATE_MAP_QUIESCE) || 7568 (im_seg->s_flags & RSM_DR_INPROGRESS)) { 7569 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) { 7570 DBG_PRINTF((category, RSM_DEBUG, 7571 "rsm_iovec_ioctl done: cv_wait INTR")); 7572 rsmseglock_release(im_seg); 7573 return (RSMERR_INTERRUPTED); 7574 } 7575 } 7576 7577 if ((im_seg->s_state != RSM_STATE_CONNECT) && 7578 (im_seg->s_state != RSM_STATE_ACTIVE)) { 7579 7580 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT || 7581 im_seg->s_state == RSM_STATE_NEW); 7582 7583 DBG_PRINTF((category, RSM_DEBUG, 7584 "rsm_iovec_ioctl done: im_seg not conn/map")); 7585 rsmseglock_release(im_seg); 7586 e = RSMERR_BAD_SGIO; 7587 goto out; 7588 } 7589 7590 im_seg->s_rdmacnt++; 7591 rsmseglock_release(im_seg); 7592 7593 /* 7594 * Allocate and set up the io vector for rsmpi 7595 */ 7596 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7597 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t); 7598 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP); 7599 } else { 7600 iovec_start = iovec = iovec_arr; 7601 } 7602 7603 rsmpi_sg_io.iovec = iovec; 7604 for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) { 7605 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7606 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7607 7608 if (ex_seg == NULL) { 7609 e = RSMERR_BAD_SGIO; 7610 break; 7611 } 7612 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7613 7614 acl = ex_seg->s_acl; 7615 if (acl[0].ae_permission == 0) { 7616 struct buf *xbuf; 7617 dev_t sdev = 0; 7618 7619 xbuf = ddi_umem_iosetup(ex_seg->s_cookie, 7620 0, ex_seg->s_len, B_WRITE, 7621 sdev, 0, NULL, DDI_UMEM_SLEEP); 7622 7623 ASSERT(xbuf != NULL); 7624 7625 iovec->local_mem.ms_type = RSM_MEM_BUF; 7626 iovec->local_mem.ms_memory.bp = xbuf; 7627 } else { 7628 iovec->local_mem.ms_type = RSM_MEM_HANDLE; 7629 iovec->local_mem.ms_memory.handle = 7630 ex_seg->s_handle.out; 7631 } 7632 ex_seg->s_rdmacnt++; /* refcnt the handle */ 7633 rsmseglock_release(ex_seg); 7634 } else { 7635 iovec->local_mem.ms_type = RSM_MEM_VADDR; 7636 iovec->local_mem.ms_memory.vr.vaddr = 7637 ka_iovec->local.vaddr; 7638 } 7639 7640 iovec->local_offset = ka_iovec->local_offset; 7641 iovec->remote_handle = im_seg->s_handle.in; 7642 iovec->remote_offset = ka_iovec->remote_offset; 7643 iovec->transfer_length = ka_iovec->transfer_len; 7644 iovec++; 7645 ka_iovec++; 7646 } 7647 7648 if (iov_proc < sg_io.io_request_count) { 7649 /* error while processing handle */ 7650 rsmseglock_acquire(im_seg); 7651 im_seg->s_rdmacnt--; /* decrement the refcnt for importseg */ 7652 if (im_seg->s_rdmacnt == 0) { 7653 cv_broadcast(&im_seg->s_cv); 7654 } 7655 rsmseglock_release(im_seg); 7656 goto out; 7657 } 7658 7659 /* call rsmpi */ 7660 if (cmd == RSM_IOCTL_PUTV) 7661 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv( 7662 im_seg->s_adapter->rsmpi_handle, 7663 &rsmpi_sg_io); 7664 else if (cmd == RSM_IOCTL_GETV) 7665 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv( 7666 im_seg->s_adapter->rsmpi_handle, 7667 &rsmpi_sg_io); 7668 else { 7669 e = EINVAL; 7670 DBG_PRINTF((category, RSM_DEBUG, 7671 "iovec_ioctl: bad command = %x\n", cmd)); 7672 } 7673 7674 7675 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7676 "rsm_iovec_ioctl RSMPI oper done %d\n", e)); 7677 7678 sg_io.io_residual_count = rsmpi_sg_io.io_residual_count; 7679 7680 /* 7681 * Check for implicit signal post flag and do the signal 7682 * post if needed 7683 */ 7684 if (sg_io.flags & RSM_IMPLICIT_SIGPOST && 7685 e 
== RSM_SUCCESS) { 7686 rsmipc_request_t request; 7687 7688 request.rsmipc_key = im_seg->s_segid; 7689 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7690 request.rsmipc_segment_cookie = NULL; 7691 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY); 7692 /* 7693 * Reset the implicit signal post flag to 0 to indicate 7694 * that the signal post has been done and need not be 7695 * done in the RSMAPI library 7696 */ 7697 sg_io.flags &= ~RSM_IMPLICIT_SIGPOST; 7698 } 7699 7700 rsmseglock_acquire(im_seg); 7701 im_seg->s_rdmacnt--; 7702 if (im_seg->s_rdmacnt == 0) { 7703 cv_broadcast(&im_seg->s_cv); 7704 } 7705 rsmseglock_release(im_seg); 7706 error = sgio_resid_copyout(arg, &sg_io, mode); 7707 out: 7708 iovec = iovec_start; 7709 ka_iovec = ka_iovec_start; 7710 for (i = 0; i < iov_proc; i++) { 7711 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7712 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7713 7714 ASSERT(ex_seg != NULL); 7715 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7716 7717 ex_seg->s_rdmacnt--; /* unrefcnt the handle */ 7718 if (ex_seg->s_rdmacnt == 0) { 7719 cv_broadcast(&ex_seg->s_cv); 7720 } 7721 rsmseglock_release(ex_seg); 7722 } 7723 7724 ASSERT(iovec != NULL); /* true if iov_proc > 0 */ 7725 7726 /* 7727 * At present there is no dependency on the existence of xbufs 7728 * created by ddi_umem_iosetup for each of the iovecs. So we 7729 * can free these xbufs here. 7730 */ 7731 if (iovec->local_mem.ms_type == RSM_MEM_BUF) { 7732 freerbuf(iovec->local_mem.ms_memory.bp); 7733 } 7734 7735 iovec++; 7736 ka_iovec++; 7737 } 7738 7739 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7740 if (iovec_start) 7741 kmem_free(iovec_start, size); 7742 kmem_free(ka_iovec_start, ka_size); 7743 } 7744 7745 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7746 "rsm_iovec_ioctl done %d\n", e)); 7747 /* if RSMPI call fails return that else return copyout's retval */ 7748 return ((e != RSM_SUCCESS) ?
e : error); 7749 7750 } 7751 7752 7753 static int 7754 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode) 7755 { 7756 adapter_t *adapter; 7757 rsm_addr_t addr; 7758 rsm_node_id_t node; 7759 int rval = DDI_SUCCESS; 7760 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 7761 7762 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n")); 7763 7764 adapter = rsm_getadapter(msg, mode); 7765 if (adapter == NULL) { 7766 DBG_PRINTF((category, RSM_DEBUG, 7767 "rsmaddr_ioctl done: adapter not found\n")); 7768 return (RSMERR_CTLR_NOT_PRESENT); 7769 } 7770 7771 switch (cmd) { 7772 case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */ 7773 /* returns the hwaddr in msg->hwaddr */ 7774 if (msg->nodeid == my_nodeid) { 7775 msg->hwaddr = adapter->hwaddr; 7776 } else { 7777 addr = get_remote_hwaddr(adapter, msg->nodeid); 7778 if ((int64_t)addr < 0) { 7779 rval = RSMERR_INTERNAL_ERROR; 7780 } else { 7781 msg->hwaddr = addr; 7782 } 7783 } 7784 break; 7785 case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */ 7786 /* returns the nodeid in msg->nodeid */ 7787 if (msg->hwaddr == adapter->hwaddr) { 7788 msg->nodeid = my_nodeid; 7789 } else { 7790 node = get_remote_nodeid(adapter, msg->hwaddr); 7791 if ((int)node < 0) { 7792 rval = RSMERR_INTERNAL_ERROR; 7793 } else { 7794 msg->nodeid = (rsm_node_id_t)node; 7795 } 7796 } 7797 break; 7798 default: 7799 rval = EINVAL; 7800 break; 7801 } 7802 7803 rsmka_release_adapter(adapter); 7804 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7805 "rsmaddr_ioctl done: %d\n", rval)); 7806 return (rval); 7807 } 7808 7809 static int 7810 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode) 7811 { 7812 DBG_DEFINE(category, 7813 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7814 7815 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n")); 7816 7817 #ifdef _MULTI_DATAMODEL 7818 7819 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7820 rsm_ioctlmsg32_t msg32; 7821 int i; 7822 7823 if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) { 7824 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7825 "rsm_ddi_copyin done: EFAULT\n")); 7826 return (RSMERR_BAD_ADDR); 7827 } 7828 msg->len = msg32.len; 7829 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr; 7830 msg->arg = (caddr_t)(uintptr_t)msg32.arg; 7831 msg->key = msg32.key; 7832 msg->acl_len = msg32.acl_len; 7833 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl; 7834 msg->cnum = msg32.cnum; 7835 msg->cname = (caddr_t)(uintptr_t)msg32.cname; 7836 msg->cname_len = msg32.cname_len; 7837 msg->nodeid = msg32.nodeid; 7838 msg->hwaddr = msg32.hwaddr; 7839 msg->perm = msg32.perm; 7840 for (i = 0; i < 4; i++) { 7841 msg->bar.comp[i].u64 = msg32.bar.comp[i].u64; 7842 } 7843 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7844 "rsm_ddi_copyin done\n")); 7845 return (RSM_SUCCESS); 7846 } 7847 #endif 7848 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n")); 7849 if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode)) 7850 return (RSMERR_BAD_ADDR); 7851 else 7852 return (RSM_SUCCESS); 7853 } 7854 7855 static int 7856 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode) 7857 { 7858 rsmka_int_controller_attr_t rsm_cattr; 7859 DBG_DEFINE(category, 7860 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7861 7862 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7863 "rsmattr_ddi_copyout enter\n")); 7864 /* 7865 * need to copy appropriate data from rsm_controller_attr_t 7866 * to rsmka_int_controller_attr_t 7867 */ 7868 #ifdef _MULTI_DATAMODEL 7869 if ((mode & DATAMODEL_MASK) == 

static int
rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode)
{
	rsmka_int_controller_attr_t	rsm_cattr;
	DBG_DEFINE(category,
	    RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmattr_ddi_copyout enter\n"));
	/*
	 * need to copy appropriate data from rsm_controller_attr_t
	 * to rsmka_int_controller_attr_t
	 */
#ifdef _MULTI_DATAMODEL
	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
		rsmka_int_controller_attr32_t rsm_cattr32;

		rsm_cattr32.attr_direct_access_sizes =
		    adapter->rsm_attr.attr_direct_access_sizes;
		rsm_cattr32.attr_atomic_sizes =
		    adapter->rsm_attr.attr_atomic_sizes;
		rsm_cattr32.attr_page_size =
		    adapter->rsm_attr.attr_page_size;
		if (adapter->rsm_attr.attr_max_export_segment_size >
		    UINT_MAX)
			rsm_cattr32.attr_max_export_segment_size =
			    RSM_MAXSZ_PAGE_ALIGNED;
		else
			rsm_cattr32.attr_max_export_segment_size =
			    adapter->rsm_attr.attr_max_export_segment_size;
		if (adapter->rsm_attr.attr_tot_export_segment_size >
		    UINT_MAX)
			rsm_cattr32.attr_tot_export_segment_size =
			    RSM_MAXSZ_PAGE_ALIGNED;
		else
			rsm_cattr32.attr_tot_export_segment_size =
			    adapter->rsm_attr.attr_tot_export_segment_size;
		if (adapter->rsm_attr.attr_max_export_segments >
		    UINT_MAX)
			rsm_cattr32.attr_max_export_segments =
			    UINT_MAX;
		else
			rsm_cattr32.attr_max_export_segments =
			    adapter->rsm_attr.attr_max_export_segments;
		if (adapter->rsm_attr.attr_max_import_map_size >
		    UINT_MAX)
			rsm_cattr32.attr_max_import_map_size =
			    RSM_MAXSZ_PAGE_ALIGNED;
		else
			rsm_cattr32.attr_max_import_map_size =
			    adapter->rsm_attr.attr_max_import_map_size;
		if (adapter->rsm_attr.attr_tot_import_map_size >
		    UINT_MAX)
			rsm_cattr32.attr_tot_import_map_size =
			    RSM_MAXSZ_PAGE_ALIGNED;
		else
			rsm_cattr32.attr_tot_import_map_size =
			    adapter->rsm_attr.attr_tot_import_map_size;
		if (adapter->rsm_attr.attr_max_import_segments >
		    UINT_MAX)
			rsm_cattr32.attr_max_import_segments =
			    UINT_MAX;
		else
			rsm_cattr32.attr_max_import_segments =
			    adapter->rsm_attr.attr_max_import_segments;
		rsm_cattr32.attr_controller_addr =
		    adapter->rsm_attr.attr_controller_addr;

		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmattr_ddi_copyout done\n"));
		if (ddi_copyout((caddr_t)&rsm_cattr32, arg,
		    sizeof (rsmka_int_controller_attr32_t), mode)) {
			return (RSMERR_BAD_ADDR);
		} else {
			return (RSM_SUCCESS);
		}
	}
#endif
	rsm_cattr.attr_direct_access_sizes =
	    adapter->rsm_attr.attr_direct_access_sizes;
	rsm_cattr.attr_atomic_sizes =
	    adapter->rsm_attr.attr_atomic_sizes;
	rsm_cattr.attr_page_size =
	    adapter->rsm_attr.attr_page_size;
	rsm_cattr.attr_max_export_segment_size =
	    adapter->rsm_attr.attr_max_export_segment_size;
	rsm_cattr.attr_tot_export_segment_size =
	    adapter->rsm_attr.attr_tot_export_segment_size;
	rsm_cattr.attr_max_export_segments =
	    adapter->rsm_attr.attr_max_export_segments;
	rsm_cattr.attr_max_import_map_size =
	    adapter->rsm_attr.attr_max_import_map_size;
	rsm_cattr.attr_tot_import_map_size =
	    adapter->rsm_attr.attr_tot_import_map_size;
	rsm_cattr.attr_max_import_segments =
	    adapter->rsm_attr.attr_max_import_segments;
	rsm_cattr.attr_controller_addr =
	    adapter->rsm_attr.attr_controller_addr;
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmattr_ddi_copyout done\n"));
	if (ddi_copyout((caddr_t)&rsm_cattr, arg,
	    sizeof (rsmka_int_controller_attr_t), mode)) {
		return (RSMERR_BAD_ADDR);
	} else {
		return (RSM_SUCCESS);
	}
}
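
/*
 * The clamping above follows one simple rule: a 64-bit attribute that does
 * not fit in 32 bits is reported to an ILP32 client as the largest
 * representable value (RSM_MAXSZ_PAGE_ALIGNED for byte sizes, UINT_MAX for
 * object counts).  Schematically:
 *
 *	attr32 = (attr64 > UINT_MAX) ? CLAMP : (uint32_t)attr64;
 *
 * so a 32-bit client sees a conservative bound rather than a silently
 * truncated, and therefore wrong, value.
 */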

/*ARGSUSED*/
static int
rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	rsmseg_t	*seg;
	rsmresource_t	*res;
	minor_t		rnum;
	rsm_ioctlmsg_t	msg = {0};
	int		error;
	adapter_t	*adapter;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));

	if (cmd == RSM_IOCTL_CONSUMEEVENT) {
		error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
		return (error);
	}

	/* topology cmd does not use the arg common to other cmds */
	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
		error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_ioctl done: %d\n", error));
		return (error);
	}

	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
		error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_ioctl done: %d\n", error));
		return (error);
	}

	/*
	 * try to load arguments
	 */
	if (cmd != RSM_IOCTL_RING_BELL &&
	    rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_ioctl done: EFAULT\n"));
		return (RSMERR_BAD_ADDR);
	}

	if (cmd == RSM_IOCTL_ATTR) {
		adapter = rsm_getadapter(&msg, mode);
		if (adapter == NULL) {
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_ioctl done: ENODEV\n"));
			return (RSMERR_CTLR_NOT_PRESENT);
		}
		error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
		rsmka_release_adapter(adapter);
		DBG_PRINTF((category, RSM_DEBUG,
		    "rsm_ioctl: after copyout %d\n", error));
		return (error);
	}

	if (cmd == RSM_IOCTL_BAR_INFO) {
		/* Return library off,len of barrier page */
		msg.off = barrier_offset;
		msg.len = (int)barrier_size;
#ifdef _MULTI_DATAMODEL
		if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
			rsm_ioctlmsg32_t msg32;

			if (msg.len > UINT_MAX)
				msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
			else
				msg32.len = (int32_t)msg.len;
			msg32.off = (int32_t)msg.off;
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_ioctl done\n"));
			if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
			    sizeof (msg32), mode))
				return (RSMERR_BAD_ADDR);
			else
				return (RSM_SUCCESS);
		}
#endif
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_ioctl done\n"));
		if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
		    sizeof (msg), mode))
			return (RSMERR_BAD_ADDR);
		else
			return (RSM_SUCCESS);
	}

	if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
		/* map the nodeid or hwaddr */
		error = rsmaddr_ioctl(cmd, &msg, mode);
		if (error == RSM_SUCCESS) {
#ifdef _MULTI_DATAMODEL
			if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
				rsm_ioctlmsg32_t msg32;

				msg32.hwaddr = (uint64_t)msg.hwaddr;
				msg32.nodeid = (uint32_t)msg.nodeid;

				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
				    "rsm_ioctl done\n"));
				if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
				    sizeof (msg32), mode))
					return (RSMERR_BAD_ADDR);
				else
					return (RSM_SUCCESS);
			}
#endif
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_ioctl done\n"));
			if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
			    sizeof (msg), mode))
				return (RSMERR_BAD_ADDR);
			else
				return (RSM_SUCCESS);
		}
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_ioctl done: %d\n", error));
		return (error);
	}

	/* Find resource and look it up in read mode */
	rnum = getminor(dev);
	res = rsmresource_lookup(rnum, RSM_NOLOCK);
	ASSERT(res != NULL);

	/*
	 * Find command group
	 */
	switch (RSM_IOCTL_CMDGRP(cmd)) {
	case RSM_IOCTL_EXPORT_SEG:
		/*
		 * Export list is searched during publish, loopback and
		 * remote lookup call.
		 */
		seg = rsmresource_seg(res, rnum, credp,
		    RSM_RESOURCE_EXPORT_SEGMENT);
		if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
			error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
			    credp);
		} else { /* export ioctl on an import/barrier resource */
			error = RSMERR_BAD_SEG_HNDL;
		}
		break;
	case RSM_IOCTL_IMPORT_SEG:
		/* Import list is searched during remote unmap call. */
		seg = rsmresource_seg(res, rnum, credp,
		    RSM_RESOURCE_IMPORT_SEGMENT);
		if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
			error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
			    credp);
		} else { /* import ioctl on an export/barrier resource */
			error = RSMERR_BAD_SEG_HNDL;
		}
		break;
	case RSM_IOCTL_BAR:
		if (res != RSMRC_RESERVED &&
		    res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
			error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
			    mode);
		} else { /* invalid res value */
			error = RSMERR_BAD_SEG_HNDL;
		}
		break;
	case RSM_IOCTL_BELL:
		if (res != RSMRC_RESERVED) {
			if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
				error = exportbell_ioctl((rsmseg_t *)res, cmd);
			else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
				error = importbell_ioctl((rsmseg_t *)res, cmd);
			else /* RSM_RESOURCE_BAR */
				error = RSMERR_BAD_SEG_HNDL;
		} else { /* invalid res value */
			error = RSMERR_BAD_SEG_HNDL;
		}
		break;
	default:
		error = EINVAL;
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
	    error));
	return (error);
}
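
/*
 * A minimal user-level sketch of driving the address-mapping command group
 * handled above.  This assumes access to the private kernel-agent ioctl
 * definitions and that the agent's device node is /dev/rsm; the controller
 * name "sci0" and node id are purely illustrative.  The RSMAPI library is
 * the real consumer of this interface:
 *
 *	int fd = open("/dev/rsm", O_RDWR);
 *	rsm_ioctlmsg_t m = {0};
 *
 *	m.cnum = 0;				// controller instance
 *	m.cname = (caddr_t)"sci0";		// controller name
 *	m.cname_len = strlen("sci0") + 1;
 *	m.nodeid = 2;				// remote node to resolve
 *	if (ioctl(fd, RSM_IOCTL_MAP_TO_ADDR, &m) == 0)
 *		printf("node 2 hwaddr = %llx\n", (long long)m.hwaddr);
 */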

/* **************************** Segment Mapping Operations ********* */

static rsm_mapinfo_t *
rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
    size_t *map_len)
{
	rsm_mapinfo_t	*p;

	/*
	 * Find the correct mapinfo structure to use during the mapping
	 * from the seg->s_mapinfo list.
	 * The seg->s_mapinfo list contains in reverse order the mappings
	 * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
	 * access the correct entry within this list for the mapping
	 * requested.
	 *
	 * The algorithm for selecting a list entry is as follows:
	 *
	 * When start_offset of an entry <= off we have found the entry
	 * we were looking for. Adjust the dev_offset and map_len (needs
	 * to be PAGESIZE aligned).
	 */
	for (p = seg->s_mapinfo; p != NULL; p = p->next) {
		if (p->start_offset <= off) {
			*dev_offset = p->dev_offset + off - p->start_offset;
			*map_len = (len > p->individual_len) ?
			    p->individual_len : ptob(btopr(len));
			return (p);
		}
	}

	return (NULL);
}

static void
rsm_free_mapinfo(rsm_mapinfo_t *mapinfo)
{
	rsm_mapinfo_t	*p;

	while (mapinfo != NULL) {
		p = mapinfo;
		mapinfo = mapinfo->next;
		kmem_free(p, sizeof (*p));
	}
}
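
/*
 * Worked example of the lookup above (values purely illustrative): suppose
 * rsm_map returned three chunks covering a 24K segment, so the
 * reverse-ordered s_mapinfo list is
 *
 *	{ start_offset = 16K, dev_offset = 0xD000, individual_len = 8K }
 *	{ start_offset =  8K, dev_offset = 0xB000, individual_len = 8K }
 *	{ start_offset =  0K, dev_offset = 0xA000, individual_len = 8K }
 *
 * A request for off = 10K skips the first entry (16K > 10K) and matches
 * the second (8K <= 10K), giving
 *
 *	*dev_offset = 0xB000 + (10K - 8K) = 0xB000 + 2K
 *
 * and *map_len limited to what that single entry can supply.
 */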

static int
rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
    size_t len, void **pvtp)
{
	rsmcookie_t	*p;
	rsmresource_t	*res;
	rsmseg_t	*seg;
	minor_t		rnum;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmmap_map: dhp = %x\n", dhp));

	flags = flags;

	rnum = getminor(dev);
	res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
	ASSERT(res != NULL);

	seg = (rsmseg_t *)res;

	rsmseglock_acquire(seg);

	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

	/*
	 * Allocate structure and add cookie to segment list
	 */
	p = kmem_alloc(sizeof (*p), KM_SLEEP);

	p->c_dhp = dhp;
	p->c_off = off;
	p->c_len = len;
	p->c_next = seg->s_ckl;
	seg->s_ckl = p;

	*pvtp = (void *)seg;

	rsmseglock_release(seg);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
	return (DDI_SUCCESS);
}

/*
 * Page fault handling is done here. The prerequisite mapping setup has
 * been done in rsm_devmap with calls to devmap_devmem_setup or
 * devmap_umem_setup.
 */
static int
rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
    uint_t type, uint_t rw)
{
	int		e;
	rsmseg_t	*seg = (rsmseg_t *)pvt;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));

	rsmseglock_acquire(seg);

	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

	while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsmmap_access done: cv_wait INTR"));
			rsmseglock_release(seg);
			return (RSMERR_INTERRUPTED);
		}
	}

	ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
	    seg->s_state == RSM_STATE_ACTIVE);

	if (seg->s_state == RSM_STATE_DISCONNECT)
		seg->s_flags |= RSM_IMPORT_DUMMY;

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmmap_access: dhp = %x\n", dhp));

	rsmseglock_release(seg);

	if ((e = devmap_load(dhp, offset, len, type, rw)) != 0) {
		DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));

	return (e);
}

static int
rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
    void **newpvt)
{
	rsmseg_t	*seg = (rsmseg_t *)oldpvt;
	rsmcookie_t	*p, *old;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));

	/*
	 * Same as map, create an entry to hold cookie and add it to
	 * connect segment list. The oldpvt is a pointer to segment.
	 * Return segment pointer in newpvt.
	 */
	rsmseglock_acquire(seg);

	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

	/*
	 * Find old cookie
	 */
	for (old = seg->s_ckl; old != NULL; old = old->c_next) {
		if (old->c_dhp == dhp) {
			break;
		}
	}
	if (old == NULL) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmmap_dup done: EINVAL\n"));
		rsmseglock_release(seg);
		return (EINVAL);
	}

	p = kmem_alloc(sizeof (*p), KM_SLEEP);

	p->c_dhp = new_dhp;
	p->c_off = old->c_off;
	p->c_len = old->c_len;
	p->c_next = seg->s_ckl;
	seg->s_ckl = p;

	*newpvt = (void *)seg;

	rsmseglock_release(seg);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));

	return (DDI_SUCCESS);
}
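
/*
 * rsmmap_dup typically runs when an address space holding a mapping is
 * duplicated, i.e. on fork(2).  A user-level sketch of the scenario
 * (illustrative; error handling omitted):
 *
 *	void *va = mmap(NULL, len, PROT_READ, MAP_SHARED, rsm_fd, off);
 *	pid_t pid = fork();
 *	// parent and child now each hold a devmap handle on the same
 *	// import segment; the kernel agent tracks one rsmcookie_t per
 *	// handle on seg->s_ckl, and rsmmap_unmap frees each in turn.
 */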

static void
rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
    devmap_cookie_t new_dhp1, void **pvtp1,
    devmap_cookie_t new_dhp2, void **pvtp2)
{
	/*
	 * Remove pvtp structure from segment list.
	 */
	rsmseg_t	*seg = (rsmseg_t *)pvtp;
	int		freeflag;

	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));

	off = off; len = len;
	pvtp1 = pvtp1; pvtp2 = pvtp2;

	rsmseglock_acquire(seg);

	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmmap_unmap: dhp = %x\n", dhp));
	/*
	 * We can go ahead and remove the dhps even if we are in
	 * the MAPPING state because the dhps being removed here
	 * belong to a different mmap and we are holding the segment
	 * lock.
	 */
	if (new_dhp1 == NULL && new_dhp2 == NULL) {
		/* find and remove dhp handle */
		rsmcookie_t *tmp, **back = &seg->s_ckl;

		while (*back != NULL) {
			tmp = *back;
			if (tmp->c_dhp == dhp) {
				*back = tmp->c_next;
				kmem_free(tmp, sizeof (*tmp));
				break;
			}
			back = &tmp->c_next;
		}
	} else {
		DBG_PRINTF((category, RSM_DEBUG_LVL2,
		    "rsmmap_unmap: partial unmap "
		    "new_dhp1 %lx, new_dhp2 %lx\n",
		    (size_t)new_dhp1, (size_t)new_dhp2));
	}

	/*
	 * rsmmap_unmap is called for each mapping cookie on the list.
	 * When the list becomes empty and we are not in the MAPPING
	 * state then unmap in the rsmpi driver.
	 */
	if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
		(void) rsm_unmap(seg);

	if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
		freeflag = 1;
	} else {
		freeflag = 0;
	}

	rsmseglock_release(seg);

	if (freeflag) {
		/* Free the segment structure */
		rsmseg_free(seg);
	}
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
}

static struct devmap_callback_ctl rsmmap_ops = {
	DEVMAP_OPS_REV,	/* devmap_ops version number */
	rsmmap_map,	/* devmap_ops map routine */
	rsmmap_access,	/* devmap_ops access routine */
	rsmmap_dup,	/* devmap_ops dup routine */
	rsmmap_unmap,	/* devmap_ops unmap routine */
};
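
/*
 * How the pieces above fit together, from the user's point of view: an
 * mmap(2) on the rsm device enters rsm_segmap, which calls devmap_setup;
 * the devmap framework then calls rsm_devmap (below) to validate and set
 * up the mapping, and invokes the rsmmap_ops callbacks as the mapping is
 * created, faulted on, duplicated and torn down.  A minimal user-level
 * sketch, illustrative only (the RSMAPI library normally does this):
 *
 *	int fd = open("/dev/rsm", O_RDWR);
 *	// ... connect an import segment via the RSM ioctls ...
 *	void *va = mmap(NULL, seglen, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, segoff);	// -> rsm_segmap -> rsm_devmap
 *	// first touch of *va faults -> rsmmap_access -> devmap_load
 *	munmap(va, seglen);		// -> rsmmap_unmap
 */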

static int
rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
    size_t *maplen, uint_t model /*ARGSUSED*/)
{
	struct devmap_callback_ctl *callbackops = &rsmmap_ops;
	int		err;
	uint_t		maxprot;
	minor_t		rnum;
	rsmseg_t	*seg;
	off_t		dev_offset;
	size_t		cur_len;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_devmap: off = %lx, len = %lx\n", off, len));
	rnum = getminor(dev);
	seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
	ASSERT(seg != NULL);

	if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
		if ((off == barrier_offset) &&
		    (len == barrier_size)) {

			ASSERT(bar_va != NULL && bar_cookie != NULL);

			/*
			 * The offset argument in devmap_umem_setup represents
			 * the offset within the kernel memory defined by the
			 * cookie. We use this offset as barrier_offset.
			 */
			err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
			    barrier_offset, len, PROT_USER|PROT_READ,
			    DEVMAP_DEFAULTS, 0);

			if (err != 0) {
				DBG_PRINTF((category, RSM_ERR,
				    "rsm_devmap done: %d\n", err));
				return (RSMERR_MAP_FAILED);
			}
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_devmap done: %d\n", err));

			*maplen = barrier_size;

			return (err);
		} else {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_devmap done: bad barrier offset/len\n"));
			return (RSMERR_MAP_FAILED);
		}
	}

	ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
	ASSERT(seg->s_state == RSM_STATE_MAPPING);

	/*
	 * Make sure we still have permission for the map operation.
	 */
	maxprot = PROT_USER;
	if (seg->s_mode & RSM_PERM_READ) {
		maxprot |= PROT_READ;
	}

	if (seg->s_mode & RSM_PERM_WRITE) {
		maxprot |= PROT_WRITE;
	}

	/*
	 * For each devmap call, rsmmap_map is called. This maintains driver
	 * private information for the mapping. Thus, if there are multiple
	 * devmap calls there will be multiple rsmmap_map calls and for each
	 * call, the mapping information will be stored.
	 * In case of an error during the processing of the devmap call, error
	 * will be returned. This error return causes the caller of rsm_devmap
	 * to undo all the mappings by calling rsmmap_unmap for each one.
	 * rsmmap_unmap will free up the private information for the requested
	 * mapping.
	 */
	if (seg->s_node != my_nodeid) {
		rsm_mapinfo_t *p;

		p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
		if (p == NULL) {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_devmap: incorrect mapping info\n"));
			return (RSMERR_MAP_FAILED);
		}
		err = devmap_devmem_setup(dhc, p->dip,
		    callbackops, p->dev_register,
		    dev_offset, cur_len, maxprot,
		    DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);

		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
		    "off=%lx,len=%lx\n",
		    p->dip, p->dev_register, dev_offset, off, cur_len));

		if (err != 0) {
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_devmap: devmap_devmem_setup failed %d\n",
			    err));
			return (RSMERR_MAP_FAILED);
		}
		/* cur_len is always an integral multiple of pagesize */
		ASSERT((cur_len & (PAGESIZE-1)) == 0);
		*maplen = cur_len;
		return (err);

	} else {
		err = devmap_umem_setup(dhc, rsm_dip, callbackops,
		    seg->s_cookie, off, len, maxprot,
		    DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
		if (err != 0) {
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_devmap: devmap_umem_setup failed %d\n",
			    err));
			return (RSMERR_MAP_FAILED);
		}
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_devmap: loopback done\n"));

		*maplen = ptob(btopr(len));

		return (err);
	}
}
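
/*
 * Note that for a remote segment rsm_devmap maps at most one mapinfo entry
 * per call and reports the amount actually mapped through *maplen; when
 * *maplen is less than the requested length, the devmap framework calls
 * the devmap entry point again with the offset advanced accordingly (see
 * devmap(9E)).  Illustrative walk for a 24K request backed by the three
 * 8K chunks from the earlier mapinfo example:
 *
 *	rsm_devmap(off = 0K,  len = 24K) -> *maplen = 8K
 *	rsm_devmap(off = 8K,  len = 16K) -> *maplen = 8K
 *	rsm_devmap(off = 16K, len = 8K)  -> *maplen = 8K
 */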

/*
 * We can use the devmap framework for mapping device memory to user space by
 * specifying this routine in the rsm_cb_ops structure. The kernel mmap
 * processing calls this entry point and devmap_setup is called within this
 * function, which eventually calls rsm_devmap.
 */
static int
rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
{
	int			error = 0;
	int			old_state;
	minor_t			rnum;
	rsmseg_t		*seg, *eseg;
	adapter_t		*adapter;
	rsm_import_share_t	*sharedp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));

	/*
	 * find segment
	 */
	rnum = getminor(dev);
	seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);

	if (seg == NULL) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_segmap done: invalid segment\n"));
		return (EINVAL);
	}

	/*
	 * the user is trying to map a resource that has not been
	 * defined yet. The library uses this to map in the
	 * barrier page.
	 */
	if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
		rsmseglock_release(seg);

		/*
		 * The mapping for the barrier page is identified
		 * by the special offset barrier_offset
		 */

		if (off == (off_t)barrier_offset ||
		    len == (off_t)barrier_size) {
			if (bar_cookie == NULL || bar_va == NULL) {
				DBG_PRINTF((category, RSM_DEBUG,
				    "rsm_segmap: bar cookie/va is NULL\n"));
				return (EINVAL);
			}

			error = devmap_setup(dev, (offset_t)off, as, addrp,
			    (size_t)len, prot, maxprot, flags, cred);

			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_segmap done: %d\n", error));
			return (error);
		} else {
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_segmap: bad offset/length\n"));
			return (EINVAL);
		}
	}

	/* Make sure you can only map imported segments */
	if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_segmap done: not an import segment\n"));
		return (EINVAL);
	}
	/* check means library is broken */
	ASSERT(seg->s_hdr.rsmrc_num == rnum);

	/* wait for the segment to become unquiesced */
	while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
		if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
			rsmseglock_release(seg);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_segmap done: cv_wait INTR"));
			return (ENODEV);
		}
	}

	/* wait until segment leaves the mapping state */
	while (seg->s_state == RSM_STATE_MAPPING)
		cv_wait(&seg->s_cv, &seg->s_lock);

	/*
	 * we allow multiple maps of the same segment in the KA
	 * and it works because we do an rsmpi map of the whole
	 * segment during the first map and all the device mapping
	 * information needed in rsm_devmap is in the mapinfo list.
	 */
	if ((seg->s_state != RSM_STATE_CONNECT) &&
	    (seg->s_state != RSM_STATE_ACTIVE)) {
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG,
		    "rsm_segmap done: segment not connected\n"));
		return (ENODEV);
	}

	/*
	 * Make sure we are not mapping a larger segment than what's
	 * exported
	 */
	if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG,
		    "rsm_segmap done: off+len>seg size\n"));
		return (ENXIO);
	}

	/*
	 * Make sure we still have permission for the map operation.
	 */
	maxprot = PROT_USER;
	if (seg->s_mode & RSM_PERM_READ) {
		maxprot |= PROT_READ;
	}

	if (seg->s_mode & RSM_PERM_WRITE) {
		maxprot |= PROT_WRITE;
	}

	if ((prot & maxprot) != prot) {
		/* No permission */
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG,
		    "rsm_segmap done: no permission\n"));
		return (EACCES);
	}

	old_state = seg->s_state;

	ASSERT(seg->s_share != NULL);

	rsmsharelock_acquire(seg);

	sharedp = seg->s_share;

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_segmap: RSMSI_STATE=%d\n", sharedp->rsmsi_state));

	if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
	    (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
		rsmsharelock_release(seg);
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG,
		    "rsm_segmap done: RSMSI_STATE %d invalid\n",
		    sharedp->rsmsi_state));
		return (ENODEV);
	}

	/*
	 * Do the map - since we want importers to share mappings
	 * we do the rsmpi map for the whole segment
	 */
	if (seg->s_node != my_nodeid) {
		uint_t		dev_register;
		off_t		dev_offset;
		dev_info_t	*dip;
		size_t		tmp_len;
		size_t		total_length_mapped = 0;
		size_t		length_to_map = seg->s_len;
		off_t		tmp_off = 0;
		rsm_mapinfo_t	*p;

		/*
		 * length_to_map = seg->s_len is always an integral
		 * multiple of PAGESIZE. Length mapped in each entry in mapinfo
		 * list is a multiple of PAGESIZE - RSMPI map ensures this
		 */

		adapter = seg->s_adapter;
		ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
		    sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

		if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
			error = 0;
			/* map the whole segment */
			while (total_length_mapped < seg->s_len) {
				tmp_len = 0;

				error = adapter->rsmpi_ops->rsm_map(
				    seg->s_handle.in, tmp_off,
				    length_to_map, &tmp_len,
				    &dip, &dev_register, &dev_offset,
				    NULL, NULL);

				if (error != 0)
					break;

				/*
				 * Store the mapping info obtained from rsm_map
				 */
				p = kmem_alloc(sizeof (*p), KM_SLEEP);
				p->dev_register = dev_register;
				p->dev_offset = dev_offset;
				p->dip = dip;
				p->individual_len = tmp_len;
				p->start_offset = tmp_off;
				p->next = sharedp->rsmsi_mapinfo;
				sharedp->rsmsi_mapinfo = p;

				total_length_mapped += tmp_len;
				length_to_map -= tmp_len;
				tmp_off += tmp_len;
			}
			seg->s_mapinfo = sharedp->rsmsi_mapinfo;

			if (error != RSM_SUCCESS) {
				/* Check if this is the first rsm_map */
				if (sharedp->rsmsi_mapinfo != NULL) {
					/*
					 * A single rsm_unmap undoes
					 * multiple rsm_maps.
					 */
					(void) seg->s_adapter->rsmpi_ops->
					    rsm_unmap(sharedp->rsmsi_handle);
					rsm_free_mapinfo(sharedp->
					    rsmsi_mapinfo);
				}
				sharedp->rsmsi_mapinfo = NULL;
				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
				rsmsharelock_release(seg);
				rsmseglock_release(seg);
				DBG_PRINTF((category, RSM_DEBUG,
				    "rsm_segmap done: rsmpi map err %d\n",
				    error));
				ASSERT(error != RSMERR_BAD_LENGTH &&
				    error != RSMERR_BAD_MEM_ALIGNMENT &&
				    error != RSMERR_BAD_SEG_HNDL);
				if (error == RSMERR_UNSUPPORTED_OPERATION)
					return (ENOTSUP);
				else if (error ==
				    RSMERR_INSUFFICIENT_RESOURCES)
					return (EAGAIN);
				else if (error == RSMERR_CONN_ABORTED)
					return (ENODEV);
				else
					return (error);
			} else {
				sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
			}
		} else {
			seg->s_mapinfo = sharedp->rsmsi_mapinfo;
		}

		sharedp->rsmsi_mapcnt++;

		rsmsharelock_release(seg);

		/* move to an intermediate mapping state */
		seg->s_state = RSM_STATE_MAPPING;
		rsmseglock_release(seg);

		error = devmap_setup(dev, (offset_t)off, as, addrp,
		    len, prot, maxprot, flags, cred);

		rsmseglock_acquire(seg);
		ASSERT(seg->s_state == RSM_STATE_MAPPING);

		if (error == DDI_SUCCESS) {
			seg->s_state = RSM_STATE_ACTIVE;
		} else {
			rsmsharelock_acquire(seg);

			ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

			sharedp->rsmsi_mapcnt--;
			if (sharedp->rsmsi_mapcnt == 0) {
				/* unmap the shared RSMPI mapping */
				ASSERT(sharedp->rsmsi_handle != NULL);
				(void) adapter->rsmpi_ops->
				    rsm_unmap(sharedp->rsmsi_handle);
				rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
				sharedp->rsmsi_mapinfo = NULL;
				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
			}

			rsmsharelock_release(seg);
			seg->s_state = old_state;
			DBG_PRINTF((category, RSM_ERR,
			    "rsm: devmap_setup failed %d\n", error));
		}
		cv_broadcast(&seg->s_cv);
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
		    error));
		return (error);
	} else {
		/*
		 * For loopback, the export segment mapping cookie (s_cookie)
		 * is also used as the s_cookie value for its import segments
		 * during mapping.
		 * Note that reference counting for s_cookie of the export
		 * segment is not required due to the following:
		 * We never have a case of the export segment being destroyed,
		 * leaving the import segments with a stale value for the
		 * s_cookie field, since a force disconnect is done prior to a
		 * destroy of an export segment. The force disconnect causes
		 * the s_cookie value to be reset to NULL. Also for the
		 * rsm_rebind operation, we change the s_cookie value of the
		 * export segment as well as of all its local (loopback)
		 * importers.
		 */
		DBG_ADDCATEGORY(category, RSM_LOOPBACK);

		rsmsharelock_release(seg);
		/*
		 * In order to maintain the lock ordering between the export
		 * and import segment locks, we need to acquire the export
		 * segment lock first and only then acquire the import
		 * segment lock.
		 * The above is necessary to avoid any deadlock scenarios
		 * with rsm_rebind which also acquires both the export
		 * and import segment locks in the above mentioned order.
		 * Based on code inspection, there seem to be no other
		 * situations in which both the export and import segment
		 * locks are acquired either in the same or opposite order
		 * as mentioned above.
		 * Thus in order to conform to the above lock order, we
		 * need to change the state of the import segment to
		 * RSM_STATE_MAPPING, release the lock. Once this is done we
		 * can now safely acquire the export segment lock first
		 * followed by the import segment lock which is as per
		 * the lock order mentioned above.
		 */
		/* move to an intermediate mapping state */
		seg->s_state = RSM_STATE_MAPPING;
		rsmseglock_release(seg);

		eseg = rsmexport_lookup(seg->s_key);

		if (eseg == NULL) {
			rsmseglock_acquire(seg);
			/*
			 * Revert to old_state and signal any waiters
			 * The shared state is not changed
			 */

			seg->s_state = old_state;
			cv_broadcast(&seg->s_cv);
			rsmseglock_release(seg);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_segmap done: key %d not found\n",
			    seg->s_key));
			return (ENODEV);
		}

		rsmsharelock_acquire(seg);
		ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
		    sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

		sharedp->rsmsi_mapcnt++;
		sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
		rsmsharelock_release(seg);

		ASSERT(eseg->s_cookie != NULL);

		/*
		 * It is not required or necessary to acquire the import
		 * segment lock here to change the value of s_cookie since
		 * no one will touch the import segment as long as it is
		 * in the RSM_STATE_MAPPING state.
		 */
		seg->s_cookie = eseg->s_cookie;

		rsmseglock_release(eseg);

		error = devmap_setup(dev, (offset_t)off, as, addrp,
		    (size_t)len, prot, maxprot, flags, cred);

		rsmseglock_acquire(seg);
		ASSERT(seg->s_state == RSM_STATE_MAPPING);
		if (error == 0) {
			seg->s_state = RSM_STATE_ACTIVE;
		} else {
			rsmsharelock_acquire(seg);

			ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

			sharedp->rsmsi_mapcnt--;
			if (sharedp->rsmsi_mapcnt == 0) {
				sharedp->rsmsi_mapinfo = NULL;
				sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
			}
			rsmsharelock_release(seg);
			seg->s_state = old_state;
			seg->s_cookie = NULL;
		}
		cv_broadcast(&seg->s_cv);
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_LVL2,
		    "rsm_segmap done: %d\n", error));
		return (error);
	}
}
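
/*
 * The shared-mapping protocol used by rsm_segmap above, in outline: all
 * importers of a given segment share one RSMPI mapping, reference-counted
 * by rsmsi_mapcnt under the share lock.  A sketch of the invariant (not
 * the actual code):
 *
 *	map:	if (state == CONNECTED)
 *			rsmpi map whole segment, state = MAPPED;
 *		mapcnt++;
 *	unmap:	if (--mapcnt == 0)
 *			rsmpi unmap, free mapinfo, state = CONNECTED;
 *
 * so the RSMPI mapping is created on the first importer map and torn down
 * only when the last importer unmaps or a map attempt fails.
 */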

int
rsmka_null_seg_create(
    rsm_controller_handle_t argcp,
    rsm_memseg_export_handle_t *handle,
    size_t size,
    uint_t flags,
    rsm_memory_local_t *memory,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}


int
rsmka_null_seg_destroy(
    rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}


int
rsmka_null_bind(
    rsm_memseg_export_handle_t argmemseg,
    off_t offset,
    rsm_memory_local_t *argmemory,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}


int
rsmka_null_unbind(
    rsm_memseg_export_handle_t argmemseg,
    off_t offset,
    size_t length /*ARGSUSED*/)
{
	return (DDI_SUCCESS);
}

int
rsmka_null_rebind(
    rsm_memseg_export_handle_t argmemseg,
    off_t offset,
    rsm_memory_local_t *memory,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}

int
rsmka_null_publish(
    rsm_memseg_export_handle_t argmemseg,
    rsm_access_entry_t access_list[],
    uint_t access_list_length,
    rsm_memseg_id_t segment_id,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}


int
rsmka_null_republish(
    rsm_memseg_export_handle_t memseg,
    rsm_access_entry_t access_list[],
    uint_t access_list_length,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}

int
rsmka_null_unpublish(
    rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
{
	return (RSM_SUCCESS);
}


void
rsmka_init_loopback()
{
	rsm_ops_t	*ops = &null_rsmpi_ops;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmka_init_loopback enter\n"));

	/* initialize null ops vector */
	ops->rsm_seg_create = rsmka_null_seg_create;
	ops->rsm_seg_destroy = rsmka_null_seg_destroy;
	ops->rsm_bind = rsmka_null_bind;
	ops->rsm_unbind = rsmka_null_unbind;
	ops->rsm_rebind = rsmka_null_rebind;
	ops->rsm_publish = rsmka_null_publish;
	ops->rsm_unpublish = rsmka_null_unpublish;
	ops->rsm_republish = rsmka_null_republish;

	/* initialize attributes for loopback adapter */
	loopback_attr.attr_name = loopback_str;
	loopback_attr.attr_page_size = 0x8; /* 8K */

	/* initialize loopback adapter */
	loopback_adapter.rsm_attr = loopback_attr;
	loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsmka_init_loopback done\n"));
}
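
/*
 * With the ops vector above in place, any RSMPI call made through the
 * loopback adapter degenerates to a successful no-op; for example
 *
 *	adapter->rsmpi_ops->rsm_publish(...)
 *
 * resolves to rsmka_null_publish() when adapter is the loopback adapter.
 * This lets the common export/import paths treat local (loopback) segments
 * exactly like remote ones; the real work, sharing the export segment's
 * s_cookie with its local importers, happens in rsm_segmap's loopback
 * branch.
 */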

/* ************** DR functions ********************************** */

static void
rsm_quiesce_exp_seg(rsmresource_t *resp)
{
	int		recheck_state;
	rsmseg_t	*segp = (rsmseg_t *)resp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_DEFINE_STR(function, "rsm_quiesce_exp_seg");

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "%s enter: key=%u\n", function, segp->s_key));

	rsmseglock_acquire(segp);
	do {
		recheck_state = 0;
		if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
		    (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
		    (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
		    (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "%s done: state=%d\n", function,
			    segp->s_state));
			return;
		}

		if (segp->s_state == RSM_STATE_NEW) {
			segp->s_state = RSM_STATE_NEW_QUIESCED;
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "%s done: state=%d\n", function,
			    segp->s_state));
			return;
		}

		if (segp->s_state == RSM_STATE_BIND) {
			/* unbind */
			(void) rsm_unbind_pages(segp);
			segp->s_state = RSM_STATE_BIND_QUIESCED;
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "%s done: state=%d\n", function,
			    segp->s_state));
			return;
		}

		if (segp->s_state == RSM_STATE_EXPORT) {
			/*
			 * wait for putv/getv to complete if the segp is
			 * a local memory handle
			 */
			while ((segp->s_state == RSM_STATE_EXPORT) &&
			    (segp->s_rdmacnt != 0)) {
				cv_wait(&segp->s_cv, &segp->s_lock);
			}

			if (segp->s_state != RSM_STATE_EXPORT) {
				/*
				 * state changed need to see what it
				 * should be changed to.
				 */
				recheck_state = 1;
				continue;
			}

			segp->s_state = RSM_STATE_EXPORT_QUIESCING;
			rsmseglock_release(segp);
			/*
			 * send SUSPEND messages - currently it will be
			 * done at the end
			 */
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "%s done: state=%d\n", function,
			    segp->s_state));
			return;
		}
	} while (recheck_state);

	rsmseglock_release(segp);
}

static void
rsm_unquiesce_exp_seg(rsmresource_t *resp)
{
	int			ret;
	rsmseg_t		*segp = (rsmseg_t *)resp;
	rsmapi_access_entry_t	*acl;
	rsm_access_entry_t	*rsmpi_acl;
	int			acl_len;
	int			create_flags = 0;
	struct buf		*xbuf;
	rsm_memory_local_t	mem;
	adapter_t		*adapter;
	dev_t			sdev = 0;
	rsm_resource_callback_t	callback_flag;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");

	rsmseglock_acquire(segp);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "%s enter: key=%u, state=%d\n", function, segp->s_key,
	    segp->s_state));

	if ((segp->s_state == RSM_STATE_NEW) ||
	    (segp->s_state == RSM_STATE_BIND) ||
	    (segp->s_state == RSM_STATE_EXPORT)) {
		rsmseglock_release(segp);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "%s done: state=%d\n", function, segp->s_state));
		return;
	}

	if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
		segp->s_state = RSM_STATE_NEW;
		cv_broadcast(&segp->s_cv);
		rsmseglock_release(segp);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "%s done: state=%d\n", function, segp->s_state));
		return;
	}

	if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
		/* bind the segment */
		ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
		    segp->s_len, segp->s_proc);
		if (ret == RSM_SUCCESS) { /* bind successful */
			segp->s_state = RSM_STATE_BIND;
		} else { /* bind failed - resource unavailable */
			segp->s_state = RSM_STATE_NEW;
		}
		cv_broadcast(&segp->s_cv);
		rsmseglock_release(segp);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "%s done: bind_qscd bind = %d\n", function, ret));
		return;
	}

	while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
		/* wait for the segment to move to EXPORT_QUIESCED state */
		cv_wait(&segp->s_cv, &segp->s_lock);
	}

	if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
		/* bind the segment */
		ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
		    segp->s_len, segp->s_proc);

		if (ret != RSM_SUCCESS) {
			/* bind failed - resource unavailable */
			acl_len = segp->s_acl_len;
			acl = segp->s_acl;
			rsmpi_acl = segp->s_acl_in;
			segp->s_acl_len = 0;
			segp->s_acl = NULL;
			segp->s_acl_in = NULL;
			rsmseglock_release(segp);

			rsmexport_rm(segp);
			rsmacl_free(acl, acl_len);
			rsmpiacl_free(rsmpi_acl, acl_len);

			rsmseglock_acquire(segp);
			segp->s_state = RSM_STATE_NEW;
			cv_broadcast(&segp->s_cv);
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "%s done: exp_qscd bind failed = %d\n",
			    function, ret));
			return;
		}
		/*
		 * publish the segment
		 * if successful
		 *	segp->s_state = RSM_STATE_EXPORT;
		 * else failed
		 *	segp->s_state = RSM_STATE_BIND;
		 */

		/* check whether it is a local_memory_handle */
		if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
			if ((segp->s_acl[0].ae_node == my_nodeid) &&
			    (segp->s_acl[0].ae_permission == 0)) {
				segp->s_state = RSM_STATE_EXPORT;
				cv_broadcast(&segp->s_cv);
				rsmseglock_release(segp);
				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
				    "%s done: exp_qscd\n", function));
				return;
			}
		}
		xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len,
		    B_WRITE, sdev, 0, NULL, DDI_UMEM_SLEEP);
		ASSERT(xbuf != NULL);

		mem.ms_type = RSM_MEM_BUF;
		mem.ms_bp = xbuf;

		adapter = segp->s_adapter;

		if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
			create_flags = RSM_ALLOW_UNBIND_REBIND;
		}

		if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
			callback_flag = RSM_RESOURCE_DONTWAIT;
		} else {
			callback_flag = RSM_RESOURCE_SLEEP;
		}

		ret = adapter->rsmpi_ops->rsm_seg_create(
		    adapter->rsmpi_handle, &segp->s_handle.out,
		    segp->s_len, create_flags, &mem,
		    callback_flag, NULL);

		if (ret != RSM_SUCCESS) {
			acl_len = segp->s_acl_len;
			acl = segp->s_acl;
			rsmpi_acl = segp->s_acl_in;
			segp->s_acl_len = 0;
			segp->s_acl = NULL;
			segp->s_acl_in = NULL;
			rsmseglock_release(segp);

			rsmexport_rm(segp);
			rsmacl_free(acl, acl_len);
			rsmpiacl_free(rsmpi_acl, acl_len);

			rsmseglock_acquire(segp);
			segp->s_state = RSM_STATE_BIND;
			cv_broadcast(&segp->s_cv);
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_ERR,
			    "%s done: exp_qscd create failed = %d\n",
			    function, ret));
			return;
		}

		ret = adapter->rsmpi_ops->rsm_publish(
		    segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
		    segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);

		if (ret != RSM_SUCCESS) {
			acl_len = segp->s_acl_len;
			acl = segp->s_acl;
			rsmpi_acl = segp->s_acl_in;
			segp->s_acl_len = 0;
			segp->s_acl = NULL;
			segp->s_acl_in = NULL;
			adapter->rsmpi_ops->rsm_seg_destroy(
			    segp->s_handle.out);
			rsmseglock_release(segp);

			rsmexport_rm(segp);
			rsmacl_free(acl, acl_len);
			rsmpiacl_free(rsmpi_acl, acl_len);

			rsmseglock_acquire(segp);
			segp->s_state = RSM_STATE_BIND;
			cv_broadcast(&segp->s_cv);
			rsmseglock_release(segp);
			DBG_PRINTF((category, RSM_ERR,
			    "%s done: exp_qscd publish failed = %d\n",
			    function, ret));
			return;
		}

		segp->s_state = RSM_STATE_EXPORT;
		cv_broadcast(&segp->s_cv);
		rsmseglock_release(segp);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "%s done: exp_qscd\n", function));
		return;
	}

	rsmseglock_release(segp);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
}

static void
rsm_quiesce_imp_seg(rsmresource_t *resp)
{
	rsmseg_t	*segp = (rsmseg_t *)resp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "%s enter: key=%u\n", function, segp->s_key));

	rsmseglock_acquire(segp);
	segp->s_flags |= RSM_DR_INPROGRESS;

	while (segp->s_rdmacnt != 0) {
		/* wait for the RDMA to complete */
		cv_wait(&segp->s_cv, &segp->s_lock);
	}

	rsmseglock_release(segp);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
}

static void
rsm_unquiesce_imp_seg(rsmresource_t *resp)
{
	rsmseg_t	*segp = (rsmseg_t *)resp;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "%s enter: key=%u\n", function, segp->s_key));

	rsmseglock_acquire(segp);

	segp->s_flags &= ~RSM_DR_INPROGRESS;
	/* wake up any waiting putv/getv ops */
	cv_broadcast(&segp->s_cv);

	rsmseglock_release(segp);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
}

static void
rsm_process_exp_seg(rsmresource_t *resp, int event)
{
	if (event == RSM_DR_QUIESCE)
		rsm_quiesce_exp_seg(resp);
	else /* UNQUIESCE */
		rsm_unquiesce_exp_seg(resp);
}

static void
rsm_process_imp_seg(rsmresource_t *resp, int event)
{
	if (event == RSM_DR_QUIESCE)
		rsm_quiesce_imp_seg(resp);
	else /* UNQUIESCE */
		rsm_unquiesce_imp_seg(resp);
}

static void
rsm_dr_process_local_segments(int event)
{

	int			i, j;
	rsmresource_blk_t	*blk;
	rsmresource_t		*p;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_process_local_segments enter\n"));

	/* iterate through the resource structure */

	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);

	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
		blk = rsm_resource.rsmrc_root[i];
		if (blk != NULL) {
			for (j = 0; j < RSMRC_BLKSZ; j++) {
				p = blk->rsmrcblk_blks[j];
				if ((p != NULL) && (p != RSMRC_RESERVED)) {
					/* valid resource */
					if (p->rsmrc_type ==
					    RSM_RESOURCE_EXPORT_SEGMENT)
						rsm_process_exp_seg(p, event);
					else if (p->rsmrc_type ==
					    RSM_RESOURCE_IMPORT_SEGMENT)
						rsm_process_imp_seg(p, event);
				}
			}
		}
	}

	rw_exit(&rsm_resource.rsmrc_lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_process_local_segments done\n"));
}

/* *************** DR callback functions ************ */
static void
rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
{
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_post_add is a no-op\n"));
	/* Noop */
}
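
/*
 * Both DR callbacks below use the same wait-and-recheck idiom: sample the
 * driver state under drv_lock, sleep on drv_cv whenever a transient state
 * is seen, and re-evaluate from the top after every wakeup.  The skeleton
 * of that idiom, reduced to its essentials:
 *
 *	mutex_enter(&lock);
 *	do {
 *		recheck = 0;
 *		switch (state) {
 *		case TRANSIENT:
 *			recheck = 1;
 *			cv_wait(&cv, &lock);
 *			break;
 *		case STEADY:
 *			state = NEXT;	// claim the transition
 *			break;
 *		}
 *	} while (recheck);
 *	mutex_exit(&lock);
 *
 * cv_wait can return after an unrelated broadcast, so the state must
 * always be re-tested before acting on it.
 */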

static int
rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
{
	int	recheck_state = 0;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_pre_del enter\n"));

	mutex_enter(&rsm_drv_data.drv_lock);

	do {
		recheck_state = 0;
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_dr_callback_pre_del: state=%d\n",
		    rsm_drv_data.drv_state));

		switch (rsm_drv_data.drv_state) {
		case RSM_DRV_NEW:
			/*
			 * The state should never be RSM_DRV_NEW
			 * since in this state the callbacks have not yet
			 * been registered. So, ASSERT.
			 */
			ASSERT(0);
			return (0);
		case RSM_DRV_REG_PROCESSING:
			/*
			 * The driver is in the process of registering
			 * with the DR framework. So, wait till the
			 * registration process is complete.
			 */
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_UNREG_PROCESSING:
			/*
			 * If the state is RSM_DRV_UNREG_PROCESSING, the
			 * module is in the process of detaching and
			 * unregistering the callbacks from the DR
			 * framework. So, simply return.
			 */
			mutex_exit(&rsm_drv_data.drv_lock);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_dr_callback_pre_del: "
			    "pre-del on NEW/UNREG\n"));
			return (0);
		case RSM_DRV_OK:
			rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
			break;
		case RSM_DRV_PREDEL_STARTED:
			/* FALLTHRU */
		case RSM_DRV_PREDEL_COMPLETED:
			/* FALLTHRU */
		case RSM_DRV_POSTDEL_IN_PROGRESS:
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_DR_IN_PROGRESS:
			rsm_drv_data.drv_memdel_cnt++;
			mutex_exit(&rsm_drv_data.drv_lock);
			DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
			    "rsm_dr_callback_pre_del done\n"));
			return (0);
			/* break; */
		default:
			ASSERT(0);
			break;
		}

	} while (recheck_state);

	rsm_drv_data.drv_memdel_cnt++;

	mutex_exit(&rsm_drv_data.drv_lock);

	/* Do all the quiescing stuff here */
	DBG_PRINTF((category, RSM_DEBUG,
	    "rsm_dr_callback_pre_del: quiesce things now\n"));

	rsm_dr_process_local_segments(RSM_DR_QUIESCE);

	/*
	 * now that all local segments have been quiesced let's inform
	 * the importers
	 */
	rsm_send_suspend();

	/*
	 * In response to the suspend message the remote node(s) will process
	 * the segments and send a suspend_complete message. Till all
	 * the nodes send the suspend_complete message we wait in the
	 * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
	 * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
	 */
	mutex_enter(&rsm_drv_data.drv_lock);

	while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
		cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
	}

	ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);

	rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
	cv_broadcast(&rsm_drv_data.drv_cv);

	mutex_exit(&rsm_drv_data.drv_lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_pre_del done\n"));

	return (0);
}

static void
rsm_dr_callback_post_del(void *arg, pgcnt_t delta,
    int cancelled /* ARGSUSED */)
{
	int	recheck_state = 0;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_post_del enter\n"));

	mutex_enter(&rsm_drv_data.drv_lock);

	do {
		recheck_state = 0;
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm_dr_callback_post_del: state=%d\n",
		    rsm_drv_data.drv_state));

		switch (rsm_drv_data.drv_state) {
		case RSM_DRV_NEW:
			/*
			 * The driver state cannot be RSM_DRV_NEW
			 * since in this state the callbacks have not
			 * yet been registered.
			 */
			ASSERT(0);
			return;
		case RSM_DRV_REG_PROCESSING:
			/*
			 * The driver is in the process of registering with
			 * the DR framework. Wait till the registration is
			 * complete.
			 */
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_UNREG_PROCESSING:
			/*
			 * RSM_DRV_UNREG_PROCESSING state means the module
			 * is detaching and unregistering the callbacks
			 * from the DR framework. So simply return.
			 */
			/* FALLTHRU */
		case RSM_DRV_OK:
			/*
			 * RSM_DRV_OK means we missed the pre-del
			 * corresponding to this post-del because we had
			 * not registered yet, so simply return.
			 */
			mutex_exit(&rsm_drv_data.drv_lock);
			DBG_PRINTF((category, RSM_DEBUG,
			    "rsm_dr_callback_post_del: "
			    "post-del on OK/UNREG\n"));
			return;
			/* break; */
		case RSM_DRV_PREDEL_STARTED:
			/* FALLTHRU */
		case RSM_DRV_PREDEL_COMPLETED:
			/* FALLTHRU */
		case RSM_DRV_POSTDEL_IN_PROGRESS:
			recheck_state = 1;
			cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
			break;
		case RSM_DRV_DR_IN_PROGRESS:
			rsm_drv_data.drv_memdel_cnt--;
			if (rsm_drv_data.drv_memdel_cnt > 0) {
				mutex_exit(&rsm_drv_data.drv_lock);
				DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
				    "rsm_dr_callback_post_del done\n"));
				return;
			}
			rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
			break;
		default:
			ASSERT(0);
			return;
			/* break; */
		}
	} while (recheck_state);

	mutex_exit(&rsm_drv_data.drv_lock);

	/* Do all the unquiescing stuff here */
	DBG_PRINTF((category, RSM_DEBUG,
	    "rsm_dr_callback_post_del: unquiesce things now\n"));

	rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);

	/*
	 * now that all local segments have been unquiesced let's inform
	 * the importers
	 */
	rsm_send_resume();

	mutex_enter(&rsm_drv_data.drv_lock);

	rsm_drv_data.drv_state = RSM_DRV_OK;

	cv_broadcast(&rsm_drv_data.drv_cv);

	mutex_exit(&rsm_drv_data.drv_lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
	    "rsm_dr_callback_post_del done\n"));
}
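
/*
 * These three callbacks form the driver's half of the memory DR handshake:
 * pre-del quiesces all local segments and suspends the importers, post-del
 * (including a cancelled delete) unquiesces and resumes them, and post-add
 * needs no work.  A minimal sketch of how such callbacks are typically
 * hooked up to the kernel physical-memory DR framework, assuming the
 * standard <sys/mem_config.h> interfaces (the actual registration in this
 * driver happens elsewhere, at attach time):
 *
 *	static kphysm_setup_vector_t rsm_dr_callback_vec = {
 *		KPHYSM_SETUP_VECTOR_VERSION,
 *		rsm_dr_callback_post_add,
 *		rsm_dr_callback_pre_del,
 *		rsm_dr_callback_post_del
 *	};
 *
 *	(void) kphysm_setup_func_register(&rsm_dr_callback_vec, NULL);
 */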