/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Overview of the RSM Kernel Agent:
 * ---------------------------------
 *
 * rsm.c constitutes the implementation of the RSM kernel agent. The RSM
 * kernel agent is a pseudo device driver which makes use of the RSMPI
 * interface on behalf of the RSMAPI user library.
 *
 * The kernel agent functionality can be categorized into the following
 * components:
 * 1. Driver Infrastructure
 * 2. Export/Import Segment Management
 * 3. Internal resource allocation/deallocation
 *
 * The driver infrastructure includes the basic module loading entry points
 * like _init, _info, _fini to load, unload and report information about
 * the driver module. The driver infrastructure also includes the
 * autoconfiguration entry points, namely attach, detach and getinfo, for
 * the device autoconfiguration.
 *
 * The kernel agent is a pseudo character device driver and exports
 * a cb_ops structure which defines the driver entry points for character
 * device access. This includes the open and close entry points. The
 * other entry points provided include ioctl, devmap, segmap and chpoll.
 * The read and write entry points are not used since the device is memory
 * mapped. Also, ddi_prop_op is used for the prop_op entry point.
 *
 * The ioctl entry point supports a number of commands, which are used by
 * the RSMAPI library in order to export and import segments. These
 * commands include commands for binding and rebinding the physical pages
 * allocated to the virtual address range, publishing the export segment,
 * unpublishing and republishing an export segment, creating an
 * import segment and a virtual connection from this import segment to
 * an export segment, performing scatter-gather data transfer, and barrier
 * operations.
 *
 *
 * Export and Import segments:
 * ---------------------------
 *
 * In order to create an RSM export segment a process allocates a range in its
 * virtual address space for the segment using standard Solaris interfaces.
 * The process then calls RSMAPI, which in turn makes an ioctl call to the
 * RSM kernel agent for an allocation of physical memory pages and for
 * creation of the export segment by binding these pages to the virtual
 * address range. These pages are locked in memory so that remote accesses
 * are always applied to the correct page. Then the RSM segment is published,
 * again via RSMAPI making an ioctl to the RSM kernel agent, and a segment id
 * is assigned to it.
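 *
 * As a hedged illustration only (the names below follow the librsm RSMAPI
 * man pages; exact signatures should be taken from the RSMAPI headers, not
 * from this sketch), an exporting process performs roughly:
 *
 *	buf = mmap(NULL, len, PROT_READ|PROT_WRITE,
 *	    MAP_PRIVATE|MAP_ANON, -1, 0);		(reserve a VA range)
 *	rsm_memseg_export_create(ctrl, &seg, buf, len, 0);
 *							(ioctl: bind pages)
 *	rsm_memseg_export_publish(seg, &segid, acl, acl_len);
 *							(ioctl: assign segid)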
 *
 * In order to import a published RSM segment, RSMAPI creates an import
 * segment and forms a virtual connection across the interconnect to the
 * export segment, via an ioctl into the kernel agent with the connect
 * command. The import segment setup is completed by mapping the
 * local device memory into the importer's virtual address space. The
 * mapping of the import segment is handled by the segmap/devmap
 * infrastructure described as follows.
 *
 * Segmap and Devmap interfaces:
 *
 * The RSM kernel agent allows device memory to be directly accessed by user
 * threads via memory mapping. In order to do so, the RSM kernel agent
 * supports the devmap and segmap entry points.
 *
 * The segmap entry point (rsm_segmap) is responsible for setting up a memory
 * mapping as requested by mmap. The devmap entry point (rsm_devmap) is
 * responsible for exporting the device memory to the user applications.
 * rsm_segmap calls RSMPI rsm_map to allocate device memory. Then
 * control is transferred to the devmap_setup call which calls rsm_devmap.
 *
 * rsm_devmap validates the user mapping to the device or kernel memory
 * and passes the information to the system for setting up the mapping. The
 * actual setting up of the mapping is done by devmap_devmem_setup (for
 * device memory) or devmap_umem_setup (for kernel memory). Callbacks are
 * registered for device context management via the devmap_devmem_setup
 * or devmap_umem_setup calls. The callbacks are rsmmap_map, rsmmap_unmap,
 * rsmmap_access, and rsmmap_dup. The callbacks are called when a new mapping
 * is created, a mapping is freed, a mapping is accessed or an existing
 * mapping is duplicated, respectively. These callbacks allow the RSM kernel
 * agent to maintain state information associated with the mappings.
 * The state information is mainly in the form of a cookie list for the import
 * segment for which mapping has been done.
 *
 * Forced disconnect of import segments:
 *
 * When an exported segment is unpublished, the exporter sends a forced
 * disconnect message to all its importers. The importer segments are
 * unloaded and disconnected. This involves unloading the original
 * mappings and remapping to a preallocated kernel trash page. This is
 * done by devmap_umem_remap (sketched below). The trash/dummy page is a
 * kernel page, preallocated by the kernel agent during attach using
 * ddi_umem_alloc with the DDI_UMEM_TRASH flag set. This avoids a core dump
 * in the application due to unloading of the original mappings.
 *
 * Additionally, every segment has a mapping generation number associated
 * with it. This is an entry in the barrier generation page, created
 * during attach time. This mapping generation number for the import
 * segments is incremented on a force disconnect to notify the application
 * of the force disconnect. On this notification, the application needs
 * to reconnect the segment to establish a new legitimate mapping.
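 *
 * A hedged sketch of the remap step only (the devmap cookie dhp comes from
 * the devmap framework; the offset and length arguments shown here are
 * placeholders rather than the values the driver actually computes):
 *
 *	(void) devmap_umem_remap(dhp, rsm_dip, remap_cookie,
 *	    0, maplen, PROT_READ | PROT_WRITE, 0, NULL);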
 *
 *
 * Locks used in the kernel agent:
 * -------------------------------
 *
 * The kernel agent uses a variety of mutexes and condition variables for
 * mutual exclusion of the shared data structures and for synchronization
 * between the various threads. Some of the locks are described as follows.
 *
 * Each resource structure, which represents either an export or an import
 * segment, has a lock associated with it. The lock is the resource mutex,
 * rsmrc_lock. This is used directly by the RSMRC_LOCK and RSMRC_UNLOCK
 * macros and in the rsmseglock_acquire and rsmseglock_release macros. An
 * additional lock called the rsmsi_lock is used for the shared import data
 * structure that is relevant for resources representing import segments.
 * There is also a condition variable associated with the resource called
 * s_cv. This is used to wait for events like the segment state change etc.
 *
 * The resource structures are allocated from a pool of resource structures,
 * called rsm_resource. This pool is protected via a reader-writer lock,
 * called rsmrc_lock.
 *
 * There are two separate hash tables, one for the export segments and
 * one for the import segments. The export segments are inserted into the
 * export segment hash table only after they have been published and the
 * import segments are inserted in the import segments list only after they
 * have successfully connected to an exported segment. These tables are
 * protected via reader-writer locks.
 *
 * Debug Support in the kernel agent:
 * ----------------------------------
 *
 * Debugging support in the kernel agent is provided by the following
 * macros.
 *
 * DBG_PRINTF((category, level, message)) is a macro which logs a debug
 * message to the kernel agent's debug buffer, rsmka_dbg. This debug buffer
 * can be viewed in kmdb as *rsmka_dbg/s. The message is logged based
 * on the definition of the category and level. All messages that belong to
 * the specified category (rsmdbg_category) and are of an equal or greater
 * severity than the specified level (rsmdbg_level) are logged. The message
 * is a string which uses the same formatting rules as the strings used in
 * printf.
 *
 * The category defines which component of the kernel agent has logged this
 * message. There are a number of categories that have been defined, such as
 * RSM_KERNEL_AGENT, RSM_OPS, RSM_IMPORT and RSM_EXPORT. A macro,
 * DBG_ADDCATEGORY, is used to add another category to the currently
 * specified category value so that the component using this new category
 * can also effectively log debug messages. Thus, the category of a specific
 * message is some combination of the available categories and we can define
 * sub-categories if we want a finer level of granularity.
 *
 * The level defines the severity of the message. Different level values are
 * defined, with RSM_ERR being the most severe and RSM_DEBUG_VERBOSE being
 * the least severe (debug level is 0).
 *
 * DBG_DEFINE and DBG_DEFINE_STR are macros provided to declare a debug
 * variable or a string respectively.
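 *
 * For example, the usage pattern found throughout this file (note the
 * double parentheses, which let the macro forward a printf-style argument
 * list through a single macro argument):
 *
 *	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);
 *	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));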
 *
 *
 * NOTES:
 *
 * Special Fork and Exec Handling:
 * -------------------------------
 *
 * The backing physical pages of an exported segment are always locked down.
 * Thus, there are two cases in which a process having exported segments
 * will cause a cpu to hang: (1) the process invokes exec; (2) a process
 * forks and invokes exit before the duped file descriptors for the export
 * segments are closed in the child process. The hang is caused because the
 * address space release algorithm in the Solaris VM subsystem is based on a
 * non-blocking loop which does not terminate while segments are locked
 * down. In addition to this, the Solaris VM subsystem lacks a callback
 * mechanism to the rsm kernel agent to allow unlocking these export
 * segment pages.
 *
 * In order to circumvent this problem, the kernel agent does the following.
 * The Solaris VM subsystem keeps memory segments in increasing order of
 * virtual addresses. Thus a special page (special_exit_offset) is allocated
 * by the kernel agent and is mmapped into the heap area of the process address
 * space (the mmap is done by the RSMAPI library). During the mmap processing
 * of this special page by the devmap infrastructure, a callback (the same
 * devmap context management callbacks discussed above) is registered for an
 * unmap.
 *
 * As discussed above, this page is processed by the Solaris address space
 * release code before any of the exported segment pages (which are allocated
 * from high memory). It is during this processing that the unmap callback gets
 * called and this callback is responsible for force destroying the exported
 * segments and thus eliminating the problem of locked pages.
 *
 * Flow-control:
 * ------------
 *
 * A credit based flow control algorithm is used for messages whose
 * processing cannot be done in the interrupt context because it might
 * involve invoking rsmpi calls, or might take a long time to complete
 * or might need to allocate resources. The algorithm operates on a per
 * path basis. To send a message the pathend needs to have a credit and
 * it consumes one for every message that is flow controlled. On the
 * receiving pathend the message is put on a msgbuf_queue and a task is
 * dispatched on the worker thread - recv_taskq - where it is processed.
 * After processing the message, the receiving pathend dequeues the message,
 * and if it has processed more than RSMIPC_LOTSFREE_MSGBUFS messages it
 * sends credits to the sender pathend. A hedged sketch of the receive
 * side follows.
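 *
 * Illustrative only: process_msg and the procmsg_cnt field are stand-ins,
 * and the RSMIPC_MSG_CREDIT message type is assumed from rsmka_path_int.h;
 * only the rsmka_*_msgbuf helpers and rsmipc_send_controlmsg are actually
 * declared in this file.
 *
 *	rsmka_enqueue_msgbuf(path, data);	(interrupt side buffers msg)
 *	...					(recv_taskq worker thread:)
 *	process_msg(rsmka_gethead_msgbuf(path));
 *	rsmka_dequeue_msgbuf(path);
 *	if (++path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)
 *		(void) rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT);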
 *
 * RSM_DRTEST:
 * -----------
 *
 * This is used to enable DR testing using a test driver on test
 * platforms which do not support DR.
 *
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/vm.h>
#include <sys/uio.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/stat.h>

#include <sys/time.h>
#include <sys/errno.h>

#include <sys/file.h>
#include <sys/uio.h>
#include <sys/proc.h>
#include <sys/mman.h>
#include <sys/open.h>
#include <sys/atomic.h>
#include <sys/mem_config.h>


#include <sys/ddi.h>
#include <sys/devops.h>
#include <sys/ddidevmap.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
#include <sys/ddi_impldefs.h>

#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ddi_impldefs.h>

#include <sys/modctl.h>

#include <sys/policy.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/param.h>

#include <sys/taskq.h>

#include <sys/rsm/rsm_common.h>
#include <sys/rsm/rsmapi_common.h>
#include <sys/rsm/rsm.h>
#include <rsm_in.h>
#include <sys/rsm/rsmka_path_int.h>
#include <sys/rsm/rsmpi.h>

#include <sys/modctl.h>
#include <sys/debug.h>

#include <sys/tuneable.h>

#ifdef RSM_DRTEST
extern int rsm_kphysm_setup_func_register(kphysm_setup_vector_t *vec,
    void *arg);
extern void rsm_kphysm_setup_func_unregister(kphysm_setup_vector_t *vec,
    void *arg);
#endif

extern void dbg_printf(int category, int level, char *fmt, ...);
extern void rsmka_pathmanager_init();
extern void rsmka_pathmanager_cleanup();
extern void rele_sendq_token();
extern rsm_addr_t get_remote_hwaddr(adapter_t *, rsm_node_id_t);
extern rsm_node_id_t get_remote_nodeid(adapter_t *, rsm_addr_t);
extern int rsmka_topology_ioctl(caddr_t, int, int);

extern pri_t maxclsyspri;
extern work_queue_t work_queue;
extern kmutex_t ipc_info_lock;
extern kmutex_t ipc_info_cvlock;
extern kcondvar_t ipc_info_cv;
extern kmutex_t path_hold_cvlock;
extern kcondvar_t path_hold_cv;

extern kmutex_t rsmka_buf_lock;

extern path_t *rsm_find_path(char *, int, rsm_addr_t);
extern adapter_t *rsmka_lookup_adapter(char *, int);
extern sendq_token_t *rsmka_get_sendq_token(rsm_node_id_t, sendq_token_t *);
extern boolean_t rsmka_do_path_active(path_t *, int);
extern boolean_t rsmka_check_node_alive(rsm_node_id_t);
extern void rsmka_release_adapter(adapter_t *);
extern void rsmka_enqueue_msgbuf(path_t *path, void *data);
extern void rsmka_dequeue_msgbuf(path_t *path);
extern msgbuf_elem_t *rsmka_gethead_msgbuf(path_t *path);
/* lint -w2 */

static int rsm_open(dev_t *, int, int, cred_t *);
static int rsm_close(dev_t, int, int, cred_t *);
static int rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *credp, int *rvalp);
static int rsm_devmap(dev_t, devmap_cookie_t, offset_t, size_t, size_t *,
    uint_t);
static int rsm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t, uint_t,
    uint_t, uint_t, cred_t *);
static int rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

static int rsm_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int rsm_attach(dev_info_t *, ddi_attach_cmd_t);
static int rsm_detach(dev_info_t *, ddi_detach_cmd_t);
static int rsmipc_send(rsm_node_id_t, rsmipc_request_t *, rsmipc_reply_t *);
static void rsm_force_unload(rsm_node_id_t, rsm_memseg_id_t, boolean_t);
static void rsm_send_importer_disconnects(rsm_memseg_id_t, rsm_node_id_t);
static void rsm_send_republish(rsm_memseg_id_t, rsmapi_access_entry_t *, int,
    rsm_permission_t);
static void rsm_export_force_destroy(ddi_umem_cookie_t *);
static void rsmacl_free(rsmapi_access_entry_t *, int);
static void rsmpiacl_free(rsm_access_entry_t *, int);

static int rsm_inc_pgcnt(pgcnt_t);
static void rsm_dec_pgcnt(pgcnt_t);
static void rsm_free_mapinfo(rsm_mapinfo_t *mapinfop);
static rsm_mapinfo_t *rsm_get_mapinfo(rsmseg_t *, off_t, size_t, off_t *,
    size_t *);
static void exporter_quiesce();
static void rsmseg_suspend(rsmseg_t *, int *);
static void rsmsegshare_suspend(rsmseg_t *);
static int rsmseg_resume(rsmseg_t *, void **);
static int rsmsegshare_resume(rsmseg_t *);

static struct cb_ops rsm_cb_ops = {
	rsm_open,		/* open */
	rsm_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	rsm_ioctl,		/* ioctl */
	rsm_devmap,		/* devmap */
	NULL,			/* mmap */
	rsm_segmap,		/* segmap */
	rsm_chpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
	0,
	0,
	0
};

static struct dev_ops rsm_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	rsm_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	rsm_attach,		/* attach */
	rsm_detach,		/* detach */
	nodev,			/* reset */
	&rsm_cb_ops,		/* driver operations */
	(struct bus_ops *)0,	/* bus operations */
	0
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,	/* Type of module. This one is a pseudo driver */
	"Remote Shared Memory Driver %I%",
	&rsm_ops,	/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	0,
	0,
	0
};

static void rsm_dr_callback_post_add(void *arg, pgcnt_t delta);
static int rsm_dr_callback_pre_del(void *arg, pgcnt_t delta);
static void rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled);

static kphysm_setup_vector_t rsm_dr_callback_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	rsm_dr_callback_post_add,
	rsm_dr_callback_pre_del,
	rsm_dr_callback_post_del
};

/* This flag can be changed to 0 to help with PIT testing */
int rsmka_modunloadok = 1;
int no_reply_cnt = 0;

uint64_t rsm_ctrlmsg_errcnt = 0;
uint64_t rsm_ipcsend_errcnt = 0;

#define	MAX_NODES 64

static struct rsm_driver_data rsm_drv_data;
static struct rsmresource_table rsm_resource;

static void rsmresource_insert(minor_t, rsmresource_t *, rsm_resource_type_t);
static void rsmresource_destroy(void);
static int rsmresource_alloc(minor_t *);
static rsmresource_t *rsmresource_free(minor_t rnum);
static int rsm_closeconnection(rsmseg_t *seg, void **cookie);
static int rsm_unpublish(rsmseg_t *seg, int mode);
static int rsm_unbind(rsmseg_t *seg);
static uint_t rsmhash(rsm_memseg_id_t key);
static void rsmhash_alloc(rsmhash_table_t *rhash, int size);
static void rsmhash_free(rsmhash_table_t *rhash, int size);
static void *rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval);
static void **rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval);
static int rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid,
    void *cookie);
int rsm_disconnect(rsmseg_t *seg);
void rsmseg_unload(rsmseg_t *);
void rsm_suspend_complete(rsm_node_id_t src_node, int flag);

rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd,
    rsm_intr_q_op_t opcode, rsm_addr_t src,
    void *data, size_t size, rsm_intr_hand_arg_t arg);

static void rsm_intr_callback(void *, rsm_addr_t, rsm_intr_hand_arg_t);

rsm_node_id_t my_nodeid;

/* cookie, va, offsets and length for the barrier */
static rsm_gnum_t *bar_va;
static ddi_umem_cookie_t bar_cookie;
static off_t barrier_offset;
static size_t barrier_size;
static int max_segs;

/* cookie for the trash memory */
static ddi_umem_cookie_t remap_cookie;

static rsm_memseg_id_t rsm_nextavail_segmentid;

extern taskq_t *work_taskq;
extern char *taskq_name;

static dev_info_t *rsm_dip;	/* private copy of devinfo pointer */

static rsmhash_table_t rsm_export_segs;		/* list of exported segs */
rsmhash_table_t rsm_import_segs;		/* list of imported segs */
static rsmhash_table_t rsm_event_queues;	/* list of event queues */

static rsm_ipc_t rsm_ipc;	/* ipc info */

/* list of nodes to which RSMIPC_MSG_SUSPEND has been sent */
static list_head_t rsm_suspend_list;

/* list of descriptors for remote importers */
static importers_table_t importer_list;

kmutex_t rsm_suspend_cvlock;
kcondvar_t rsm_suspend_cv;

static kmutex_t rsm_lock;

adapter_t loopback_adapter;
rsm_controller_attr_t loopback_attr;

int rsmipc_send_controlmsg(path_t *path, int msgtype);

void rsmka_init_loopback();
int rsmka_null_seg_create(
    rsm_controller_handle_t,
    rsm_memseg_export_handle_t *,
    size_t,
    uint_t,
    rsm_memory_local_t *,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_seg_destroy(
    rsm_memseg_export_handle_t);

int rsmka_null_bind(
    rsm_memseg_export_handle_t,
    off_t,
    rsm_memory_local_t *,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_unbind(
    rsm_memseg_export_handle_t,
    off_t,
    size_t);

int rsmka_null_rebind(
    rsm_memseg_export_handle_t,
    off_t,
    rsm_memory_local_t *,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_publish(
    rsm_memseg_export_handle_t,
    rsm_access_entry_t [],
    uint_t,
    rsm_memseg_id_t,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);


int rsmka_null_republish(
    rsm_memseg_export_handle_t,
    rsm_access_entry_t [],
    uint_t,
    rsm_resource_callback_t,
    rsm_resource_callback_arg_t);

int rsmka_null_unpublish(
    rsm_memseg_export_handle_t);

rsm_ops_t null_rsmpi_ops;

/*
 * data and locks to keep track of total amount of exported memory
 */
static pgcnt_t rsm_pgcnt;
static pgcnt_t rsm_pgcnt_max;	/* max allowed */
static kmutex_t rsm_pgcnt_lock;

static int rsm_enable_dr;

static char loopback_str[] = "loopback";

int rsm_hash_size;

/*
 * The locking model is as follows:
 *
 * Local operations:
 *	find resource - grab reader lock on resource list
 *	insert rc - grab writer lock
 *	delete rc - grab writer lock and resource mutex
 *	read/write - no lock
 *
 * Remote invocations:
 *	find resource - grab read lock and resource mutex
 *
 * State:
 *	resource state - grab resource mutex
 */
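/*
 * Illustrative only: the "find resource" step of the locking model above,
 * as rsmresource_lookup() implements it later in this file - the table
 * rwlock is held just long enough to find the entry and take its resource
 * mutex.
 *
 *	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);
 *	p = blk->rsmrcblk_blks[rnum % RSMRC_BLKSZ];
 *	mutex_enter(&p->rsmrc_lock);
 *	rw_exit(&rsm_resource.rsmrc_lock);
 */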
int
_init(void)
{
	int e;

	e = mod_install(&modlinkage);
	if (e != 0) {
		return (e);
	}

	mutex_init(&rsm_lock, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&rsmka_buf_lock, NULL, MUTEX_DEFAULT, NULL);


	rw_init(&rsm_resource.rsmrc_lock, NULL, RW_DRIVER, NULL);

	rsm_hash_size = RSM_HASHSZ;

	rw_init(&rsm_export_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);

	rw_init(&rsm_import_segs.rsmhash_rw, NULL, RW_DRIVER, NULL);

	mutex_init(&importer_list.lock, NULL, MUTEX_DRIVER, NULL);

	mutex_init(&rsm_ipc.lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&rsm_ipc.cv, NULL, CV_DRIVER, 0);

	mutex_init(&rsm_suspend_cvlock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&rsm_suspend_cv, NULL, CV_DRIVER, 0);

	mutex_init(&rsm_drv_data.drv_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&rsm_drv_data.drv_cv, NULL, CV_DRIVER, 0);

	rsm_ipc.count = RSMIPC_SZ;
	rsm_ipc.wanted = 0;
	rsm_ipc.sequence = 0;

	(void) mutex_init(&rsm_pgcnt_lock, NULL, MUTEX_DRIVER, NULL);

	for (e = 0; e < RSMIPC_SZ; e++) {
		rsmipc_slot_t *slot = &rsm_ipc.slots[e];

		RSMIPC_SET(slot, RSMIPC_FREE);
		mutex_init(&slot->rsmipc_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&slot->rsmipc_cv, NULL, CV_DRIVER, 0);
	}

	/*
	 * Initialize the suspend message list
	 */
	rsm_suspend_list.list_head = NULL;
	mutex_init(&rsm_suspend_list.list_lock, NULL, MUTEX_DRIVER, NULL);

	/*
	 * It is assumed here that configuration data is available
	 * during system boot since _init may be called at that time.
	 */

	rsmka_pathmanager_init();

	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
	    "rsm: _init done\n"));

	return (DDI_SUCCESS);

}

int
_info(struct modinfo *modinfop)
{

	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int e;

	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE,
	    "rsm: _fini enter\n"));

	/*
	 * The rsmka_modunloadok flag is simply used to help with
	 * the PIT testing. Make this flag 0 to disallow modunload.
	 */
	if (rsmka_modunloadok == 0)
		return (EBUSY);

	/* rsm_detach will be called as a result of mod_remove */
	e = mod_remove(&modlinkage);
	if (e) {
		DBG_PRINTF((RSM_KERNEL_AGENT, RSM_ERR,
		    "Unable to fini RSM %x\n", e));
		return (e);
	}

	rsmka_pathmanager_cleanup();

	rw_destroy(&rsm_resource.rsmrc_lock);

	rw_destroy(&rsm_export_segs.rsmhash_rw);
	rw_destroy(&rsm_import_segs.rsmhash_rw);
	rw_destroy(&rsm_event_queues.rsmhash_rw);

	mutex_destroy(&importer_list.lock);

	mutex_destroy(&rsm_ipc.lock);
	cv_destroy(&rsm_ipc.cv);

	(void) mutex_destroy(&rsm_suspend_list.list_lock);

	(void) mutex_destroy(&rsm_pgcnt_lock);

	DBG_PRINTF((RSM_KERNEL_AGENT, RSM_DEBUG_VERBOSE, "_fini done\n"));

	return (DDI_SUCCESS);

}

/*ARGSUSED1*/
static int
rsm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	minor_t rnum;
	int percent;
	int ret;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach enter\n"));

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
	default:
		DBG_PRINTF((category, RSM_ERR,
		    "rsm:rsm_attach - cmd not supported\n"));
		return (DDI_FAILURE);
	}

	if (rsm_dip != NULL) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm:rsm_attach - supports only "
		    "one instance\n"));
		return (DDI_FAILURE);
	}

	rsm_enable_dr = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "enable-dynamic-reconfiguration", 1);

	mutex_enter(&rsm_drv_data.drv_lock);
	rsm_drv_data.drv_state = RSM_DRV_REG_PROCESSING;
	mutex_exit(&rsm_drv_data.drv_lock);

	if (rsm_enable_dr) {
#ifdef RSM_DRTEST
		ret = rsm_kphysm_setup_func_register(&rsm_dr_callback_vec,
		    (void *)NULL);
#else
		ret = kphysm_setup_func_register(&rsm_dr_callback_vec,
		    (void *)NULL);
#endif
		if (ret != 0) {
			mutex_exit(&rsm_drv_data.drv_lock);
			cmn_err(CE_CONT, "rsm:rsm_attach - Dynamic "
			    "reconfiguration setup failed\n");
			return (DDI_FAILURE);
		}
	}

	mutex_enter(&rsm_drv_data.drv_lock);
	ASSERT(rsm_drv_data.drv_state == RSM_DRV_REG_PROCESSING);
	rsm_drv_data.drv_state = RSM_DRV_OK;
	cv_broadcast(&rsm_drv_data.drv_cv);
	mutex_exit(&rsm_drv_data.drv_lock);

	/*
	 * page_list_read_lock();
	 * xx_setup();
	 * page_list_read_unlock();
	 */

	rsm_hash_size = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "segment-hashtable-size", RSM_HASHSZ);
	if (rsm_hash_size == 0) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm: segment-hashtable-size in rsm.conf "
		    "must be greater than 0, defaulting to 128\n"));
		rsm_hash_size = RSM_HASHSZ;
	}

	DBG_PRINTF((category, RSM_DEBUG, "rsm_attach rsm_hash_size: %d\n",
	    rsm_hash_size));

	rsm_pgcnt = 0;

	percent = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "max-exported-memory", 0);
	if (percent < 0) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm:rsm_attach not enough memory available to "
		    "export, or max-exported-memory set incorrectly.\n"));
		return (DDI_FAILURE);
	}
	/*
	 * 0 indicates no fixed upper limit. maxmem is the max
	 * available pageable physical mem.
	 */
	rsm_pgcnt_max = (percent * maxmem) / 100;

	if (rsm_pgcnt_max > 0) {
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsm: Available physical memory = %lu pages, "
		    "Max exportable memory = %lu pages",
		    maxmem, rsm_pgcnt_max));
	}
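	/*
	 * Worked example with illustrative numbers: if rsm.conf sets
	 * max-exported-memory to 20 and maxmem is 1,000,000 pageable
	 * pages, then rsm_pgcnt_max = (20 * 1,000,000) / 100 = 200,000
	 * pages of exportable memory.
	 */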
	/*
	 * Create minor number
	 */
	if (rsmresource_alloc(&rnum) != RSM_SUCCESS) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsm_attach - Unable to get "
		    "minor number\n"));
		return (DDI_FAILURE);
	}

	ASSERT(rnum == RSM_DRIVER_MINOR);

	if (ddi_create_minor_node(devi, DRIVER_NAME, S_IFCHR,
	    rnum, DDI_PSEUDO, NULL) == DDI_FAILURE) {
		DBG_PRINTF((category, RSM_ERR,
		    "rsm: rsm_attach - unable to allocate "
		    "minor #\n"));
		return (DDI_FAILURE);
	}

	rsm_dip = devi;
	/*
	 * Allocate the hashtables
	 */
	rsmhash_alloc(&rsm_export_segs, rsm_hash_size);
	rsmhash_alloc(&rsm_import_segs, rsm_hash_size);

	importer_list.bucket = (importing_token_t **)
	    kmem_zalloc(rsm_hash_size * sizeof (importing_token_t *),
	    KM_SLEEP);

	/*
	 * Allocate a resource struct
	 */
	{
		rsmresource_t *p;

		p = (rsmresource_t *)kmem_zalloc(sizeof (*p), KM_SLEEP);

		mutex_init(&p->rsmrc_lock, NULL, MUTEX_DRIVER, (void *)NULL);

		rsmresource_insert(rnum, p, RSM_RESOURCE_BAR);
	}

	/*
	 * Based on the rsm.conf property max-segments, determine the maximum
	 * number of segments that can be exported/imported. This is then used
	 * to determine the size for barrier failure pages.
	 */

	/* First get the max number of segments from the rsm.conf file */
	max_segs = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
	    "max-segments", 0);
	if (max_segs == 0) {
		/* Use default number of segments */
		max_segs = RSM_MAX_NUM_SEG;
	}

	/*
	 * Based on the max number of segments allowed, determine the barrier
	 * page size. Add 1 to max_segs since the barrier page itself uses
	 * a slot.
	 */
	barrier_size = roundup((max_segs + 1) * sizeof (rsm_gnum_t),
	    PAGESIZE);

	/*
	 * allocation of the barrier failure page
	 */
	bar_va = (rsm_gnum_t *)ddi_umem_alloc(barrier_size,
	    DDI_UMEM_SLEEP, &bar_cookie);

	/*
	 * Set the barrier_offset
	 */
	barrier_offset = 0;

	/*
	 * Allocate trash memory and get a cookie for it. This will be used
	 * when remapping segments during force disconnects. Allocate the
	 * trash memory with a large size which is page aligned.
	 */
	(void) ddi_umem_alloc((size_t)TRASHSIZE,
	    DDI_UMEM_TRASH, &remap_cookie);

	/* initialize user segment id allocation variable */
	rsm_nextavail_segmentid = (rsm_memseg_id_t)RSM_USER_APP_ID_BASE;

	/*
	 * initialize the null_rsmpi_ops vector and the loopback adapter
	 */
	rsmka_init_loopback();


	ddi_report_dev(devi);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_attach done\n"));

	return (DDI_SUCCESS);
}

/*
 * The call to mod_remove in the _fini routine will cause the system
 * to call rsm_detach.
 */
/*ARGSUSED*/
static int
rsm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach enter\n"));

	switch (cmd) {
	case DDI_DETACH:
		break;
	default:
		DBG_PRINTF((category, RSM_ERR,
		    "rsm:rsm_detach - cmd %x not supported\n",
		    cmd));
		return (DDI_FAILURE);
	}

	mutex_enter(&rsm_drv_data.drv_lock);
	while (rsm_drv_data.drv_state != RSM_DRV_OK)
		cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
	rsm_drv_data.drv_state = RSM_DRV_UNREG_PROCESSING;
	mutex_exit(&rsm_drv_data.drv_lock);

	/*
	 * Unregister the DR callback functions
	 */
	if (rsm_enable_dr) {
#ifdef RSM_DRTEST
		rsm_kphysm_setup_func_unregister(&rsm_dr_callback_vec,
		    (void *)NULL);
#else
		kphysm_setup_func_unregister(&rsm_dr_callback_vec,
		    (void *)NULL);
#endif
	}

	mutex_enter(&rsm_drv_data.drv_lock);
	ASSERT(rsm_drv_data.drv_state == RSM_DRV_UNREG_PROCESSING);
	rsm_drv_data.drv_state = RSM_DRV_NEW;
	mutex_exit(&rsm_drv_data.drv_lock);

	ASSERT(rsm_suspend_list.list_head == NULL);

	/*
	 * Release all resources, seglist, controller, ...
	 */

	/* remove intersend queues */
	/* remove registered services */


	ddi_remove_minor_node(dip, DRIVER_NAME);
	rsm_dip = NULL;

	/*
	 * Free minor zero resource
	 */
	{
		rsmresource_t *p;

		p = rsmresource_free(RSM_DRIVER_MINOR);
		if (p) {
			mutex_destroy(&p->rsmrc_lock);
			kmem_free((void *)p, sizeof (*p));
		}
	}

	/*
	 * Free resource table
	 */

	rsmresource_destroy();

	/*
	 * Free the hash tables
	 */
	rsmhash_free(&rsm_export_segs, rsm_hash_size);
	rsmhash_free(&rsm_import_segs, rsm_hash_size);

	kmem_free((void *)importer_list.bucket,
	    rsm_hash_size * sizeof (importing_token_t *));
	importer_list.bucket = NULL;


	/* free barrier page */
	if (bar_cookie != NULL) {
		ddi_umem_free(bar_cookie);
	}
	bar_va = NULL;
	bar_cookie = NULL;

	/*
	 * Free the memory allocated for the trash
	 */
	if (remap_cookie != NULL) {
		ddi_umem_free(remap_cookie);
	}
	remap_cookie = NULL;

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_detach done\n"));

	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
rsm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	register int error;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info enter\n"));

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (rsm_dip == NULL)
			error = DDI_FAILURE;
		else {
			*result = (void *)rsm_dip;
			error = DDI_SUCCESS;
		}
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_info done\n"));
	return (error);
}

adapter_t *
rsm_getadapter(rsm_ioctlmsg_t *msg, int mode)
{
	adapter_t *adapter;
	char adapter_devname[MAXNAMELEN];
	int instance;
	DBG_DEFINE(category,
	    RSM_KERNEL_AGENT | RSM_IMPORT | RSM_EXPORT | RSM_IOCTL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter enter\n"));

	instance = msg->cnum;

	if ((msg->cname_len <= 0) || (msg->cname_len > MAXNAMELEN)) {
		return (NULL);
	}

	if (ddi_copyin(msg->cname, adapter_devname, msg->cname_len, mode))
		return (NULL);

	if (strcmp(adapter_devname, "loopback") == 0)
		return (&loopback_adapter);

	adapter = rsmka_lookup_adapter(adapter_devname, instance);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_getadapter done\n"));

	return (adapter);
}


/*
 * *********************** Resource Number Management ********************
 * All resources are stored in a simple hash table. The table is an array
 * of pointers to resource blks. Each blk contains:
 *	base	- base number of this blk
 *	used	- number of used slots in this blk.
 *	blks	- array of pointers to resource items.
 * An entry in a resource blk is empty if it's NULL.
 *
 * We start with no resource array. Each time we run out of slots, we
 * reallocate a new larger array, copy the existing pointers into it, and
 * allocate a new resource blk which is added to the hash table.
 *
 * The resource control block contains:
 *	root	- array of pointers to resource blks
 *	sz	- current size of array.
 *	len	- last valid entry in array.
 *
 * A search operation based on a resource number is as follows:
 *	index = rnum / RSMRC_BLKSZ;
 *	ASSERT(index < resource_block.len);
 *	ASSERT(index < resource_block.sz);
 *	offset = rnum % RSMRC_BLKSZ;
 *	ASSERT(offset >= resource_block.root[index]->base);
 *	ASSERT(offset < resource_block.root[index]->base + RSMRC_BLKSZ);
 *	return resource_block.root[index]->blks[offset];
 *
 * A resource blk is freed when its used count reaches zero.
 */
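/*
 * Worked example (using an illustrative RSMRC_BLKSZ of 16, not its actual
 * value): resource number 37 lives at index 37 / 16 = 2 and offset
 * 37 % 16 = 5, i.e. rsm_resource.rsmrc_root[2]->rsmrcblk_blks[5].
 */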
static int
rsmresource_alloc(minor_t *rnum)
{

	/* search for available resource slot */
	int i, j, empty = -1;
	rsmresource_blk_t *blk;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_alloc enter\n"));

	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);

	/* Try to find an empty slot */
	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
		blk = rsm_resource.rsmrc_root[i];
		if (blk != NULL && blk->rsmrcblk_avail > 0) {
			/* found an empty slot in this blk */
			for (j = 0; j < RSMRC_BLKSZ; j++) {
				if (blk->rsmrcblk_blks[j] == NULL) {
					*rnum = (minor_t)
					    (j + (i * RSMRC_BLKSZ));
					/*
					 * obey gen page limits
					 */
					if (*rnum >= max_segs + 1) {
						if (empty < 0) {
							rw_exit(&rsm_resource.
							    rsmrc_lock);
							DBG_PRINTF((
							    RSM_KERNEL_ALL,
							    RSM_ERR,
							    "rsmresource"
							    "_alloc failed:"
							    "not enough res"
							    "%d\n", *rnum));
					return (
					    RSMERR_INSUFFICIENT_RESOURCES);
						} else {
							/* use empty slot */
							break;
						}

					}

					blk->rsmrcblk_blks[j] = RSMRC_RESERVED;
					blk->rsmrcblk_avail--;
					rw_exit(&rsm_resource.rsmrc_lock);
					DBG_PRINTF((RSM_KERNEL_ALL,
					    RSM_DEBUG_VERBOSE,
					    "rsmresource_alloc done\n"));
					return (RSM_SUCCESS);
				}
			}
		} else if (blk == NULL && empty < 0) {
			/* remember first empty slot */
			empty = i;
		}
	}

	/* Couldn't find anything, allocate a new blk */
	/*
	 * Do we need to reallocate the root array?
	 */
	if (empty < 0) {
		if (rsm_resource.rsmrc_len == rsm_resource.rsmrc_sz) {
			/*
			 * Allocate new array and copy current stuff into it
			 */
			rsmresource_blk_t	**p;
			uint_t	newsz = (uint_t)rsm_resource.rsmrc_sz +
			    RSMRC_BLKSZ;
			/*
			 * Don't allocate more than the max valid rnum
			 */
			if (rsm_resource.rsmrc_len*RSMRC_BLKSZ >=
			    max_segs + 1) {
				rw_exit(&rsm_resource.rsmrc_lock);
				return (RSMERR_INSUFFICIENT_RESOURCES);
			}

			p = (rsmresource_blk_t **)kmem_zalloc(
			    newsz * sizeof (*p),
			    KM_SLEEP);

			if (rsm_resource.rsmrc_root) {
				uint_t oldsz;

				oldsz = (uint_t)(rsm_resource.rsmrc_sz *
				    (int)sizeof (*p));

				/*
				 * Copy old data into new space and
				 * free old stuff
				 */
				bcopy(rsm_resource.rsmrc_root, p, oldsz);
				kmem_free(rsm_resource.rsmrc_root, oldsz);
			}

			rsm_resource.rsmrc_root = p;
			rsm_resource.rsmrc_sz = (int)newsz;
		}

		empty = rsm_resource.rsmrc_len;
		rsm_resource.rsmrc_len++;
	}

	/*
	 * Allocate a new blk
	 */
	blk = (rsmresource_blk_t *)kmem_zalloc(sizeof (*blk), KM_SLEEP);
	ASSERT(rsm_resource.rsmrc_root[empty] == NULL);
	rsm_resource.rsmrc_root[empty] = blk;
	blk->rsmrcblk_avail = RSMRC_BLKSZ - 1;

	/*
	 * Allocate slot
	 */

	*rnum = (minor_t)(empty * RSMRC_BLKSZ);

	/*
	 * watch out not to exceed bounds of barrier page
	 */
	if (*rnum >= max_segs + 1) {
		rw_exit(&rsm_resource.rsmrc_lock);
		DBG_PRINTF((RSM_KERNEL_ALL, RSM_ERR,
		    "rsmresource_alloc failed %d\n", *rnum));

		return (RSMERR_INSUFFICIENT_RESOURCES);
	}
	blk->rsmrcblk_blks[0] = RSMRC_RESERVED;


	rw_exit(&rsm_resource.rsmrc_lock);

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_alloc done\n"));

	return (RSM_SUCCESS);
}

static rsmresource_t *
rsmresource_free(minor_t rnum)
{

	/* search for available resource slot */
	int i, j;
	rsmresource_blk_t *blk;
	rsmresource_t *p;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_free enter\n"));

	i = (int)(rnum / RSMRC_BLKSZ);
	j = (int)(rnum % RSMRC_BLKSZ);

	if (i >= rsm_resource.rsmrc_len) {
		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
		    "rsmresource_free done\n"));
		return (NULL);
	}

	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);

	ASSERT(rsm_resource.rsmrc_root);
	ASSERT(i < rsm_resource.rsmrc_len);
	ASSERT(i < rsm_resource.rsmrc_sz);
	blk = rsm_resource.rsmrc_root[i];
	if (blk == NULL) {
		rw_exit(&rsm_resource.rsmrc_lock);
		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
		    "rsmresource_free done\n"));
		return (NULL);
	}

	ASSERT(blk->rsmrcblk_blks[j]);	/* reserved or full */

	p = blk->rsmrcblk_blks[j];
	if (p == RSMRC_RESERVED) {
		p = NULL;
	}

	blk->rsmrcblk_blks[j] = NULL;
	blk->rsmrcblk_avail++;
	if (blk->rsmrcblk_avail == RSMRC_BLKSZ) {
		/* free this blk */
		kmem_free(blk, sizeof (*blk));
		rsm_resource.rsmrc_root[i] = NULL;
	}

	rw_exit(&rsm_resource.rsmrc_lock);

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_free done\n"));

	return (p);
}

static rsmresource_t *
rsmresource_lookup(minor_t rnum, int lock)
{
	int i, j;
	rsmresource_blk_t *blk;
	rsmresource_t *p;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_lookup enter\n"));

	/* Find resource and lock it in READER mode */
	/* search for available resource slot */

	i = (int)(rnum / RSMRC_BLKSZ);
	j = (int)(rnum % RSMRC_BLKSZ);

	if (i >= rsm_resource.rsmrc_len) {
		DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
		    "rsmresource_lookup done\n"));
		return (NULL);
	}

	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);

	blk = rsm_resource.rsmrc_root[i];
	if (blk != NULL) {
		ASSERT(i < rsm_resource.rsmrc_len);
		ASSERT(i < rsm_resource.rsmrc_sz);

		p = blk->rsmrcblk_blks[j];
		if (lock == RSM_LOCK) {
			if (p != RSMRC_RESERVED) {
				mutex_enter(&p->rsmrc_lock);
			} else {
				p = NULL;
			}
		}
	} else {
		p = NULL;
	}
	rw_exit(&rsm_resource.rsmrc_lock);

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_lookup done\n"));

	return (p);
}

static void
rsmresource_insert(minor_t rnum, rsmresource_t *p, rsm_resource_type_t type)
{
	/* Find resource and lock it in READER mode */
	/* Caller can upgrade if need be */
	/* search for available resource slot */
	int i, j;
	rsmresource_blk_t *blk;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_insert enter\n"));

	i = (int)(rnum / RSMRC_BLKSZ);
	j = (int)(rnum % RSMRC_BLKSZ);

	p->rsmrc_type = type;
	p->rsmrc_num = rnum;

	rw_enter(&rsm_resource.rsmrc_lock, RW_READER);

	ASSERT(rsm_resource.rsmrc_root);
	ASSERT(i < rsm_resource.rsmrc_len);
	ASSERT(i < rsm_resource.rsmrc_sz);

	blk = rsm_resource.rsmrc_root[i];
	ASSERT(blk);

	ASSERT(blk->rsmrcblk_blks[j] == RSMRC_RESERVED);

	blk->rsmrcblk_blks[j] = p;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_insert done\n"));

	rw_exit(&rsm_resource.rsmrc_lock);
}

static void
rsmresource_destroy()
{
	int i, j;

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_destroy enter\n"));

	rw_enter(&rsm_resource.rsmrc_lock, RW_WRITER);

	for (i = 0; i < rsm_resource.rsmrc_len; i++) {
		rsmresource_blk_t *blk;

		blk = rsm_resource.rsmrc_root[i];
		if (blk == NULL) {
			continue;
		}
		for (j = 0; j < RSMRC_BLKSZ; j++) {
			if (blk->rsmrcblk_blks[j] != NULL) {
				DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
				    "Not null slot %d, %lx\n", j,
				    (size_t)blk->rsmrcblk_blks[j]));
			}
		}
		kmem_free(blk, sizeof (*blk));
		rsm_resource.rsmrc_root[i] = NULL;
	}
	if (rsm_resource.rsmrc_root) {
		i = rsm_resource.rsmrc_sz * (int)sizeof (rsmresource_blk_t *);
		kmem_free(rsm_resource.rsmrc_root, (uint_t)i);
		rsm_resource.rsmrc_root = NULL;
		rsm_resource.rsmrc_len = 0;
		rsm_resource.rsmrc_sz = 0;
	}

	DBG_PRINTF((RSM_KERNEL_ALL, RSM_DEBUG_VERBOSE,
	    "rsmresource_destroy done\n"));

	rw_exit(&rsm_resource.rsmrc_lock);
}


/* ******************** Generic Key Hash Table Management ********* */
static rsmresource_t *
rsmhash_lookup(rsmhash_table_t *rhash, rsm_memseg_id_t key,
    rsm_resource_state_t state)
{
	rsmresource_t *p;
	uint_t hashval;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup enter\n"));

	hashval = rsmhash(key);

	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_lookup %u=%d\n",
	    key, hashval));

	rw_enter(&rhash->rsmhash_rw, RW_READER);

	p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);

	for (; p; p = p->rsmrc_next) {
		if (p->rsmrc_key == key) {
			/* acquire resource lock */
			RSMRC_LOCK(p);
			break;
		}
	}

	rw_exit(&rhash->rsmhash_rw);

	if (p != NULL && p->rsmrc_state != state) {
		/* state changed, release lock and return null */
		RSMRC_UNLOCK(p);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmhash_lookup done: state changed\n"));
		return (NULL);
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_lookup done\n"));

	return (p);
}

static void
rsmhash_rm(rsmhash_table_t *rhash, rsmresource_t *rcelm)
{
	rsmresource_t *p, **back;
	uint_t hashval;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm enter\n"));

	hashval = rsmhash(rcelm->rsmrc_key);

	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_rm %u=%d\n",
	    rcelm->rsmrc_key, hashval));

	/*
	 * It's ok not to find the segment.
	 */
	rw_enter(&rhash->rsmhash_rw, RW_WRITER);

	back = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);

	for (; (p = *back) != NULL; back = &p->rsmrc_next) {
		if (p == rcelm) {
			*back = rcelm->rsmrc_next;
			break;
		}
	}

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_rm done\n"));

	rw_exit(&rhash->rsmhash_rw);
}

static int
rsmhash_add(rsmhash_table_t *rhash, rsmresource_t *new, rsm_memseg_id_t key,
    int dup_check, rsm_resource_state_t state)
{
	rsmresource_t *p = NULL, **bktp;
	uint_t hashval;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add enter\n"));

	/* lock table */
	rw_enter(&rhash->rsmhash_rw, RW_WRITER);

	/*
	 * If the current resource state is other than the state passed in
	 * then the resource is (probably) already on the list. eg. for an
	 * import segment if the state is not RSM_STATE_NEW then it's on the
	 * list already.
	 */
	RSMRC_LOCK(new);
	if (new->rsmrc_state != state) {
		RSMRC_UNLOCK(new);
		rw_exit(&rhash->rsmhash_rw);
		return (RSMERR_BAD_SEG_HNDL);
	}

	hashval = rsmhash(key);
	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmhash_add %d\n", hashval));

	if (dup_check) {
		/*
		 * Used for checking export segments; don't want to have
		 * the same key used for multiple segments.
		 */

		p = (rsmresource_t *)rsmhash_getbkt(rhash, hashval);

		for (; p; p = p->rsmrc_next) {
			if (p->rsmrc_key == key) {
				RSMRC_UNLOCK(new);
				break;
			}
		}
	}

	if (p == NULL) {
		/* Key doesn't exist, add it */

		bktp = (rsmresource_t **)rsmhash_bktaddr(rhash, hashval);

		new->rsmrc_key = key;
		new->rsmrc_next = *bktp;
		*bktp = new;
	}

	rw_exit(&rhash->rsmhash_rw);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmhash_add done\n"));

	return (p == NULL ? RSM_SUCCESS : RSMERR_SEGID_IN_USE);
}

/*
 * XOR each byte of the key.
 */
static uint_t
rsmhash(rsm_memseg_id_t key)
{
	uint_t hash = key;

	hash ^= (key >> 8);
	hash ^= (key >> 16);
	hash ^= (key >> 24);

	return (hash % rsm_hash_size);

}
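/*
 * Worked example: with the default rsm_hash_size of 128 (a power of two,
 * so the modulo keeps only the low seven bits), key 0x12345678 folds its
 * bytes to 0x78 ^ 0x56 ^ 0x34 ^ 0x12 = 0x08 in the low byte, selecting
 * bucket 8.
 */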
/*
 * generic function to get a specific bucket
 */
static void *
rsmhash_getbkt(rsmhash_table_t *rhash, uint_t hashval)
{

	if (rhash->bucket == NULL)
		return (NULL);
	else
		return ((void *)rhash->bucket[hashval]);
}

/*
 * generic function to get a specific bucket's address
 */
static void **
rsmhash_bktaddr(rsmhash_table_t *rhash, uint_t hashval)
{
	if (rhash->bucket == NULL)
		return (NULL);
	else
		return ((void **)&(rhash->bucket[hashval]));
}

/*
 * generic function to alloc a hash table
 */
static void
rsmhash_alloc(rsmhash_table_t *rhash, int size)
{
	rhash->bucket = (rsmresource_t **)
	    kmem_zalloc(size * sizeof (rsmresource_t *), KM_SLEEP);
}

/*
 * generic function to free a hash table
 */
static void
rsmhash_free(rsmhash_table_t *rhash, int size)
{

	kmem_free((void *)rhash->bucket, size * sizeof (caddr_t));
	rhash->bucket = NULL;

}
/* *********************** Exported Segment Key Management ************ */

#define	rsmexport_add(new, key)	\
	rsmhash_add(&rsm_export_segs, (rsmresource_t *)new, key, 1, \
	    RSM_STATE_BIND)

#define	rsmexport_rm(arg)	\
	rsmhash_rm(&rsm_export_segs, (rsmresource_t *)(arg))

#define	rsmexport_lookup(key)	\
	(rsmseg_t *)rsmhash_lookup(&rsm_export_segs, key, RSM_STATE_EXPORT)

/* ************************** Import Segment List Management ********** */

/*
 * Add segment to import list. This will be useful for paging and loopback
 * segment unloading.
 */
#define	rsmimport_add(arg, key)	\
	rsmhash_add(&rsm_import_segs, (rsmresource_t *)(arg), (key), 0, \
	    RSM_STATE_NEW)

#define	rsmimport_rm(arg)	\
	rsmhash_rm(&rsm_import_segs, (rsmresource_t *)(arg))

/*
 * #define	rsmimport_lookup(key) \
 *	(rsmseg_t *)rsmhash_lookup(&rsm_import_segs, (key), RSM_STATE_CONNECT)
 */
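/*
 * Hedged usage sketch for the wrappers above (the error handling shown is
 * illustrative; the real call sites are in the export and import paths
 * later in this file). Note that a successful lookup returns the segment
 * with its resource mutex already held:
 *
 *	if (rsmexport_add(seg, segid) != RSM_SUCCESS)
 *		return (RSMERR_SEGID_IN_USE);
 *	...
 *	seg = rsmexport_lookup(segid);	(returns locked, or NULL)
 */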
/*
 * increase the ref count and make the import segment point to the
 * shared data structure. Return a pointer to the shared data struct;
 * the shared data struct is locked upon return.
 */
static rsm_import_share_t *
rsmshare_get(rsm_memseg_id_t key, rsm_node_id_t node, adapter_t *adapter,
    rsmseg_t *segp)
{
	uint_t hash;
	rsmresource_t *p;
	rsm_import_share_t *shdatap;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get enter\n"));

	hash = rsmhash(key);
	/* lock table */
	rw_enter(&rsm_import_segs.rsmhash_rw, RW_WRITER);
	DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsmshare_get:key=%u, hash=%d\n",
	    key, hash));

	p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hash);

	for (; p; p = p->rsmrc_next) {
		/*
		 * Look for an entry that is importing the same exporter
		 * with the share data structure allocated.
		 */
		if ((p->rsmrc_key == key) &&
		    (p->rsmrc_node == node) &&
		    (p->rsmrc_adapter == adapter) &&
		    (((rsmseg_t *)p)->s_share != NULL)) {
			shdatap = ((rsmseg_t *)p)->s_share;
			break;
		}
	}

	if (p == NULL) {
		/* we are the first importer, create the shared data struct */
		shdatap = kmem_zalloc(sizeof (rsm_import_share_t), KM_SLEEP);
		shdatap->rsmsi_state = RSMSI_STATE_NEW;
		shdatap->rsmsi_segid = key;
		shdatap->rsmsi_node = node;
		mutex_init(&shdatap->rsmsi_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&shdatap->rsmsi_cv, NULL, CV_DRIVER, 0);
	}

	rsmseglock_acquire(segp);

	/* we grab the shared lock before returning from this function */
	mutex_enter(&shdatap->rsmsi_lock);

	shdatap->rsmsi_refcnt++;
	segp->s_share = shdatap;

	rsmseglock_release(segp);

	rw_exit(&rsm_import_segs.rsmhash_rw);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmshare_get done\n"));

	return (shdatap);
}

/*
 * the shared data structure should be locked before calling
 * rsmsharecv_signal().
 * Change the state and signal any waiting segments.
 */
void
rsmsharecv_signal(rsmseg_t *seg, int oldstate, int newstate)
{
	ASSERT(rsmsharelock_held(seg));

	if (seg->s_share->rsmsi_state == oldstate) {
		seg->s_share->rsmsi_state = newstate;
		cv_broadcast(&seg->s_share->rsmsi_cv);
	}
}

/*
 * Add to the hash table
 */
static void
importer_list_add(rsm_node_id_t node, rsm_memseg_id_t key, rsm_addr_t hwaddr,
    void *cookie)
{

	importing_token_t *head;
	importing_token_t *new_token;
	int index;

	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add enter\n"));

	new_token = kmem_zalloc(sizeof (importing_token_t), KM_SLEEP);
	new_token->importing_node = node;
	new_token->key = key;
	new_token->import_segment_cookie = cookie;
	new_token->importing_adapter_hwaddr = hwaddr;

	index = rsmhash(key);

	mutex_enter(&importer_list.lock);

	head = importer_list.bucket[index];
	importer_list.bucket[index] = new_token;
	new_token->next = head;
	mutex_exit(&importer_list.lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_add done\n"));
}

static void
importer_list_rm(rsm_node_id_t node, rsm_memseg_id_t key, void *cookie)
{

	importing_token_t *prev, *token = NULL;
	int index;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm enter\n"));

	index = rsmhash(key);

	mutex_enter(&importer_list.lock);

	token = importer_list.bucket[index];

	prev = token;
	while (token != NULL) {
		if (token->importing_node == node &&
		    token->import_segment_cookie == cookie) {
			if (prev == token)
				importer_list.bucket[index] = token->next;
			else
				prev->next = token->next;
			kmem_free((void *)token, sizeof (*token));
			break;
		} else {
			prev = token;
			token = token->next;
		}
	}

	mutex_exit(&importer_list.lock);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_list_rm done\n"));


}
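/*
 * Hedged caller sketch for the share-state helpers above. The
 * RSMSI_STATE_CONNECTING/RSMSI_STATE_CONNECTED names are assumed to be
 * among the rsmsi_state values defined in rsm_in.h; only RSMSI_STATE_NEW
 * appears in this file.
 *
 *	rsmsharelock_acquire(seg);
 *	rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING,
 *	    RSMSI_STATE_CONNECTED);
 *	rsmsharelock_release(seg);
 */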
/* ************************** Segment Structure Management ************* */

/*
 * Free segment structure
 */
static void
rsmseg_free(rsmseg_t *seg)
{

	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free enter\n"));

	/* need to take seglock here to avoid race with rsmmap_unmap() */
	rsmseglock_acquire(seg);
	if (seg->s_ckl != NULL) {
		/* Segment is still busy */
		seg->s_state = RSM_STATE_END;
		rsmseglock_release(seg);
		DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
		    "rsmseg_free done\n"));
		return;
	}

	rsmseglock_release(seg);

	ASSERT(seg->s_state == RSM_STATE_END || seg->s_state == RSM_STATE_NEW);

	/*
	 * If it's an importer, decrement the refcount
	 * and if it is down to zero free the shared data structure.
	 * This is where failures during rsm_connect() are unrefcounted.
	 */
	if (seg->s_share != NULL) {

		ASSERT(seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT);

		rsmsharelock_acquire(seg);

		ASSERT(seg->s_share->rsmsi_refcnt > 0);

		seg->s_share->rsmsi_refcnt--;

		if (seg->s_share->rsmsi_refcnt == 0) {
			rsmsharelock_release(seg);
			mutex_destroy(&seg->s_share->rsmsi_lock);
			cv_destroy(&seg->s_share->rsmsi_cv);
			kmem_free((void *)(seg->s_share),
			    sizeof (rsm_import_share_t));
		} else {
			rsmsharelock_release(seg);
		}
		/*
		 * The following needs to be done after any
		 * rsmsharelock calls which use seg->s_share.
		 */
		seg->s_share = NULL;
	}

	cv_destroy(&seg->s_cv);
	mutex_destroy(&seg->s_lock);
	rsmacl_free(seg->s_acl, seg->s_acl_len);
	rsmpiacl_free(seg->s_acl_in, seg->s_acl_len);
	if (seg->s_adapter)
		rsmka_release_adapter(seg->s_adapter);

	kmem_free((void *)seg, sizeof (*seg));

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_free done\n"));

}


static rsmseg_t *
rsmseg_alloc(minor_t num, struct cred *cred)
{
	rsmseg_t *new;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc enter\n"));
	/*
	 * allocate memory for new segment. This should be a segkmem cache.
	 */
	new = (rsmseg_t *)kmem_zalloc(sizeof (*new), KM_SLEEP);

	new->s_state = RSM_STATE_NEW;
	new->s_minor = num;
	new->s_acl_len = 0;
	new->s_cookie = NULL;
	new->s_adapter = NULL;

	new->s_mode = 0777 & ~PTOU((ttoproc(curthread)))->u_cmask;
	/* we don't have a key yet, will set at export/connect */
	new->s_uid = crgetuid(cred);
	new->s_gid = crgetgid(cred);

	mutex_init(&new->s_lock, NULL, MUTEX_DRIVER, (void *)NULL);
	cv_init(&new->s_cv, NULL, CV_DRIVER, 0);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_alloc done\n"));

	return (new);
}

/* ******************************** Driver Open/Close/Poll *************** */

/*ARGSUSED1*/
static int
rsm_open(dev_t *devp, int flag, int otyp, struct cred *cred)
{
	minor_t rnum;
	DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

	DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open enter\n"));
	/*
	 * Char only
	 */
	if (otyp != OTYP_CHR) {
		DBG_PRINTF((category, RSM_ERR, "rsm_open: bad otyp\n"));
		return (EINVAL);
	}

	/*
	 * Only zero can be opened, clones are used for resources.
	 */
1950 */
1951 if (getminor(*devp) != RSM_DRIVER_MINOR) {
1952 DBG_PRINTF((category, RSM_ERR,
1953 "rsm_open: bad minor %d\n", getminor(*devp)));
1954 return (ENODEV);
1955 }
1956
1957 if ((flag & FEXCL) != 0 && secpolicy_excl_open(cred) != 0) {
1958 DBG_PRINTF((category, RSM_ERR, "rsm_open: bad perm\n"));
1959 return (EPERM);
1960 }
1961
1962 if (!(flag & FWRITE)) {
1963 /*
1964 * The library function _rsm_librsm_init calls open for
1965 * /dev/rsm with flag set to O_RDONLY. We want a valid
1966 * file descriptor to be returned for minor device zero.
1967 */
1968
1969 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
1970 "rsm_open RDONLY done\n"));
1971 return (DDI_SUCCESS);
1972 }
1973
1974 /*
1975 * - allocate new minor number and segment.
1976 * - add segment to list of all segments.
1977 * - set minordev data to segment
1978 * - update devp argument to new device
1979 * - update s_cred to cred; make sure you do crhold(cred);
1980 */
1981
1982 /* allocate a new resource number */
1983 if (rsmresource_alloc(&rnum) == RSM_SUCCESS) {
1984 /*
1985 * We will bind this minor to a specific resource in first
1986 * ioctl
1987 */
1988 *devp = makedevice(getmajor(*devp), rnum);
1989 } else {
1990 return (EAGAIN);
1991 }
1992
1993 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_open done\n"));
1994 return (DDI_SUCCESS);
1995 }
1996
1997 static void
1998 rsmseg_close(rsmseg_t *seg, int force_flag)
1999 {
2000 int e = RSM_SUCCESS;
2001
2002 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2003
2004 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close enter\n"));
2005
2006 rsmseglock_acquire(seg);
2007 if (!force_flag && (seg->s_hdr.rsmrc_type ==
2008 RSM_RESOURCE_EXPORT_SEGMENT)) {
2009 /*
2010 * If we are processing rsm_close wait for force_destroy
2011 * processing to complete since force_destroy processing
2012 * needs to finish before we can free the segment.
2013 * force_destroy is only for export segments
2014 */
2015 while (seg->s_flags & RSM_FORCE_DESTROY_WAIT) {
2016 cv_wait(&seg->s_cv, &seg->s_lock);
2017 }
2018 }
2019 rsmseglock_release(seg);
2020
2021 /* It's ok to read the state without a lock */
2022 switch (seg->s_state) {
2023 case RSM_STATE_EXPORT:
2024 case RSM_STATE_EXPORT_QUIESCING:
2025 case RSM_STATE_EXPORT_QUIESCED:
2026 e = rsm_unpublish(seg, 1);
2027 /* FALLTHRU */
2028 case RSM_STATE_BIND_QUIESCED:
2029 /* FALLTHRU */
2030 case RSM_STATE_BIND:
2031 e = rsm_unbind(seg);
2032 if (e != RSM_SUCCESS && force_flag == 1)
2033 return;
2034 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT);
2035 /* FALLTHRU */
2036 case RSM_STATE_NEW_QUIESCED:
2037 rsmseglock_acquire(seg);
2038 seg->s_state = RSM_STATE_NEW;
2039 cv_broadcast(&seg->s_cv);
2040 rsmseglock_release(seg);
2041 break;
2042 case RSM_STATE_NEW:
2043 break;
2044 case RSM_STATE_ZOMBIE:
2045 /*
2046 * Segments in this state have been removed off the
2047 * exported segments list and have been unpublished
2048 * and unbound. These segments have been removed during
2049 * a callback to the rsm_export_force_destroy, which
2050 * is called for the purpose of unlocking these
2051 * exported memory segments when a process exits but
2052 * leaves the segments locked down since rsm_close
2053 * is not called for the segments. This can happen
2054 * when a process calls fork or exec and then exits.
2055 * Once the segments are in the ZOMBIE state, all that
2056 * remains is to destroy them when rsm_close is called.
2057 * This is done here.
Thus, for such segments
2058 * the state is changed to new so that later in this
2059 * function rsmseg_free is called.
2060 */
2061 rsmseglock_acquire(seg);
2062 seg->s_state = RSM_STATE_NEW;
2063 rsmseglock_release(seg);
2064 break;
2065 case RSM_STATE_MAP_QUIESCE:
2066 case RSM_STATE_ACTIVE:
2067 /* Disconnect will handle the unmap */
2068 case RSM_STATE_CONN_QUIESCE:
2069 case RSM_STATE_CONNECT:
2070 case RSM_STATE_DISCONNECT:
2071 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
2072 (void) rsm_disconnect(seg);
2073 break;
2074 case RSM_STATE_MAPPING:
2075 /*FALLTHRU*/
2076 case RSM_STATE_END:
2077 DBG_PRINTF((category, RSM_ERR,
2078 "Invalid segment state %d in rsm_close\n", seg->s_state));
2079 break;
2080 default:
2081 DBG_PRINTF((category, RSM_ERR,
2082 "Invalid segment state %d in rsm_close\n", seg->s_state));
2083 break;
2084 }
2085
2086 /*
2087 * check state.
2088 * - make sure you do crfree(s_cred);
2089 * release segment and minor number
2090 */
2091 ASSERT(seg->s_state == RSM_STATE_NEW);
2092
2093 /*
2094 * The export_force_destroy callback exists to unlock the exported
2095 * segments of a process that did a fork or exec and then exited.
2096 * That callback calls this function with the force flag set to 1,
2097 * which indicates that the segment state must be converted to
2098 * ZOMBIE. This state means that the segments still exist, have been
2099 * unlocked and, most importantly, the only operation allowed on them
2100 * is destruction on an rsm_close.
2101 */
2102 if (force_flag) {
2103 rsmseglock_acquire(seg);
2104 seg->s_state = RSM_STATE_ZOMBIE;
2105 rsmseglock_release(seg);
2106 } else {
2107 rsmseg_free(seg);
2108 }
2109
2110 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_close done\n"));
2111 }
2112
2113 static int
2114 rsm_close(dev_t dev, int flag, int otyp, cred_t *cred)
2115 {
2116 minor_t rnum = getminor(dev);
2117 rsmresource_t *res;
2118 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL| RSM_DDI);
2119
2120 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close enter\n"));
2121
2122 flag = flag; cred = cred;
2123
2124 if (otyp != OTYP_CHR)
2125 return (EINVAL);
2126
2127 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rnum = %d\n", rnum));
2128
2129 /*
2130 * At this point we are the last reference to the resource.
2131 * Free resource number from resource table.
2132 * It's ok to remove number before we free the segment.
2133 * We need to lock the resource to protect against remote calls.
2134 */
2135 if (rnum == RSM_DRIVER_MINOR ||
2136 (res = rsmresource_free(rnum)) == NULL) {
2137 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2138 return (DDI_SUCCESS);
2139 }
2140
2141 switch (res->rsmrc_type) {
2142 case RSM_RESOURCE_EXPORT_SEGMENT:
2143 case RSM_RESOURCE_IMPORT_SEGMENT:
2144 rsmseg_close((rsmseg_t *)res, 0);
2145 break;
2146 case RSM_RESOURCE_BAR:
2147 DBG_PRINTF((category, RSM_ERR, "bad resource in rsm_close\n"));
2148 break;
2149 default:
2150 break;
2151 }
2152
2153 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_close done\n"));
2154
2155 return (DDI_SUCCESS);
2156 }
2157
2158 /*
2159 * rsm_inc_pgcnt
2160 *
2161 * Description: increment rsm page counter.
2162 *
2163 * Parameters: pgcnt_t pnum; number of pages to be used
2164 *
2165 * Returns: RSM_SUCCESS if memory limit not exceeded
2166 * RSMERR_INSUFFICIENT_MEM if memory limit exceeded; in this case
2167 * the page counter remains unchanged.
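 *
 * Usage sketch (illustrative only; this is the pairing that
 * rsm_bind_pages() and rsm_unbind_pages() below rely on):
 *
 *	if (rsm_inc_pgcnt(btopr(len)) != RSM_SUCCESS)
 *		return (RSMERR_INSUFFICIENT_MEM);
 *	...
 *	rsm_dec_pgcnt(btopr(len));	on teardown or on a later failure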
2168 *
2169 */
2170 static int
2171 rsm_inc_pgcnt(pgcnt_t pnum)
2172 {
2173 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2174 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2175 return (RSM_SUCCESS);
2176 }
2177
2178 mutex_enter(&rsm_pgcnt_lock);
2179
2180 if (rsm_pgcnt + pnum > rsm_pgcnt_max) {
2181 /* ensure that limits have not been exceeded */
2182 mutex_exit(&rsm_pgcnt_lock);
2183 return (RSMERR_INSUFFICIENT_MEM);
2184 }
2185
2186 rsm_pgcnt += pnum;
2187 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt incr to %d.\n",
2188 rsm_pgcnt));
2189 mutex_exit(&rsm_pgcnt_lock);
2190
2191 return (RSM_SUCCESS);
2192 }
2193
2194 /*
2195 * rsm_dec_pgcnt
2196 *
2197 * Description: decrement rsm page counter.
2198 *
2199 * Parameters: pgcnt_t pnum; number of pages freed
2200 *
2201 */
2202 static void
2203 rsm_dec_pgcnt(pgcnt_t pnum)
2204 {
2205 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2206
2207 if (rsm_pgcnt_max == 0) { /* no upper limit has been set */
2208 return;
2209 }
2210
2211 mutex_enter(&rsm_pgcnt_lock);
2212 ASSERT(rsm_pgcnt >= pnum);
2213 rsm_pgcnt -= pnum;
2214 DBG_PRINTF((category, RSM_DEBUG, "rsm_pgcnt decr to %d.\n",
2215 rsm_pgcnt));
2216 mutex_exit(&rsm_pgcnt_lock);
2217 }
2218
2219 static struct umem_callback_ops rsm_as_ops = {
2220 UMEM_CALLBACK_VERSION, /* version number */
2221 rsm_export_force_destroy,
2222 };
2223
2224 static int
2225 rsm_bind_pages(ddi_umem_cookie_t *cookie, caddr_t vaddr, size_t len,
2226 proc_t *procp)
2227 {
2228 int error = RSM_SUCCESS;
2229 ulong_t pnum;
2230 struct umem_callback_ops *callbackops = &rsm_as_ops;
2231
2232 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
2233
2234 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages enter\n"));
2235
2236 /*
2237 * Make sure vaddr and len are aligned on a page boundary
2238 */
2239 if ((uintptr_t)vaddr & (PAGESIZE - 1)) {
2240 return (RSMERR_BAD_ADDR);
2241 }
2242
2243 if (len & (PAGESIZE - 1)) {
2244 return (RSMERR_BAD_LENGTH);
2245 }
2246
2247 /*
2248 * Find number of pages
2249 */
2250 pnum = btopr(len);
2251 error = rsm_inc_pgcnt(pnum);
2252 if (error != RSM_SUCCESS) {
2253 DBG_PRINTF((category, RSM_ERR,
2254 "rsm_bind_pages:mem limit exceeded\n"));
2255 return (RSMERR_INSUFFICIENT_MEM);
2256 }
2257
2258 error = umem_lockmemory(vaddr, len,
2259 DDI_UMEMLOCK_WRITE|DDI_UMEMLOCK_READ|DDI_UMEMLOCK_LONGTERM,
2260 cookie,
2261 callbackops, procp);
2262
2263 if (error) {
2264 rsm_dec_pgcnt(pnum);
2265 DBG_PRINTF((category, RSM_ERR,
2266 "rsm_bind_pages:umem_lockmemory failed\n"));
2267 /*
2268 * umem_lockmemory, in the case of failure, returns one of
2269 * the following three errors. These are translated into
2270 * the RSMERR namespace and returned.
2271 */ 2272 if (error == EFAULT) 2273 return (RSMERR_BAD_ADDR); 2274 else if (error == EACCES) 2275 return (RSMERR_PERM_DENIED); 2276 else 2277 return (RSMERR_INSUFFICIENT_MEM); 2278 } 2279 2280 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind_pages done\n")); 2281 2282 return (error); 2283 2284 } 2285 2286 static int 2287 rsm_unbind_pages(rsmseg_t *seg) 2288 { 2289 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2290 2291 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages enter\n")); 2292 2293 ASSERT(rsmseglock_held(seg)); 2294 2295 if (seg->s_cookie != NULL) { 2296 /* unlock address range */ 2297 ddi_umem_unlock(seg->s_cookie); 2298 rsm_dec_pgcnt(btopr(seg->s_len)); 2299 seg->s_cookie = NULL; 2300 } 2301 2302 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind_pages done\n")); 2303 2304 return (RSM_SUCCESS); 2305 } 2306 2307 2308 static int 2309 rsm_bind(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode) 2310 { 2311 int e; 2312 adapter_t *adapter; 2313 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2314 2315 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind enter\n")); 2316 2317 adapter = rsm_getadapter(msg, mode); 2318 if (adapter == NULL) { 2319 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2320 "rsm_bind done:no adapter\n")); 2321 return (RSMERR_CTLR_NOT_PRESENT); 2322 } 2323 2324 /* lock address range */ 2325 if (msg->vaddr == NULL) { 2326 rsmka_release_adapter(adapter); 2327 DBG_PRINTF((category, RSM_ERR, 2328 "rsm: rsm_bind done: invalid vaddr\n")); 2329 return (RSMERR_BAD_ADDR); 2330 } 2331 if (msg->len <= 0) { 2332 rsmka_release_adapter(adapter); 2333 DBG_PRINTF((category, RSM_ERR, 2334 "rsm_bind: invalid length\n")); 2335 return (RSMERR_BAD_LENGTH); 2336 } 2337 2338 /* Lock segment */ 2339 rsmseglock_acquire(seg); 2340 2341 while (seg->s_state == RSM_STATE_NEW_QUIESCED) { 2342 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 2343 DBG_PRINTF((category, RSM_DEBUG, 2344 "rsm_bind done: cv_wait INTERRUPTED")); 2345 rsmka_release_adapter(adapter); 2346 rsmseglock_release(seg); 2347 return (RSMERR_INTERRUPTED); 2348 } 2349 } 2350 2351 ASSERT(seg->s_state == RSM_STATE_NEW); 2352 2353 ASSERT(seg->s_cookie == NULL); 2354 2355 e = rsm_bind_pages(&seg->s_cookie, msg->vaddr, msg->len, curproc); 2356 if (e == RSM_SUCCESS) { 2357 seg->s_flags |= RSM_USER_MEMORY; 2358 if (msg->perm & RSM_ALLOW_REBIND) { 2359 seg->s_flags |= RSMKA_ALLOW_UNBIND_REBIND; 2360 } 2361 if (msg->perm & RSM_CREATE_SEG_DONTWAIT) { 2362 seg->s_flags |= RSMKA_SET_RESOURCE_DONTWAIT; 2363 } 2364 seg->s_region.r_vaddr = msg->vaddr; 2365 /* 2366 * Set the s_pid value in the segment structure. This is used 2367 * to identify exported segments belonging to a particular 2368 * process so that when the process exits, these segments can 2369 * be unlocked forcefully even if rsm_close is not called on 2370 * process exit since there maybe other processes referencing 2371 * them (for example on a fork or exec). 2372 * The s_pid value is also used to authenticate the process 2373 * doing a publish or unpublish on the export segment. Only 2374 * the creator of the export segment has a right to do a 2375 * publish or unpublish and unbind on the segment. 
2376 */
2377 seg->s_pid = ddi_get_pid();
2378 seg->s_len = msg->len;
2379 seg->s_state = RSM_STATE_BIND;
2380 seg->s_adapter = adapter;
2381 seg->s_proc = curproc;
2382 } else {
2383 rsmka_release_adapter(adapter);
2384 DBG_PRINTF((category, RSM_WARNING,
2385 "unable to lock down pages\n"));
2386 }
2387
2388 msg->rnum = seg->s_minor;
2389 /* Unlock segment */
2390 rsmseglock_release(seg);
2391
2392 if (e == RSM_SUCCESS) {
2393 /* copyout the resource number */
2394 #ifdef _MULTI_DATAMODEL
2395 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2396 rsm_ioctlmsg32_t msg32;
2397
2398 msg32.rnum = msg->rnum;
2399 if (ddi_copyout((caddr_t)&msg32.rnum,
2400 (caddr_t)&((rsm_ioctlmsg32_t *)dataptr)->rnum,
2401 sizeof (minor_t), mode)) {
2402 rsmka_release_adapter(adapter);
2403 e = RSMERR_BAD_ADDR;
2404 }
2405 } else
2406 #endif
2407 if (ddi_copyout((caddr_t)&msg->rnum,
2408 (caddr_t)&((rsm_ioctlmsg_t *)dataptr)->rnum,
2409 sizeof (minor_t), mode)) {
2410 rsmka_release_adapter(adapter);
2411 e = RSMERR_BAD_ADDR;
2412 }
2413 }
2414
2415 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_bind done\n"));
2416
2417 return (e);
2418 }
2419
2420 static void
2421 rsm_remap_local_importers(rsm_node_id_t src_nodeid,
2422 rsm_memseg_id_t ex_segid,
2423 ddi_umem_cookie_t cookie)
2424
2425 {
2426 rsmresource_t *p = NULL;
2427 rsmhash_table_t *rhash = &rsm_import_segs;
2428 uint_t index;
2429
2430 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE,
2431 "rsm_remap_local_importers enter\n"));
2432
2433 index = rsmhash(ex_segid);
2434
2435 rw_enter(&rhash->rsmhash_rw, RW_READER);
2436
2437 p = rsmhash_getbkt(rhash, index);
2438
2439 for (; p; p = p->rsmrc_next) {
2440 rsmseg_t *seg = (rsmseg_t *)p;
2441 rsmseglock_acquire(seg);
2442 /*
2443 * Change the s_cookie value of only the local importers
2444 * which have been mapped (in state RSM_STATE_ACTIVE).
2445 * Note that there is no need to change the s_cookie value
2446 * if the imported segment is in RSM_STATE_MAPPING since
2447 * eventually the s_cookie will be updated via the mapping
2448 * functionality.
2449 */ 2450 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid) && 2451 (seg->s_state == RSM_STATE_ACTIVE)) { 2452 seg->s_cookie = cookie; 2453 } 2454 rsmseglock_release(seg); 2455 } 2456 rw_exit(&rhash->rsmhash_rw); 2457 2458 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_FUNC_ALL, RSM_DEBUG_VERBOSE, 2459 "rsm_remap_local_importers done\n")); 2460 } 2461 2462 static int 2463 rsm_rebind(rsmseg_t *seg, rsm_ioctlmsg_t *msg) 2464 { 2465 int e; 2466 adapter_t *adapter; 2467 ddi_umem_cookie_t cookie; 2468 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2469 2470 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind enter\n")); 2471 2472 /* Check for permissions to rebind */ 2473 if (!(seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND)) { 2474 return (RSMERR_REBIND_NOT_ALLOWED); 2475 } 2476 2477 if (seg->s_pid != ddi_get_pid() && 2478 ddi_get_pid() != 0) { 2479 DBG_PRINTF((category, RSM_ERR, "rsm_rebind: Not owner\n")); 2480 return (RSMERR_NOT_CREATOR); 2481 } 2482 2483 /* 2484 * We will not be allowing partial rebind and hence length passed 2485 * in must be same as segment length 2486 */ 2487 if (msg->vaddr == NULL) { 2488 DBG_PRINTF((category, RSM_ERR, 2489 "rsm_rebind done: null msg->vaddr\n")); 2490 return (RSMERR_BAD_ADDR); 2491 } 2492 if (msg->len != seg->s_len) { 2493 DBG_PRINTF((category, RSM_ERR, 2494 "rsm_rebind: invalid length\n")); 2495 return (RSMERR_BAD_LENGTH); 2496 } 2497 2498 /* Lock segment */ 2499 rsmseglock_acquire(seg); 2500 2501 while ((seg->s_state == RSM_STATE_BIND_QUIESCED) || 2502 (seg->s_state == RSM_STATE_EXPORT_QUIESCING) || 2503 (seg->s_state == RSM_STATE_EXPORT_QUIESCED)) { 2504 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 2505 rsmseglock_release(seg); 2506 DBG_PRINTF((category, RSM_DEBUG, 2507 "rsm_rebind done: cv_wait INTERRUPTED")); 2508 return (RSMERR_INTERRUPTED); 2509 } 2510 } 2511 2512 /* verify segment state */ 2513 if ((seg->s_state != RSM_STATE_BIND) && 2514 (seg->s_state != RSM_STATE_EXPORT)) { 2515 /* Unlock segment */ 2516 rsmseglock_release(seg); 2517 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2518 "rsm_rebind done: invalid state\n")); 2519 return (RSMERR_BAD_SEG_HNDL); 2520 } 2521 2522 ASSERT(seg->s_cookie != NULL); 2523 2524 if (msg->vaddr == seg->s_region.r_vaddr) { 2525 rsmseglock_release(seg); 2526 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n")); 2527 return (RSM_SUCCESS); 2528 } 2529 2530 e = rsm_bind_pages(&cookie, msg->vaddr, msg->len, curproc); 2531 if (e == RSM_SUCCESS) { 2532 struct buf *xbuf; 2533 dev_t sdev = 0; 2534 rsm_memory_local_t mem; 2535 2536 xbuf = ddi_umem_iosetup(cookie, 0, msg->len, B_WRITE, 2537 sdev, 0, NULL, DDI_UMEM_SLEEP); 2538 ASSERT(xbuf != NULL); 2539 2540 mem.ms_type = RSM_MEM_BUF; 2541 mem.ms_bp = xbuf; 2542 2543 adapter = seg->s_adapter; 2544 e = adapter->rsmpi_ops->rsm_rebind( 2545 seg->s_handle.out, 0, &mem, 2546 RSM_RESOURCE_DONTWAIT, NULL); 2547 2548 if (e == RSM_SUCCESS) { 2549 /* 2550 * unbind the older pages, and unload local importers; 2551 * but don't disconnect importers 2552 */ 2553 (void) rsm_unbind_pages(seg); 2554 seg->s_cookie = cookie; 2555 seg->s_region.r_vaddr = msg->vaddr; 2556 rsm_remap_local_importers(my_nodeid, seg->s_segid, 2557 cookie); 2558 } else { 2559 /* 2560 * Unbind the pages associated with "cookie" by the 2561 * rsm_bind_pages calls prior to this. This is 2562 * similar to what is done in the rsm_unbind_pages 2563 * routine for the seg->s_cookie. 
2564 */ 2565 ddi_umem_unlock(cookie); 2566 rsm_dec_pgcnt(btopr(msg->len)); 2567 DBG_PRINTF((category, RSM_ERR, 2568 "rsm_rebind failed with %d\n", e)); 2569 } 2570 /* 2571 * At present there is no dependency on the existence of xbuf. 2572 * So we can free it here. If in the future this changes, it can 2573 * be freed sometime during the segment destroy. 2574 */ 2575 freerbuf(xbuf); 2576 } 2577 2578 /* Unlock segment */ 2579 rsmseglock_release(seg); 2580 2581 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_rebind done\n")); 2582 2583 return (e); 2584 } 2585 2586 static int 2587 rsm_unbind(rsmseg_t *seg) 2588 { 2589 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2590 2591 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind enter\n")); 2592 2593 rsmseglock_acquire(seg); 2594 2595 /* verify segment state */ 2596 if ((seg->s_state != RSM_STATE_BIND) && 2597 (seg->s_state != RSM_STATE_BIND_QUIESCED)) { 2598 rsmseglock_release(seg); 2599 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2600 "rsm_unbind: invalid state\n")); 2601 return (RSMERR_BAD_SEG_HNDL); 2602 } 2603 2604 /* unlock current range */ 2605 (void) rsm_unbind_pages(seg); 2606 2607 if (seg->s_state == RSM_STATE_BIND) { 2608 seg->s_state = RSM_STATE_NEW; 2609 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) { 2610 seg->s_state = RSM_STATE_NEW_QUIESCED; 2611 } 2612 2613 rsmseglock_release(seg); 2614 2615 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unbind done\n")); 2616 2617 return (RSM_SUCCESS); 2618 } 2619 2620 /* **************************** Exporter Access List Management ******* */ 2621 static void 2622 rsmacl_free(rsmapi_access_entry_t *acl, int acl_len) 2623 { 2624 int acl_sz; 2625 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2626 2627 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free enter\n")); 2628 2629 /* acl could be NULL */ 2630 2631 if (acl != NULL && acl_len > 0) { 2632 acl_sz = acl_len * sizeof (rsmapi_access_entry_t); 2633 kmem_free((void *)acl, acl_sz); 2634 } 2635 2636 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_free done\n")); 2637 } 2638 2639 static void 2640 rsmpiacl_free(rsm_access_entry_t *acl, int acl_len) 2641 { 2642 int acl_sz; 2643 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2644 2645 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free enter\n")); 2646 2647 if (acl != NULL && acl_len > 0) { 2648 acl_sz = acl_len * sizeof (rsm_access_entry_t); 2649 kmem_free((void *)acl, acl_sz); 2650 } 2651 2652 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_free done\n")); 2653 2654 } 2655 2656 static int 2657 rsmacl_build(rsm_ioctlmsg_t *msg, int mode, 2658 rsmapi_access_entry_t **list, int *len, int loopback) 2659 { 2660 rsmapi_access_entry_t *acl; 2661 int acl_len; 2662 int i; 2663 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2664 2665 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build enter\n")); 2666 2667 *len = 0; 2668 *list = NULL; 2669 2670 acl_len = msg->acl_len; 2671 if ((loopback && acl_len > 1) || (acl_len < 0) || 2672 (acl_len > MAX_NODES)) { 2673 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2674 "rsmacl_build done: acl invalid\n")); 2675 return (RSMERR_BAD_ACL); 2676 } 2677 2678 if (acl_len > 0 && acl_len <= MAX_NODES) { 2679 size_t acl_size = acl_len * sizeof (rsmapi_access_entry_t); 2680 2681 acl = kmem_alloc(acl_size, KM_SLEEP); 2682 2683 if (ddi_copyin((caddr_t)msg->acl, (caddr_t)acl, 2684 acl_size, mode)) { 2685 kmem_free((void *) acl, acl_size); 2686 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2687 "rsmacl_build done: BAD_ADDR\n")); 2688 return (RSMERR_BAD_ADDR); 
2689 } 2690 2691 /* 2692 * Verify access list 2693 */ 2694 for (i = 0; i < acl_len; i++) { 2695 if (acl[i].ae_node > MAX_NODES || 2696 (loopback && (acl[i].ae_node != my_nodeid)) || 2697 acl[i].ae_permission > RSM_ACCESS_TRUSTED) { 2698 /* invalid entry */ 2699 kmem_free((void *) acl, acl_size); 2700 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2701 "rsmacl_build done: EINVAL\n")); 2702 return (RSMERR_BAD_ACL); 2703 } 2704 } 2705 2706 *len = acl_len; 2707 *list = acl; 2708 } 2709 2710 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmacl_build done\n")); 2711 2712 return (DDI_SUCCESS); 2713 } 2714 2715 static int 2716 rsmpiacl_create(rsmapi_access_entry_t *src, rsm_access_entry_t **dest, 2717 int acl_len, adapter_t *adapter) 2718 { 2719 rsm_access_entry_t *acl; 2720 rsm_addr_t hwaddr; 2721 int i; 2722 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2723 2724 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create enter\n")); 2725 2726 if (src != NULL) { 2727 size_t acl_size = acl_len * sizeof (rsm_access_entry_t); 2728 acl = kmem_alloc(acl_size, KM_SLEEP); 2729 2730 /* 2731 * translate access list 2732 */ 2733 for (i = 0; i < acl_len; i++) { 2734 if (src[i].ae_node == my_nodeid) { 2735 acl[i].ae_addr = adapter->hwaddr; 2736 } else { 2737 hwaddr = get_remote_hwaddr(adapter, 2738 src[i].ae_node); 2739 if ((int64_t)hwaddr < 0) { 2740 /* invalid hwaddr */ 2741 kmem_free((void *) acl, acl_size); 2742 DBG_PRINTF((category, 2743 RSM_DEBUG_VERBOSE, 2744 "rsmpiacl_create done:" 2745 "EINVAL hwaddr\n")); 2746 return (RSMERR_INTERNAL_ERROR); 2747 } 2748 acl[i].ae_addr = hwaddr; 2749 } 2750 /* rsmpi understands only RSM_PERM_XXXX */ 2751 acl[i].ae_permission = 2752 src[i].ae_permission & RSM_PERM_RDWR; 2753 } 2754 *dest = acl; 2755 } else { 2756 *dest = NULL; 2757 } 2758 2759 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmpiacl_create done\n")); 2760 2761 return (RSM_SUCCESS); 2762 } 2763 2764 static int 2765 rsmsegacl_validate(rsmipc_request_t *req, rsm_node_id_t rnode, 2766 rsmipc_reply_t *reply) 2767 { 2768 2769 int i; 2770 rsmseg_t *seg; 2771 rsm_memseg_id_t key = req->rsmipc_key; 2772 rsm_permission_t perm = req->rsmipc_perm; 2773 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2774 2775 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2776 "rsmsegacl_validate enter\n")); 2777 2778 /* 2779 * Find segment and grab its lock. The reason why we grab the segment 2780 * lock in side the search is to avoid the race when the segment is 2781 * being deleted and we already have a pointer to it. 2782 */ 2783 seg = rsmexport_lookup(key); 2784 if (!seg) { 2785 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2786 "rsmsegacl_validate done: %u ENXIO\n", key)); 2787 return (RSMERR_SEG_NOT_PUBLISHED); 2788 } 2789 2790 ASSERT(rsmseglock_held(seg)); 2791 ASSERT(seg->s_state == RSM_STATE_EXPORT); 2792 2793 /* 2794 * We implement a 2-level protection scheme. 2795 * First, we check if local/remote host has access rights. 2796 * Second, we check if the user has access rights. 
2797 * 2798 * This routine only validates the rnode access_list 2799 */ 2800 if (seg->s_acl_len > 0) { 2801 /* 2802 * Check host access list 2803 */ 2804 ASSERT(seg->s_acl != NULL); 2805 for (i = 0; i < seg->s_acl_len; i++) { 2806 if (seg->s_acl[i].ae_node == rnode) { 2807 perm &= seg->s_acl[i].ae_permission; 2808 goto found; 2809 } 2810 } 2811 /* rnode is not found in the list */ 2812 rsmseglock_release(seg); 2813 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 2814 "rsmsegacl_validate done: EPERM\n")); 2815 return (RSMERR_SEG_NOT_PUBLISHED_TO_NODE); 2816 } else { 2817 /* use default owner creation umask */ 2818 perm &= seg->s_mode; 2819 } 2820 2821 found: 2822 /* update perm for this node */ 2823 reply->rsmipc_mode = perm; 2824 reply->rsmipc_uid = seg->s_uid; 2825 reply->rsmipc_gid = seg->s_gid; 2826 reply->rsmipc_segid = seg->s_segid; 2827 reply->rsmipc_seglen = seg->s_len; 2828 2829 /* 2830 * Perm of requesting node is valid; source will validate user 2831 */ 2832 rsmseglock_release(seg); 2833 2834 /* 2835 * Add the importer to the list right away, if connect fails 2836 * the importer will ask the exporter to remove it. 2837 */ 2838 importer_list_add(rnode, key, req->rsmipc_adapter_hwaddr, 2839 req->rsmipc_segment_cookie); 2840 2841 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegacl_validate done\n")); 2842 2843 return (RSM_SUCCESS); 2844 } 2845 2846 2847 /* ************************** Exporter Calls ************************* */ 2848 2849 static int 2850 rsm_publish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, intptr_t dataptr, int mode) 2851 { 2852 int e; 2853 int acl_len; 2854 rsmapi_access_entry_t *acl; 2855 rsm_access_entry_t *rsmpi_acl; 2856 rsm_memory_local_t mem; 2857 struct buf *xbuf; 2858 dev_t sdev = 0; 2859 adapter_t *adapter; 2860 rsm_memseg_id_t segment_id = 0; 2861 int loopback_flag = 0; 2862 int create_flags = 0; 2863 rsm_resource_callback_t callback_flag; 2864 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 2865 2866 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish enter\n")); 2867 2868 if (seg->s_adapter == &loopback_adapter) 2869 loopback_flag = 1; 2870 2871 if (seg->s_pid != ddi_get_pid() && 2872 ddi_get_pid() != 0) { 2873 DBG_PRINTF((category, RSM_ERR, 2874 "rsm_publish: Not creator\n")); 2875 return (RSMERR_NOT_CREATOR); 2876 } 2877 2878 /* 2879 * Get per node access list 2880 */ 2881 e = rsmacl_build(msg, mode, &acl, &acl_len, loopback_flag); 2882 if (e != DDI_SUCCESS) { 2883 DBG_PRINTF((category, RSM_ERR, 2884 "rsm_publish done: rsmacl_build failed\n")); 2885 return (e); 2886 } 2887 2888 /* 2889 * The application provided msg->key is used for resolving a 2890 * segment id according to the following: 2891 * key = 0 Kernel Agent selects the segment id 2892 * key <= RSM_DLPI_ID_END Reserved for system usage except 2893 * RSMLIB range 2894 * key < RSM_USER_APP_ID_BASE segment id = key 2895 * key >= RSM_USER_APP_ID_BASE Reserved for KA selections 2896 * 2897 * rsm_nextavail_segmentid is initialized to 0x80000000 and 2898 * overflows to zero after 0x80000000 allocations. 2899 * An algorithm is needed which allows reinitialization and provides 2900 * for reallocation after overflow. For now, ENOMEM is returned 2901 * once the overflow condition has occurred. 
2902 */ 2903 if (msg->key == 0) { 2904 mutex_enter(&rsm_lock); 2905 segment_id = rsm_nextavail_segmentid; 2906 if (segment_id != 0) { 2907 rsm_nextavail_segmentid++; 2908 mutex_exit(&rsm_lock); 2909 } else { 2910 mutex_exit(&rsm_lock); 2911 DBG_PRINTF((category, RSM_ERR, 2912 "rsm_publish done: no more keys avlbl\n")); 2913 return (RSMERR_INSUFFICIENT_RESOURCES); 2914 } 2915 } else if BETWEEN(msg->key, RSM_RSMLIB_ID_BASE, RSM_RSMLIB_ID_END) 2916 /* range reserved for internal use by base/ndi libraries */ 2917 segment_id = msg->key; 2918 else if (msg->key <= RSM_DLPI_ID_END) 2919 return (RSMERR_RESERVED_SEGID); 2920 else if (msg->key <= (uint_t)RSM_USER_APP_ID_BASE -1) 2921 segment_id = msg->key; 2922 else { 2923 DBG_PRINTF((category, RSM_ERR, 2924 "rsm_publish done: invalid key %u\n", msg->key)); 2925 return (RSMERR_RESERVED_SEGID); 2926 } 2927 2928 /* Add key to exportlist; The segment lock is held on success */ 2929 e = rsmexport_add(seg, segment_id); 2930 if (e) { 2931 rsmacl_free(acl, acl_len); 2932 DBG_PRINTF((category, RSM_ERR, 2933 "rsm_publish done: export_add failed: %d\n", e)); 2934 return (e); 2935 } 2936 2937 seg->s_segid = segment_id; 2938 2939 if ((seg->s_state != RSM_STATE_BIND) && 2940 (seg->s_state != RSM_STATE_BIND_QUIESCED)) { 2941 /* state changed since then, free acl and return */ 2942 rsmseglock_release(seg); 2943 rsmexport_rm(seg); 2944 rsmacl_free(acl, acl_len); 2945 DBG_PRINTF((category, RSM_ERR, 2946 "rsm_publish done: segment in wrong state: %d\n", 2947 seg->s_state)); 2948 return (RSMERR_BAD_SEG_HNDL); 2949 } 2950 2951 /* 2952 * If this is for a local memory handle and permissions are zero, 2953 * then the surrogate segment is very large and we want to skip 2954 * allocation of DVMA space. 2955 * 2956 * Careful! If the user didn't use an ACL list, acl will be a NULL 2957 * pointer. Check that before dereferencing it. 2958 */ 2959 if (acl != (rsmapi_access_entry_t *)NULL) { 2960 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0) 2961 goto skipdriver; 2962 } 2963 2964 /* create segment */ 2965 xbuf = ddi_umem_iosetup(seg->s_cookie, 0, seg->s_len, B_WRITE, 2966 sdev, 0, NULL, DDI_UMEM_SLEEP); 2967 ASSERT(xbuf != NULL); 2968 2969 mem.ms_type = RSM_MEM_BUF; 2970 mem.ms_bp = xbuf; 2971 2972 /* This call includes a bind operations */ 2973 2974 adapter = seg->s_adapter; 2975 /* 2976 * create a acl list with hwaddr for RSMPI publish 2977 */ 2978 e = rsmpiacl_create(acl, &rsmpi_acl, acl_len, adapter); 2979 2980 if (e != RSM_SUCCESS) { 2981 rsmseglock_release(seg); 2982 rsmexport_rm(seg); 2983 rsmacl_free(acl, acl_len); 2984 freerbuf(xbuf); 2985 DBG_PRINTF((category, RSM_ERR, 2986 "rsm_publish done: rsmpiacl_create failed: %d\n", e)); 2987 return (e); 2988 } 2989 2990 if (seg->s_state == RSM_STATE_BIND) { 2991 /* create segment */ 2992 2993 /* This call includes a bind operations */ 2994 2995 if (seg->s_flags & RSMKA_ALLOW_UNBIND_REBIND) { 2996 create_flags = RSM_ALLOW_UNBIND_REBIND; 2997 } 2998 2999 if (seg->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) { 3000 callback_flag = RSM_RESOURCE_DONTWAIT; 3001 } else { 3002 callback_flag = RSM_RESOURCE_SLEEP; 3003 } 3004 3005 e = adapter->rsmpi_ops->rsm_seg_create( 3006 adapter->rsmpi_handle, 3007 &seg->s_handle.out, seg->s_len, 3008 create_flags, &mem, 3009 callback_flag, NULL); 3010 /* 3011 * At present there is no dependency on the existence of xbuf. 3012 * So we can free it here. If in the future this changes, it can 3013 * be freed sometime during the segment destroy. 
3014 */ 3015 freerbuf(xbuf); 3016 3017 if (e != RSM_SUCCESS) { 3018 rsmseglock_release(seg); 3019 rsmexport_rm(seg); 3020 rsmacl_free(acl, acl_len); 3021 rsmpiacl_free(rsmpi_acl, acl_len); 3022 DBG_PRINTF((category, RSM_ERR, 3023 "rsm_publish done: export_create failed: %d\n", e)); 3024 /* 3025 * The following assertion ensures that the two errors 3026 * related to the length and its alignment do not occur 3027 * since they have been checked during export_create 3028 */ 3029 ASSERT(e != RSMERR_BAD_MEM_ALIGNMENT && 3030 e != RSMERR_BAD_LENGTH); 3031 if (e == RSMERR_NOT_MEM) 3032 e = RSMERR_INSUFFICIENT_MEM; 3033 3034 return (e); 3035 } 3036 /* export segment, this should create an IMMU mapping */ 3037 e = adapter->rsmpi_ops->rsm_publish( 3038 seg->s_handle.out, 3039 rsmpi_acl, acl_len, 3040 seg->s_segid, 3041 RSM_RESOURCE_DONTWAIT, NULL); 3042 3043 if (e != RSM_SUCCESS) { 3044 adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out); 3045 rsmseglock_release(seg); 3046 rsmexport_rm(seg); 3047 rsmacl_free(acl, acl_len); 3048 rsmpiacl_free(rsmpi_acl, acl_len); 3049 DBG_PRINTF((category, RSM_ERR, 3050 "rsm_publish done: export_publish failed: %d\n", 3051 e)); 3052 return (e); 3053 } 3054 } 3055 3056 seg->s_acl_in = rsmpi_acl; 3057 3058 skipdriver: 3059 /* defer s_acl/s_acl_len -> avoid crash in rsmseg_free */ 3060 seg->s_acl_len = acl_len; 3061 seg->s_acl = acl; 3062 3063 if (seg->s_state == RSM_STATE_BIND) { 3064 seg->s_state = RSM_STATE_EXPORT; 3065 } else if (seg->s_state == RSM_STATE_BIND_QUIESCED) { 3066 seg->s_state = RSM_STATE_EXPORT_QUIESCED; 3067 cv_broadcast(&seg->s_cv); 3068 } 3069 3070 rsmseglock_release(seg); 3071 3072 /* 3073 * If the segment id was solicited, then return it in 3074 * the original incoming message. 3075 */ 3076 if (msg->key == 0) { 3077 msg->key = segment_id; 3078 #ifdef _MULTI_DATAMODEL 3079 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 3080 rsm_ioctlmsg32_t msg32; 3081 3082 msg32.key = msg->key; 3083 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3084 "rsm_publish done\n")); 3085 return (ddi_copyout((caddr_t)&msg32, 3086 (caddr_t)dataptr, sizeof (msg32), mode)); 3087 } 3088 #endif 3089 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3090 "rsm_publish done\n")); 3091 return (ddi_copyout((caddr_t)msg, 3092 (caddr_t)dataptr, sizeof (*msg), mode)); 3093 } 3094 3095 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_publish done\n")); 3096 return (DDI_SUCCESS); 3097 } 3098 3099 /* 3100 * This function modifies the access control list of an already published 3101 * segment. There is no effect on import segments which are already 3102 * connected. 
3103 */ 3104 static int 3105 rsm_republish(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int mode) 3106 { 3107 rsmapi_access_entry_t *new_acl, *old_acl, *tmp_acl; 3108 rsm_access_entry_t *rsmpi_new_acl, *rsmpi_old_acl; 3109 int new_acl_len, old_acl_len, tmp_acl_len; 3110 int e, i; 3111 adapter_t *adapter; 3112 int loopback_flag = 0; 3113 rsm_memseg_id_t key; 3114 rsm_permission_t permission; 3115 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT); 3116 3117 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish enter\n")); 3118 3119 if ((seg->s_state != RSM_STATE_EXPORT) && 3120 (seg->s_state != RSM_STATE_EXPORT_QUIESCED) && 3121 (seg->s_state != RSM_STATE_EXPORT_QUIESCING)) 3122 return (RSMERR_SEG_NOT_PUBLISHED); 3123 3124 if (seg->s_pid != ddi_get_pid() && 3125 ddi_get_pid() != 0) { 3126 DBG_PRINTF((category, RSM_ERR, 3127 "rsm_republish: Not owner\n")); 3128 return (RSMERR_NOT_CREATOR); 3129 } 3130 3131 if (seg->s_adapter == &loopback_adapter) 3132 loopback_flag = 1; 3133 3134 /* 3135 * Build new list first 3136 */ 3137 e = rsmacl_build(msg, mode, &new_acl, &new_acl_len, loopback_flag); 3138 if (e) { 3139 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3140 "rsm_republish done: rsmacl_build failed %d", e)); 3141 return (e); 3142 } 3143 3144 /* Lock segment */ 3145 rsmseglock_acquire(seg); 3146 /* 3147 * a republish is in progress - REPUBLISH message is being 3148 * sent to the importers so wait for it to complete OR 3149 * wait till DR completes 3150 */ 3151 while (((seg->s_state == RSM_STATE_EXPORT) && 3152 (seg->s_flags & RSM_REPUBLISH_WAIT)) || 3153 (seg->s_state == RSM_STATE_EXPORT_QUIESCED) || 3154 (seg->s_state == RSM_STATE_EXPORT_QUIESCING)) { 3155 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3156 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3157 "rsm_republish done: cv_wait INTERRUPTED")); 3158 rsmseglock_release(seg); 3159 rsmacl_free(new_acl, new_acl_len); 3160 return (RSMERR_INTERRUPTED); 3161 } 3162 } 3163 3164 /* recheck if state is valid */ 3165 if (seg->s_state != RSM_STATE_EXPORT) { 3166 rsmseglock_release(seg); 3167 rsmacl_free(new_acl, new_acl_len); 3168 return (RSMERR_SEG_NOT_PUBLISHED); 3169 } 3170 3171 key = seg->s_key; 3172 old_acl = seg->s_acl; 3173 old_acl_len = seg->s_acl_len; 3174 3175 seg->s_acl = new_acl; 3176 seg->s_acl_len = new_acl_len; 3177 3178 /* 3179 * This call will only be meaningful if and when the interconnect 3180 * layer makes use of the access list 3181 */ 3182 adapter = seg->s_adapter; 3183 /* 3184 * create a acl list with hwaddr for RSMPI publish 3185 */ 3186 e = rsmpiacl_create(new_acl, &rsmpi_new_acl, new_acl_len, adapter); 3187 3188 if (e != RSM_SUCCESS) { 3189 seg->s_acl = old_acl; 3190 seg->s_acl_len = old_acl_len; 3191 rsmseglock_release(seg); 3192 rsmacl_free(new_acl, new_acl_len); 3193 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3194 "rsm_republish done: rsmpiacl_create failed %d", e)); 3195 return (e); 3196 } 3197 rsmpi_old_acl = seg->s_acl_in; 3198 seg->s_acl_in = rsmpi_new_acl; 3199 3200 e = adapter->rsmpi_ops->rsm_republish(seg->s_handle.out, 3201 seg->s_acl_in, seg->s_acl_len, 3202 RSM_RESOURCE_DONTWAIT, NULL); 3203 3204 if (e != RSM_SUCCESS) { 3205 seg->s_acl = old_acl; 3206 seg->s_acl_in = rsmpi_old_acl; 3207 seg->s_acl_len = old_acl_len; 3208 rsmseglock_release(seg); 3209 rsmacl_free(new_acl, new_acl_len); 3210 rsmpiacl_free(rsmpi_new_acl, new_acl_len); 3211 3212 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3213 "rsm_republish done: rsmpi republish failed %d\n", e)); 3214 return (e); 3215 } 3216 3217 /* create a tmp copy of the new acl */ 3218 
3218 tmp_acl_len = new_acl_len;
3219 if (tmp_acl_len > 0) {
3220 tmp_acl = kmem_zalloc(new_acl_len*sizeof (*tmp_acl), KM_SLEEP);
3221 for (i = 0; i < tmp_acl_len; i++) {
3222 tmp_acl[i].ae_node = new_acl[i].ae_node;
3223 tmp_acl[i].ae_permission = new_acl[i].ae_permission;
3224 }
3225 /*
3226 * The default permission of a node which was in the old
3227 * ACL but not in the new ACL is 0, i.e. no access.
3228 */
3229 permission = 0;
3230 } else {
3231 /*
3232 * NULL acl means all importers can connect and
3233 * default permission will be owner creation umask
3234 */
3235 tmp_acl = NULL;
3236 permission = seg->s_mode;
3237 }
3238
3239 /* make other republishers wait for this republish to complete */
3240 seg->s_flags |= RSM_REPUBLISH_WAIT;
3241
3242 rsmseglock_release(seg);
3243
3244 /* send the new perms to the importing nodes */
3245 rsm_send_republish(key, tmp_acl, tmp_acl_len, permission);
3246
3247 rsmseglock_acquire(seg);
3248 seg->s_flags &= ~RSM_REPUBLISH_WAIT;
3249 /* wake up anyone waiting for republish to complete */
3250 cv_broadcast(&seg->s_cv);
3251 rsmseglock_release(seg);
3252
3253 rsmacl_free(tmp_acl, tmp_acl_len);
3254 rsmacl_free(old_acl, old_acl_len);
3255 rsmpiacl_free(rsmpi_old_acl, old_acl_len);
3256
3257 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_republish done\n"));
3258 return (DDI_SUCCESS);
3259 }
3260
3261 static int
3262 rsm_unpublish(rsmseg_t *seg, int mode)
3263 {
3264 rsmapi_access_entry_t *acl;
3265 rsm_access_entry_t *rsmpi_acl;
3266 int acl_len;
3267 int e;
3268 clock_t ticks;
3269 adapter_t *adapter;
3270 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT);
3271
3272 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish enter\n"));
3273
3274 if (seg->s_pid != ddi_get_pid() &&
3275 ddi_get_pid() != 0) {
3276 DBG_PRINTF((category, RSM_ERR,
3277 "rsm_unpublish: Not creator\n"));
3278 return (RSMERR_NOT_CREATOR);
3279 }
3280
3281 rsmseglock_acquire(seg);
3282 /*
3283 * wait for QUIESCING to complete here before rsmexport_rm
3284 * is called, because the SUSPEND_COMPLETE message, which changes
3285 * the seg state from EXPORT_QUIESCING to EXPORT_QUIESCED and
3286 * signals the cv_wait, needs to find it in the hashtable.
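 *
 * (cv_wait_sig(9F) returns 0 when the wait is interrupted by a
 * signal; that is why each wait loop here maps a zero return to
 * RSMERR_INTERRUPTED.)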
3287 */ 3288 while ((seg->s_state == RSM_STATE_EXPORT_QUIESCING) || 3289 ((seg->s_state == RSM_STATE_EXPORT) && (seg->s_rdmacnt > 0))) { 3290 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3291 rsmseglock_release(seg); 3292 DBG_PRINTF((category, RSM_ERR, 3293 "rsm_unpublish done: cv_wait INTR qscing" 3294 "getv/putv in progress")); 3295 return (RSMERR_INTERRUPTED); 3296 } 3297 } 3298 3299 /* verify segment state */ 3300 if ((seg->s_state != RSM_STATE_EXPORT) && 3301 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) { 3302 rsmseglock_release(seg); 3303 DBG_PRINTF((category, RSM_ERR, 3304 "rsm_unpublish done: bad state %x\n", seg->s_state)); 3305 return (RSMERR_SEG_NOT_PUBLISHED); 3306 } 3307 3308 rsmseglock_release(seg); 3309 3310 rsmexport_rm(seg); 3311 3312 rsm_send_importer_disconnects(seg->s_segid, my_nodeid); 3313 3314 rsmseglock_acquire(seg); 3315 /* 3316 * wait for republish to complete 3317 */ 3318 while ((seg->s_state == RSM_STATE_EXPORT) && 3319 (seg->s_flags & RSM_REPUBLISH_WAIT)) { 3320 if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) { 3321 DBG_PRINTF((category, RSM_ERR, 3322 "rsm_unpublish done: cv_wait INTR repubing")); 3323 rsmseglock_release(seg); 3324 return (RSMERR_INTERRUPTED); 3325 } 3326 } 3327 3328 if ((seg->s_state != RSM_STATE_EXPORT) && 3329 (seg->s_state != RSM_STATE_EXPORT_QUIESCED)) { 3330 DBG_PRINTF((category, RSM_ERR, 3331 "rsm_unpublish done: invalid state")); 3332 rsmseglock_release(seg); 3333 return (RSMERR_SEG_NOT_PUBLISHED); 3334 } 3335 3336 /* 3337 * check for putv/get surrogate segment which was not published 3338 * to the driver. 3339 * 3340 * Be certain to see if there is an ACL first! If this segment was 3341 * not published with an ACL, acl will be a null pointer. Check 3342 * that before dereferencing it. 3343 */ 3344 acl = seg->s_acl; 3345 if (acl != (rsmapi_access_entry_t *)NULL) { 3346 if (acl[0].ae_node == my_nodeid && acl[0].ae_permission == 0) 3347 goto bypass; 3348 } 3349 3350 /* The RSMPI unpublish/destroy has been done if seg is QUIESCED */ 3351 if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) 3352 goto bypass; 3353 3354 adapter = seg->s_adapter; 3355 for (;;) { 3356 if (seg->s_state != RSM_STATE_EXPORT) { 3357 rsmseglock_release(seg); 3358 DBG_PRINTF((category, RSM_ERR, 3359 "rsm_unpublish done: bad state %x\n", 3360 seg->s_state)); 3361 return (RSMERR_SEG_NOT_PUBLISHED); 3362 } 3363 3364 /* unpublish from adapter */ 3365 e = adapter->rsmpi_ops->rsm_unpublish(seg->s_handle.out); 3366 3367 if (e == RSM_SUCCESS) { 3368 break; 3369 } 3370 3371 if (e == RSMERR_SEG_IN_USE && mode == 1) { 3372 /* 3373 * wait for unpublish to succeed, it's busy. 
3374 */ 3375 seg->s_flags |= RSM_EXPORT_WAIT; 3376 3377 /* wait for a max of 1 ms - this is an empirical */ 3378 /* value that was found by some minimal testing */ 3379 /* can be fine tuned when we have better numbers */ 3380 /* A long term fix would be to send cv_signal */ 3381 /* from the intr callback routine */ 3382 (void) drv_getparm(LBOLT, &ticks); 3383 ticks += drv_usectohz(1000); 3384 /* currently nobody signals this wait */ 3385 (void) cv_timedwait(&seg->s_cv, &seg->s_lock, ticks); 3386 3387 DBG_PRINTF((category, RSM_ERR, 3388 "rsm_unpublish: SEG_IN_USE\n")); 3389 3390 seg->s_flags &= ~RSM_EXPORT_WAIT; 3391 } else { 3392 if (mode == 1) { 3393 DBG_PRINTF((category, RSM_ERR, 3394 "rsm:rsmpi unpublish err %x\n", e)); 3395 seg->s_state = RSM_STATE_BIND; 3396 } 3397 rsmseglock_release(seg); 3398 return (e); 3399 } 3400 } 3401 3402 /* Free segment */ 3403 e = adapter->rsmpi_ops->rsm_seg_destroy(seg->s_handle.out); 3404 3405 if (e != RSM_SUCCESS) { 3406 DBG_PRINTF((category, RSM_ERR, 3407 "rsm_unpublish: rsmpi destroy key=%x failed %x\n", 3408 seg->s_key, e)); 3409 } 3410 3411 bypass: 3412 acl = seg->s_acl; 3413 rsmpi_acl = seg->s_acl_in; 3414 acl_len = seg->s_acl_len; 3415 3416 seg->s_acl = NULL; 3417 seg->s_acl_in = NULL; 3418 seg->s_acl_len = 0; 3419 3420 if (seg->s_state == RSM_STATE_EXPORT) { 3421 seg->s_state = RSM_STATE_BIND; 3422 } else if (seg->s_state == RSM_STATE_EXPORT_QUIESCED) { 3423 seg->s_state = RSM_STATE_BIND_QUIESCED; 3424 cv_broadcast(&seg->s_cv); 3425 } 3426 3427 rsmseglock_release(seg); 3428 3429 rsmacl_free(acl, acl_len); 3430 rsmpiacl_free(rsmpi_acl, acl_len); 3431 3432 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unpublish done\n")); 3433 3434 return (DDI_SUCCESS); 3435 } 3436 3437 /* 3438 * Called from rsm_unpublish to force an unload and disconnection of all 3439 * importers of the unpublished segment. 3440 * 3441 * First build the list of segments requiring a force disconnect, then 3442 * send a request for each. 3443 */ 3444 static void 3445 rsm_send_importer_disconnects(rsm_memseg_id_t ex_segid, 3446 rsm_node_id_t ex_nodeid) 3447 { 3448 rsmipc_request_t request; 3449 importing_token_t *prev_token, *token, *tmp_token, *tokp; 3450 importing_token_t *force_disconnect_list = NULL; 3451 int index; 3452 3453 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3454 "rsm_send_importer_disconnects enter\n")); 3455 3456 index = rsmhash(ex_segid); 3457 3458 mutex_enter(&importer_list.lock); 3459 3460 prev_token = NULL; 3461 token = importer_list.bucket[index]; 3462 3463 while (token != NULL) { 3464 if (token->key == ex_segid) { 3465 /* 3466 * take it off the importer list and add it 3467 * to the force disconnect list. 3468 */ 3469 if (prev_token == NULL) 3470 importer_list.bucket[index] = token->next; 3471 else 3472 prev_token->next = token->next; 3473 tmp_token = token; 3474 token = token->next; 3475 if (force_disconnect_list == NULL) { 3476 force_disconnect_list = tmp_token; 3477 tmp_token->next = NULL; 3478 } else { 3479 tokp = force_disconnect_list; 3480 /* 3481 * make sure that the tmp_token's node 3482 * is not already on the force disconnect 3483 * list. 
3484 */ 3485 while (tokp != NULL) { 3486 if (tokp->importing_node == 3487 tmp_token->importing_node) { 3488 break; 3489 } 3490 tokp = tokp->next; 3491 } 3492 if (tokp == NULL) { 3493 tmp_token->next = 3494 force_disconnect_list; 3495 force_disconnect_list = tmp_token; 3496 } else { 3497 kmem_free((void *)tmp_token, 3498 sizeof (*token)); 3499 } 3500 } 3501 3502 } else { 3503 prev_token = token; 3504 token = token->next; 3505 } 3506 } 3507 mutex_exit(&importer_list.lock); 3508 3509 token = force_disconnect_list; 3510 while (token != NULL) { 3511 if (token->importing_node == my_nodeid) { 3512 rsm_force_unload(ex_nodeid, ex_segid, 3513 DISCONNECT); 3514 } else { 3515 request.rsmipc_hdr.rsmipc_type = 3516 RSMIPC_MSG_DISCONNECT; 3517 request.rsmipc_key = token->key; 3518 for (;;) { 3519 if (rsmipc_send(token->importing_node, 3520 &request, 3521 RSM_NO_REPLY) == RSM_SUCCESS) { 3522 break; 3523 } else { 3524 delay(drv_usectohz(10000)); 3525 } 3526 } 3527 } 3528 tmp_token = token; 3529 token = token->next; 3530 kmem_free((void *)tmp_token, sizeof (*token)); 3531 } 3532 3533 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3534 "rsm_send_importer_disconnects done\n")); 3535 } 3536 3537 /* 3538 * This function is used as a callback for unlocking the pages locked 3539 * down by a process which then does a fork or an exec. 3540 * It marks the export segments corresponding to umem cookie given by 3541 * the *arg to be in a ZOMBIE state(by calling rsmseg_close to be 3542 * destroyed later when an rsm_close occurs). 3543 */ 3544 static void 3545 rsm_export_force_destroy(ddi_umem_cookie_t *ck) 3546 { 3547 rsmresource_blk_t *blk; 3548 rsmresource_t *p; 3549 rsmseg_t *eseg = NULL; 3550 int i, j; 3551 int found = 0; 3552 3553 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3554 "rsm_export_force_destroy enter\n")); 3555 3556 /* 3557 * Walk the resource list and locate the export segment (either 3558 * in the BIND or the EXPORT state) which corresponds to the 3559 * ddi_umem_cookie_t being freed up, and call rsmseg_close. 3560 * Change the state to ZOMBIE by calling rsmseg_close with the 3561 * force_flag argument (the second argument) set to 1. Also, 3562 * unpublish and unbind the segment, but don't free it. Free it 3563 * only on a rsm_close call for the segment. 3564 */ 3565 rw_enter(&rsm_resource.rsmrc_lock, RW_READER); 3566 3567 for (i = 0; i < rsm_resource.rsmrc_len; i++) { 3568 blk = rsm_resource.rsmrc_root[i]; 3569 if (blk == NULL) { 3570 continue; 3571 } 3572 3573 for (j = 0; j < RSMRC_BLKSZ; j++) { 3574 p = blk->rsmrcblk_blks[j]; 3575 if ((p != NULL) && (p != RSMRC_RESERVED) && 3576 (p->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)) { 3577 eseg = (rsmseg_t *)p; 3578 if (eseg->s_cookie != ck) 3579 continue; /* continue searching */ 3580 /* 3581 * Found the segment, set flag to indicate 3582 * force destroy processing is in progress 3583 */ 3584 rsmseglock_acquire(eseg); 3585 eseg->s_flags |= RSM_FORCE_DESTROY_WAIT; 3586 rsmseglock_release(eseg); 3587 found = 1; 3588 break; 3589 } 3590 } 3591 3592 if (found) 3593 break; 3594 } 3595 3596 rw_exit(&rsm_resource.rsmrc_lock); 3597 3598 if (found) { 3599 ASSERT(eseg != NULL); 3600 /* call rsmseg_close with force flag set to 1 */ 3601 rsmseg_close(eseg, 1); 3602 /* 3603 * force destroy processing done, clear flag and signal any 3604 * thread waiting in rsmseg_close. 
3605 */ 3606 rsmseglock_acquire(eseg); 3607 eseg->s_flags &= ~RSM_FORCE_DESTROY_WAIT; 3608 cv_broadcast(&eseg->s_cv); 3609 rsmseglock_release(eseg); 3610 } 3611 3612 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 3613 "rsm_export_force_destroy done\n")); 3614 } 3615 3616 /* ******************************* Remote Calls *********************** */ 3617 static void 3618 rsm_intr_segconnect(rsm_node_id_t src, rsmipc_request_t *req) 3619 { 3620 rsmipc_reply_t reply; 3621 DBG_DEFINE(category, 3622 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3623 3624 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3625 "rsm_intr_segconnect enter\n")); 3626 3627 reply.rsmipc_status = (short)rsmsegacl_validate(req, src, &reply); 3628 3629 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY; 3630 reply.rsmipc_hdr.rsmipc_cookie = req->rsmipc_hdr.rsmipc_cookie; 3631 3632 (void) rsmipc_send(src, NULL, &reply); 3633 3634 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3635 "rsm_intr_segconnect done\n")); 3636 } 3637 3638 3639 /* 3640 * When an exported segment is unpublished the exporter sends an ipc 3641 * message (RSMIPC_MSG_DISCONNECT) to all importers. The recv ipc dispatcher 3642 * calls this function. The import list is scanned; segments which match the 3643 * exported segment id are unloaded and disconnected. 3644 * 3645 * Will also be called from rsm_rebind with disconnect_flag FALSE. 3646 * 3647 */ 3648 static void 3649 rsm_force_unload(rsm_node_id_t src_nodeid, 3650 rsm_memseg_id_t ex_segid, 3651 boolean_t disconnect_flag) 3652 3653 { 3654 rsmresource_t *p = NULL; 3655 rsmhash_table_t *rhash = &rsm_import_segs; 3656 uint_t index; 3657 DBG_DEFINE(category, 3658 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3659 3660 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload enter\n")); 3661 3662 index = rsmhash(ex_segid); 3663 3664 rw_enter(&rhash->rsmhash_rw, RW_READER); 3665 3666 p = rsmhash_getbkt(rhash, index); 3667 3668 for (; p; p = p->rsmrc_next) { 3669 rsmseg_t *seg = (rsmseg_t *)p; 3670 if ((seg->s_segid == ex_segid) && (seg->s_node == src_nodeid)) { 3671 /* 3672 * In order to make rsmseg_unload and rsm_force_unload 3673 * thread safe, acquire the segment lock here. 3674 * rsmseg_unload is responsible for releasing the lock. 3675 * rsmseg_unload releases the lock just before a call 3676 * to rsmipc_send or in case of an early exit which 3677 * occurs if the segment was in the state 3678 * RSM_STATE_CONNECTING or RSM_STATE_NEW. 3679 */ 3680 rsmseglock_acquire(seg); 3681 if (disconnect_flag) 3682 seg->s_flags |= RSM_FORCE_DISCONNECT; 3683 rsmseg_unload(seg); 3684 } 3685 } 3686 rw_exit(&rhash->rsmhash_rw); 3687 3688 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_force_unload done\n")); 3689 } 3690 3691 static void 3692 rsm_intr_reply(rsmipc_msghdr_t *msg) 3693 { 3694 /* 3695 * Find slot for cookie in reply. 
* Match sequence with sequence in cookie
3697 * If no match, return
3698 * Try to grab lock of slot, if locked return
3699 * copy data into reply slot area
3700 * signal waiter
3701 */
3702 rsmipc_slot_t *slot;
3703 rsmipc_cookie_t *cookie;
3704 void *data = (void *) msg;
3705 size_t size = sizeof (rsmipc_reply_t);
3706 DBG_DEFINE(category,
3707 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3708
3709 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply enter\n"));
3710
3711 cookie = &msg->rsmipc_cookie;
3712 if (cookie->ic.index >= RSMIPC_SZ) {
3713 DBG_PRINTF((category, RSM_ERR,
3714 "rsm: rsm_intr_reply bad cookie %d\n", cookie->ic.index));
3715 return;
3716 }
3717
3718 ASSERT(cookie->ic.index < RSMIPC_SZ);
3719 slot = &rsm_ipc.slots[cookie->ic.index];
3720 mutex_enter(&slot->rsmipc_lock);
3721 if (slot->rsmipc_cookie.value == cookie->value) {
3722 /* found a match */
3723 if (RSMIPC_GET(slot, RSMIPC_PENDING)) {
3724 bcopy(data, slot->rsmipc_data, size);
3725 RSMIPC_CLEAR(slot, RSMIPC_PENDING);
3726 cv_signal(&slot->rsmipc_cv);
3727 }
3728 } else {
3729 DBG_PRINTF((category, RSM_DEBUG,
3730 "rsm: rsm_intr_reply mismatched reply %d\n",
3731 cookie->ic.index));
3732 }
3733 mutex_exit(&slot->rsmipc_lock);
3734 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_reply done\n"));
3735 }
3736
3737 /*
3738 * This function gets dispatched on the worker thread when we receive
3739 * the SQREADY message. This function sends the SQREADY_ACK message.
3740 */
3741 static void
3742 rsm_sqready_ack_deferred(void *arg)
3743 {
3744 path_t *path = (path_t *)arg;
3745 DBG_DEFINE(category,
3746 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK);
3747
3748 DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
3749 "rsm_sqready_ack_deferred enter\n"));
3750
3751 mutex_enter(&path->mutex);
3752
3753 /*
3754 * If the path is not active there is no point in sending the ACK,
3755 * because the whole SQREADY protocol will again start
3756 * when the path becomes active.
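 *
 * (Handshake summary, pieced together from the handlers here: a node
 * sends SQREADY carrying its local incarnation; the peer replies with
 * SQREADY_ACK, and each side resets msgbuf_avail to
 * RSMIPC_MAX_MESSAGES once its half of the handshake completes, after
 * which credited sends may proceed.)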
3757 */ 3758 if (path->state != RSMKA_PATH_ACTIVE) { 3759 /* 3760 * decrement the path refcnt incremented in rsm_proc_sqready 3761 */ 3762 PATH_RELE_NOLOCK(path); 3763 mutex_exit(&path->mutex); 3764 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3765 "rsm_sqready_ack_deferred done:!ACTIVE\n")); 3766 return; 3767 } 3768 3769 /* send an SQREADY_ACK message */ 3770 (void) rsmipc_send_controlmsg(path, RSMIPC_MSG_SQREADY_ACK); 3771 3772 /* initialize credits to the max level */ 3773 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES; 3774 3775 /* wake up any send that is waiting for credits */ 3776 cv_broadcast(&path->sendq_token.sendq_cv); 3777 3778 /* 3779 * decrement the path refcnt since we incremented it in 3780 * rsm_proc_sqready 3781 */ 3782 PATH_RELE_NOLOCK(path); 3783 3784 mutex_exit(&path->mutex); 3785 3786 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3787 "rsm_sqready_ack_deferred done\n")); 3788 } 3789 3790 /* 3791 * Process the SQREADY message 3792 */ 3793 static void 3794 rsm_proc_sqready(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3795 rsm_intr_hand_arg_t arg) 3796 { 3797 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3798 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3799 path_t *path; 3800 DBG_DEFINE(category, 3801 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3802 3803 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready enter\n")); 3804 3805 /* look up the path - incr the path refcnt */ 3806 path = rsm_find_path(hdlr_argp->adapter_name, 3807 hdlr_argp->adapter_instance, src_hwaddr); 3808 3809 /* 3810 * No path exists or path is not active - drop the message 3811 */ 3812 if (path == NULL) { 3813 DBG_PRINTF((category, RSM_DEBUG, 3814 "rsm_proc_sqready done: msg dropped no path\n")); 3815 return; 3816 } 3817 3818 mutex_exit(&path->mutex); 3819 3820 /* drain any tasks from the previous incarnation */ 3821 taskq_wait(path->recv_taskq); 3822 3823 mutex_enter(&path->mutex); 3824 /* 3825 * If we'd sent an SQREADY message and were waiting for SQREADY_ACK 3826 * in the meanwhile we received an SQREADY message, blindly reset 3827 * the WAIT_FOR_SQACK flag because we'll just send SQREADY_ACK 3828 * and forget about the SQREADY that we sent. 3829 */ 3830 path->flags &= ~RSMKA_WAIT_FOR_SQACK; 3831 3832 if (path->state != RSMKA_PATH_ACTIVE) { 3833 /* decr refcnt and drop the mutex */ 3834 PATH_RELE_NOLOCK(path); 3835 mutex_exit(&path->mutex); 3836 DBG_PRINTF((category, RSM_DEBUG, 3837 "rsm_proc_sqready done: msg dropped path !ACTIVE\n")); 3838 return; 3839 } 3840 3841 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready:path=%lx " 3842 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr)); 3843 3844 /* 3845 * The sender's local incarnation number is our remote incarnation 3846 * number save it in the path data structure 3847 */ 3848 path->remote_incn = msg->rsmipc_local_incn; 3849 path->sendq_token.msgbuf_avail = 0; 3850 path->procmsg_cnt = 0; 3851 3852 /* 3853 * path is active - dispatch task to send SQREADY_ACK - remember 3854 * RSMPI calls can't be done in interrupt context 3855 * 3856 * We can use the recv_taskq to send because the remote endpoint 3857 * cannot start sending messages till it receives SQREADY_ACK hence 3858 * at this point there are no tasks on recv_taskq. 3859 * 3860 * The path refcnt will be decremented in rsm_sqready_ack_deferred. 
3861 */ 3862 (void) taskq_dispatch(path->recv_taskq, 3863 rsm_sqready_ack_deferred, path, KM_NOSLEEP); 3864 3865 mutex_exit(&path->mutex); 3866 3867 3868 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_proc_sqready done\n")); 3869 } 3870 3871 /* 3872 * Process the SQREADY_ACK message 3873 */ 3874 static void 3875 rsm_proc_sqready_ack(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3876 rsm_intr_hand_arg_t arg) 3877 { 3878 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3879 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3880 path_t *path; 3881 DBG_DEFINE(category, 3882 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 3883 3884 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3885 "rsm_proc_sqready_ack enter\n")); 3886 3887 /* look up the path - incr the path refcnt */ 3888 path = rsm_find_path(hdlr_argp->adapter_name, 3889 hdlr_argp->adapter_instance, src_hwaddr); 3890 3891 /* 3892 * drop the message if no path exists, the path is not active, 3893 * or it's not waiting for the SQREADY_ACK message 3894 */ 3895 if (path == NULL) { 3896 DBG_PRINTF((category, RSM_DEBUG, 3897 "rsm_proc_sqready_ack done: msg dropped no path\n")); 3898 return; 3899 } 3900 3901 if ((path->state != RSMKA_PATH_ACTIVE) || 3902 !(path->flags & RSMKA_WAIT_FOR_SQACK)) { 3903 /* decrement the refcnt */ 3904 PATH_RELE_NOLOCK(path); 3905 mutex_exit(&path->mutex); 3906 DBG_PRINTF((category, RSM_DEBUG, 3907 "rsm_proc_sqready_ack done: msg dropped\n")); 3908 return; 3909 } 3910 3911 /* 3912 * Check if this message is in response to the last RSMIPC_MSG_SQREADY 3913 * sent; if not, drop it. 3914 */ 3915 if (path->local_incn != msghdr->rsmipc_incn) { 3916 /* decrement the refcnt */ 3917 PATH_RELE_NOLOCK(path); 3918 mutex_exit(&path->mutex); 3919 DBG_PRINTF((category, RSM_DEBUG, 3920 "rsm_proc_sqready_ack done: msg old incn %lld\n", 3921 msghdr->rsmipc_incn)); 3922 return; 3923 } 3924 3925 DBG_PRINTF((category, RSM_DEBUG, "rsm_proc_sqready_ack:path=%lx " 3926 " src=%lx:%llx\n", path, msghdr->rsmipc_src, src_hwaddr)); 3927 3928 /* 3929 * clear the WAIT_FOR_SQACK flag since we have recvd the ack 3930 */ 3931 path->flags &= ~RSMKA_WAIT_FOR_SQACK; 3932 3933 /* save the remote sendq incn number */ 3934 path->remote_incn = msg->rsmipc_local_incn; 3935 3936 /* initialize credits to the max level */ 3937 path->sendq_token.msgbuf_avail = RSMIPC_MAX_MESSAGES; 3938 3939 /* wake up any send that is waiting for credits */ 3940 cv_broadcast(&path->sendq_token.sendq_cv); 3941 3942 /* decrement the refcnt */ 3943 PATH_RELE_NOLOCK(path); 3944 3945 mutex_exit(&path->mutex); 3946 3947 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 3948 "rsm_proc_sqready_ack done\n")); 3949 } 3950 3951 /* 3952 * process the RSMIPC_MSG_CREDIT message 3953 */ 3954 static void 3955 rsm_add_credits(rsmipc_controlmsg_t *msg, rsm_addr_t src_hwaddr, 3956 rsm_intr_hand_arg_t arg) 3957 { 3958 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)msg; 3959 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 3960 path_t *path; 3961 DBG_DEFINE(category, 3962 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK | RSM_FLOWCONTROL); 3963 3964 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits enter\n")); 3965 3966 /* look up the path - incr the path refcnt */ 3967 path = rsm_find_path(hdlr_argp->adapter_name, 3968 hdlr_argp->adapter_instance, src_hwaddr); 3969 3970 if (path == NULL) { 3971 DBG_PRINTF((category, RSM_DEBUG, 3972 "rsm_add_credits enter: path not found\n")); 3973 return; 3974 } 3975 3976 /* the path is not active - discard credits */ 3977 if (path->state !=
RSMKA_PATH_ACTIVE) { 3978 PATH_RELE_NOLOCK(path); 3979 mutex_exit(&path->mutex); 3980 DBG_PRINTF((category, RSM_DEBUG, 3981 "rsm_add_credits enter:path=%lx !ACTIVE\n", path)); 3982 return; 3983 } 3984 3985 /* 3986 * Check if these credits are for current incarnation of the path. 3987 */ 3988 if (path->local_incn != msghdr->rsmipc_incn) { 3989 /* decrement the refcnt */ 3990 PATH_RELE_NOLOCK(path); 3991 mutex_exit(&path->mutex); 3992 DBG_PRINTF((category, RSM_DEBUG, 3993 "rsm_add_credits enter: old incn %lld\n", 3994 msghdr->rsmipc_incn)); 3995 return; 3996 } 3997 3998 DBG_PRINTF((category, RSM_DEBUG, 3999 "rsm_add_credits:path=%lx new-creds=%d " 4000 "curr credits=%d src=%lx:%llx\n", path, msg->rsmipc_credits, 4001 path->sendq_token.msgbuf_avail, msghdr->rsmipc_src, 4002 src_hwaddr)); 4003 4004 4005 /* add credits to the path's sendq */ 4006 path->sendq_token.msgbuf_avail += msg->rsmipc_credits; 4007 4008 ASSERT(path->sendq_token.msgbuf_avail <= RSMIPC_MAX_MESSAGES); 4009 4010 /* wake up any send that is waiting for credits */ 4011 cv_broadcast(&path->sendq_token.sendq_cv); 4012 4013 /* decrement the refcnt */ 4014 PATH_RELE_NOLOCK(path); 4015 4016 mutex_exit(&path->mutex); 4017 4018 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_add_credits done\n")); 4019 } 4020 4021 static void 4022 rsm_intr_event(rsmipc_request_t *msg) 4023 { 4024 rsmseg_t *seg; 4025 rsmresource_t *p; 4026 rsm_node_id_t src_node; 4027 DBG_DEFINE(category, 4028 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4029 4030 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event enter\n")); 4031 4032 src_node = msg->rsmipc_hdr.rsmipc_src; 4033 4034 if ((seg = msg->rsmipc_segment_cookie) != NULL) { 4035 /* This is for an import segment */ 4036 uint_t hashval = rsmhash(msg->rsmipc_key); 4037 4038 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER); 4039 4040 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval); 4041 4042 for (; p; p = p->rsmrc_next) { 4043 if ((p->rsmrc_key == msg->rsmipc_key) && 4044 (p->rsmrc_node == src_node)) { 4045 seg = (rsmseg_t *)p; 4046 rsmseglock_acquire(seg); 4047 4048 atomic_add_32(&seg->s_pollevent, 1); 4049 4050 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4051 pollwakeup(&seg->s_poll, POLLRDNORM); 4052 4053 rsmseglock_release(seg); 4054 } 4055 } 4056 4057 rw_exit(&rsm_import_segs.rsmhash_rw); 4058 } else { 4059 /* This is for an export segment */ 4060 seg = rsmexport_lookup(msg->rsmipc_key); 4061 if (!seg) { 4062 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4063 "rsm_intr_event done: exp seg not found\n")); 4064 return; 4065 } 4066 4067 ASSERT(rsmseglock_held(seg)); 4068 4069 atomic_add_32(&seg->s_pollevent, 1); 4070 4071 /* 4072 * We must hold the segment lock here, or else the segment 4073 * can be freed while pollwakeup is using it. This implies 4074 * that we MUST NOT grab the segment lock during rsm_chpoll, 4075 * as outlined in the chpoll(2) man page. 4076 */ 4077 if (seg->s_pollflag & RSM_SEGMENT_POLL) 4078 pollwakeup(&seg->s_poll, POLLRDNORM); 4079 4080 rsmseglock_release(seg); 4081 } 4082 4083 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_event done\n")); 4084 } 4085 4086 /* 4087 * The exporter did a republish and changed the ACL - this change is only 4088 * visible to new importers. 
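*
* For context (as wired up elsewhere in this file): the exporter's
* republish causes rsm_send_republish() to send RSMIPC_MSG_REPUBLISH
* to every importing node, and rsm_intr_callback() routes that message
* here. Only the mode stored in the shared import structure is
* refreshed; mappings that were already established are left alone.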
4089 */ 4090 static void 4091 importer_update(rsm_node_id_t src_node, rsm_memseg_id_t key, 4092 rsm_permission_t perm) 4093 { 4094 4095 rsmresource_t *p; 4096 rsmseg_t *seg; 4097 uint_t hashval = rsmhash(key); 4098 DBG_DEFINE(category, 4099 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4100 4101 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update enter\n")); 4102 4103 rw_enter(&rsm_import_segs.rsmhash_rw, RW_READER); 4104 4105 p = (rsmresource_t *)rsmhash_getbkt(&rsm_import_segs, hashval); 4106 4107 for (; p; p = p->rsmrc_next) { 4108 /* 4109 * find the importer and update the permission in the shared 4110 * data structure. Any new importers will use the new perms 4111 */ 4112 if ((p->rsmrc_key == key) && (p->rsmrc_node == src_node)) { 4113 seg = (rsmseg_t *)p; 4114 4115 rsmseglock_acquire(seg); 4116 rsmsharelock_acquire(seg); 4117 seg->s_share->rsmsi_mode = perm; 4118 rsmsharelock_release(seg); 4119 rsmseglock_release(seg); 4120 4121 break; 4122 } 4123 } 4124 4125 rw_exit(&rsm_import_segs.rsmhash_rw); 4126 4127 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_update done\n")); 4128 } 4129 4130 void 4131 rsm_suspend_complete(rsm_node_id_t src_node, int flag) 4132 { 4133 int done = 1; /* indicate all SUSPENDS have been acked */ 4134 list_element_t *elem; 4135 DBG_DEFINE(category, 4136 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4137 4138 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4139 "rsm_suspend_complete enter\n")); 4140 4141 mutex_enter(&rsm_suspend_list.list_lock); 4142 4143 if (rsm_suspend_list.list_head == NULL) { 4144 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4145 "rsm_suspend_complete done: suspend_list is empty\n")); 4146 mutex_exit(&rsm_suspend_list.list_lock); 4147 return; 4148 } 4149 4150 elem = rsm_suspend_list.list_head; 4151 while (elem != NULL) { 4152 if (elem->nodeid == src_node) { 4153 /* clear the pending flag for the node */ 4154 elem->flags &= ~RSM_SUSPEND_ACKPENDING; 4155 elem->flags |= flag; 4156 } 4157 4158 if (done && (elem->flags & RSM_SUSPEND_ACKPENDING)) 4159 done = 0; /* still some nodes have not yet ACKED */ 4160 4161 elem = elem->next; 4162 } 4163 4164 mutex_exit(&rsm_suspend_list.list_lock); 4165 4166 if (!done) { 4167 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4168 "rsm_suspend_complete done: acks pending\n")); 4169 return; 4170 } 4171 /* 4172 * Now that we are done with suspending all the remote importers, 4173 * it is time to quiesce the local exporters 4174 */ 4175 exporter_quiesce(); 4176 4177 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4178 "rsm_suspend_complete done\n")); 4179 } 4180 4181 static void 4182 exporter_quiesce() 4183 { 4184 int i, e; 4185 rsmresource_t *current; 4186 rsmseg_t *seg; 4187 adapter_t *adapter; 4188 DBG_DEFINE(category, 4189 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4190 4191 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce enter\n")); 4192 /* 4193 * The importers send a SUSPEND_COMPLETE to the exporter node. 4194 * Unpublish and unbind the export segments, and 4195 * move them to the EXPORT_QUIESCED state. 4196 */ 4197 4198 rw_enter(&rsm_export_segs.rsmhash_rw, RW_READER); 4199 4200 for (i = 0; i < rsm_hash_size; i++) { 4201 current = rsm_export_segs.bucket[i]; 4202 while (current != NULL) { 4203 seg = (rsmseg_t *)current; 4204 rsmseglock_acquire(seg); 4205 if (current->rsmrc_state == 4206 RSM_STATE_EXPORT_QUIESCING) { 4207 adapter = seg->s_adapter; 4208 /* 4209 * some local memory handles are not published; 4210 * check if this one was published 4211 */ 4212 if ((seg->s_acl == NULL) || 4213
(seg->s_acl[0].ae_node != my_nodeid) || 4214 (seg->s_acl[0].ae_permission != 0)) { 4215 4216 e = adapter->rsmpi_ops->rsm_unpublish( 4217 seg->s_handle.out); 4218 DBG_PRINTF((category, RSM_DEBUG, 4219 "exporter_quiesce:unpub %d\n", e)); 4220 4221 e = adapter->rsmpi_ops->rsm_seg_destroy( 4222 seg->s_handle.out); 4223 4224 DBG_PRINTF((category, RSM_DEBUG, 4225 "exporter_quiesce:destroy %d\n", 4226 e)); 4227 } 4228 4229 (void) rsm_unbind_pages(seg); 4230 seg->s_state = RSM_STATE_EXPORT_QUIESCED; 4231 cv_broadcast(&seg->s_cv); 4232 } 4233 rsmseglock_release(seg); 4234 current = current->rsmrc_next; 4235 } 4236 } 4237 rw_exit(&rsm_export_segs.rsmhash_rw); 4238 4239 /* 4240 * We are done with the pre-del processing for all the local segments 4241 * - time to move to PREDEL_COMPLETED. 4242 */ 4243 4244 mutex_enter(&rsm_drv_data.drv_lock); 4245 4246 ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED); 4247 4248 rsm_drv_data.drv_state = RSM_DRV_PREDEL_COMPLETED; 4249 4250 cv_broadcast(&rsm_drv_data.drv_cv); 4251 4252 mutex_exit(&rsm_drv_data.drv_lock); 4253 4254 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exporter_quiesce done\n")); 4255 } 4256 4257 static void 4258 importer_suspend(rsm_node_id_t src_node) 4259 { 4260 int i; 4261 int susp_flg; /* true means already suspended */ 4262 int num_importers; 4263 rsmresource_t *p = NULL, *curp; 4264 rsmhash_table_t *rhash = &rsm_import_segs; 4265 rsmseg_t *seg; 4266 rsmipc_request_t request; 4267 DBG_DEFINE(category, 4268 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4269 4270 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend enter\n")); 4271 4272 rw_enter(&rhash->rsmhash_rw, RW_READER); 4273 for (i = 0; i < rsm_hash_size; i++) { 4274 p = rhash->bucket[i]; 4275 4276 /* 4277 * Suspend all importers with same <node, key> pair. 4278 * After the last one of the shared importers has been 4279 * suspended - suspend the shared mappings/connection. 4280 */ 4281 for (; p; p = p->rsmrc_next) { 4282 rsmseg_t *first = (rsmseg_t *)p; 4283 if ((first->s_node != src_node) || 4284 (first->s_state == RSM_STATE_DISCONNECT)) 4285 continue; /* go to next entry */ 4286 /* 4287 * search the rest of the bucket for 4288 * other siblings (importers with the same key) 4289 * of "first" and suspend them. 4290 * All importers with same key fall in 4291 * the same bucket. 4292 */ 4293 num_importers = 0; 4294 for (curp = p; curp; curp = curp->rsmrc_next) { 4295 seg = (rsmseg_t *)curp; 4296 4297 rsmseglock_acquire(seg); 4298 4299 if ((seg->s_node != first->s_node) || 4300 (seg->s_key != first->s_key) || 4301 (seg->s_state == RSM_STATE_DISCONNECT)) { 4302 /* 4303 * either not a peer segment or it's a 4304 * disconnected segment - skip it 4305 */ 4306 rsmseglock_release(seg); 4307 continue; 4308 } 4309 4310 rsmseg_suspend(seg, &susp_flg); 4311 4312 if (susp_flg) { /* seg already suspended */ 4313 rsmseglock_release(seg); 4314 break; /* the inner for loop */ 4315 } 4316 4317 num_importers++; 4318 rsmsharelock_acquire(seg); 4319 /* 4320 * we've processed all importers that are 4321 * siblings of "first" 4322 */ 4323 if (num_importers == 4324 seg->s_share->rsmsi_refcnt) { 4325 rsmsharelock_release(seg); 4326 rsmseglock_release(seg); 4327 break; 4328 } 4329 rsmsharelock_release(seg); 4330 rsmseglock_release(seg); 4331 } 4332 4333 /* 4334 * All the importers with the same key and 4335 * nodeid as "first" have been suspended. 4336 * Now suspend the shared connect/mapping. 4337 * This is done only once.
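*
* (The susp_flg out-parameter of rsmseg_suspend() is what enforces
* the "only once": it comes back set when the segment was already
* in a _QUIESCE state, i.e. an earlier pass through this loop has
* already quiesced the shared connection.)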
4338 */ 4339 if (!susp_flg) { 4340 rsmsegshare_suspend(seg); 4341 } 4342 } 4343 } 4344 4345 rw_exit(&rhash->rsmhash_rw); 4346 4347 /* send an ACK for SUSPEND message */ 4348 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND_DONE; 4349 (void) rsmipc_send(src_node, &request, RSM_NO_REPLY); 4350 4351 4352 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_suspend done\n")); 4353 4354 } 4355 4356 static void 4357 rsmseg_suspend(rsmseg_t *seg, int *susp_flg) 4358 { 4359 int recheck_state; 4360 rsmcookie_t *hdl; 4361 DBG_DEFINE(category, 4362 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4363 4364 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4365 "rsmseg_suspend enter: key=%u\n", seg->s_key)); 4366 4367 *susp_flg = 0; 4368 4369 ASSERT(rsmseglock_held(seg)); 4370 /* wait if putv/getv is in progress */ 4371 while (seg->s_rdmacnt > 0) 4372 cv_wait(&seg->s_cv, &seg->s_lock); 4373 4374 do { 4375 recheck_state = 0; 4376 4377 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4378 "rsmseg_suspend:segment %x state=%d\n", 4379 seg->s_key, seg->s_state)); 4380 4381 switch (seg->s_state) { 4382 case RSM_STATE_NEW: 4383 /* not a valid state */ 4384 break; 4385 case RSM_STATE_CONNECTING: 4386 seg->s_state = RSM_STATE_ABORT_CONNECT; 4387 break; 4388 case RSM_STATE_ABORT_CONNECT: 4389 break; 4390 case RSM_STATE_CONNECT: 4391 seg->s_handle.in = NULL; 4392 seg->s_state = RSM_STATE_CONN_QUIESCE; 4393 break; 4394 case RSM_STATE_MAPPING: 4395 /* wait until segment leaves the mapping state */ 4396 while (seg->s_state == RSM_STATE_MAPPING) 4397 cv_wait(&seg->s_cv, &seg->s_lock); 4398 recheck_state = 1; 4399 break; 4400 case RSM_STATE_ACTIVE: 4401 /* unload the mappings */ 4402 if (seg->s_ckl != NULL) { 4403 hdl = seg->s_ckl; 4404 for (; hdl != NULL; hdl = hdl->c_next) { 4405 (void) devmap_unload(hdl->c_dhp, 4406 hdl->c_off, hdl->c_len); 4407 } 4408 } 4409 seg->s_mapinfo = NULL; 4410 seg->s_state = RSM_STATE_MAP_QUIESCE; 4411 break; 4412 case RSM_STATE_CONN_QUIESCE: 4413 /* FALLTHRU */ 4414 case RSM_STATE_MAP_QUIESCE: 4415 /* rsmseg_suspend already done for seg */ 4416 *susp_flg = 1; 4417 break; 4418 case RSM_STATE_DISCONNECT: 4419 break; 4420 default: 4421 ASSERT(0); /* invalid state */ 4422 } 4423 } while (recheck_state); 4424 4425 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_suspend done\n")); 4426 } 4427 4428 static void 4429 rsmsegshare_suspend(rsmseg_t *seg) 4430 { 4431 int e; 4432 adapter_t *adapter; 4433 rsm_import_share_t *sharedp; 4434 DBG_DEFINE(category, 4435 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4436 4437 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4438 "rsmsegshare_suspend enter\n")); 4439 4440 rsmseglock_acquire(seg); 4441 rsmsharelock_acquire(seg); 4442 4443 sharedp = seg->s_share; 4444 adapter = seg->s_adapter; 4445 switch (sharedp->rsmsi_state) { 4446 case RSMSI_STATE_NEW: 4447 break; 4448 case RSMSI_STATE_CONNECTING: 4449 sharedp->rsmsi_state = RSMSI_STATE_ABORT_CONNECT; 4450 break; 4451 case RSMSI_STATE_ABORT_CONNECT: 4452 break; 4453 case RSMSI_STATE_CONNECTED: 4454 /* do the rsmpi disconnect */ 4455 if (sharedp->rsmsi_node != my_nodeid) { 4456 e = adapter->rsmpi_ops-> 4457 rsm_disconnect(sharedp->rsmsi_handle); 4458 4459 DBG_PRINTF((category, RSM_DEBUG, 4460 "rsm:rsmpi disconnect seg=%x:err=%d\n", 4461 sharedp->rsmsi_segid, e)); 4462 } 4463 4464 sharedp->rsmsi_handle = NULL; 4465 4466 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE; 4467 break; 4468 case RSMSI_STATE_CONN_QUIESCE: 4469 break; 4470 case RSMSI_STATE_MAPPED: 4471 /* do the rsmpi unmap and disconnect */ 4472 if 
(sharedp->rsmsi_node != my_nodeid) { 4473 e = adapter->rsmpi_ops->rsm_unmap(seg->s_handle.in); 4474 4475 DBG_PRINTF((category, RSM_DEBUG, 4476 "rsmshare_suspend: rsmpi unmap %d\n", e)); 4477 4478 e = adapter->rsmpi_ops-> 4479 rsm_disconnect(sharedp->rsmsi_handle); 4480 DBG_PRINTF((category, RSM_DEBUG, 4481 "rsm:rsmpi disconnect seg=%x:err=%d\n", 4482 sharedp->rsmsi_segid, e)); 4483 } 4484 4485 sharedp->rsmsi_handle = NULL; 4486 4487 sharedp->rsmsi_state = RSMSI_STATE_MAP_QUIESCE; 4488 break; 4489 case RSMSI_STATE_MAP_QUIESCE: 4490 break; 4491 case RSMSI_STATE_DISCONNECTED: 4492 break; 4493 default: 4494 ASSERT(0); /* invalid state */ 4495 } 4496 4497 rsmsharelock_release(seg); 4498 rsmseglock_release(seg); 4499 4500 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4501 "rsmsegshare_suspend done\n")); 4502 } 4503 4504 /* 4505 * This should get called on receiving a RESUME message or from 4506 * the pathmanager if the node undergoing DR dies. 4507 */ 4508 static void 4509 importer_resume(rsm_node_id_t src_node) 4510 { 4511 int i; 4512 rsmresource_t *p = NULL; 4513 rsmhash_table_t *rhash = &rsm_import_segs; 4514 void *cookie; 4515 DBG_DEFINE(category, 4516 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4517 4518 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume enter\n")); 4519 4520 rw_enter(&rhash->rsmhash_rw, RW_READER); 4521 4522 for (i = 0; i < rsm_hash_size; i++) { 4523 p = rhash->bucket[i]; 4524 4525 for (; p; p = p->rsmrc_next) { 4526 rsmseg_t *seg = (rsmseg_t *)p; 4527 4528 rsmseglock_acquire(seg); 4529 4530 /* process only importers of node undergoing DR */ 4531 if (seg->s_node != src_node) { 4532 rsmseglock_release(seg); 4533 continue; 4534 } 4535 4536 if (rsmseg_resume(seg, &cookie) != RSM_SUCCESS) { 4537 rsmipc_request_t request; 4538 /* 4539 * rsmpi map/connect failed; 4540 * inform the exporter so that it can 4541 * remove the importer.
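*
* Note that the segment lock is dropped before the rsmipc_send
* below, presumably because the send can block (e.g. waiting for
* send credits) and should not be done with the segment lock held.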
4542 */ 4543 request.rsmipc_hdr.rsmipc_type = 4544 RSMIPC_MSG_NOTIMPORTING; 4545 request.rsmipc_key = seg->s_segid; 4546 request.rsmipc_segment_cookie = cookie; 4547 rsmseglock_release(seg); 4548 (void) rsmipc_send(seg->s_node, &request, 4549 RSM_NO_REPLY); 4550 } else { 4551 rsmseglock_release(seg); 4552 } 4553 } 4554 } 4555 4556 rw_exit(&rhash->rsmhash_rw); 4557 4558 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importer_resume done\n")); 4559 } 4560 4561 static int 4562 rsmseg_resume(rsmseg_t *seg, void **cookie) 4563 { 4564 int e; 4565 int retc; 4566 off_t dev_offset; 4567 size_t maplen; 4568 uint_t maxprot; 4569 rsm_mapinfo_t *p; 4570 rsmcookie_t *hdl; 4571 rsm_import_share_t *sharedp; 4572 DBG_DEFINE(category, 4573 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4574 4575 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4576 "rsmseg_resume enter: key=%u\n", seg->s_key)); 4577 4578 *cookie = NULL; 4579 4580 ASSERT(rsmseglock_held(seg)); 4581 4582 if ((seg->s_state != RSM_STATE_CONN_QUIESCE) && 4583 (seg->s_state != RSM_STATE_MAP_QUIESCE)) { 4584 return (RSM_SUCCESS); 4585 } 4586 4587 sharedp = seg->s_share; 4588 4589 rsmsharelock_acquire(seg); 4590 4591 /* resume the shared connection and/or mapping */ 4592 retc = rsmsegshare_resume(seg); 4593 4594 if (seg->s_state == RSM_STATE_CONN_QUIESCE) { 4595 /* shared state can either be connected or mapped */ 4596 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) || 4597 (sharedp->rsmsi_state == RSMSI_STATE_MAPPED)) { 4598 ASSERT(retc == RSM_SUCCESS); 4599 seg->s_handle.in = sharedp->rsmsi_handle; 4600 rsmsharelock_release(seg); 4601 seg->s_state = RSM_STATE_CONNECT; 4602 4603 } else { /* error in rsmpi connect during resume */ 4604 seg->s_handle.in = NULL; 4605 seg->s_state = RSM_STATE_DISCONNECT; 4606 4607 sharedp->rsmsi_refcnt--; 4608 *cookie = (void *)sharedp->rsmsi_cookie; 4609 4610 if (sharedp->rsmsi_refcnt == 0) { 4611 ASSERT(sharedp->rsmsi_mapcnt == 0); 4612 rsmsharelock_release(seg); 4613 4614 /* clean up the shared data structure */ 4615 mutex_destroy(&sharedp->rsmsi_lock); 4616 cv_destroy(&sharedp->rsmsi_cv); 4617 kmem_free((void *)(sharedp), 4618 sizeof (rsm_import_share_t)); 4619 4620 } else { 4621 rsmsharelock_release(seg); 4622 } 4623 /* 4624 * The following needs to be done after any 4625 * rsmsharelock calls which use seg->s_share.
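*
* (The rsmsharelock macros presumably reach the mutex through
* seg->s_share, so s_share can only be cleared once the lock has
* been dropped for the last time.)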
4626 */ 4627 seg->s_share = NULL; 4628 } 4629 4630 /* signal any waiting segment */ 4631 cv_broadcast(&seg->s_cv); 4632 4633 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4634 "rsmseg_resume done:state=%d\n", seg->s_state)); 4635 return (retc); 4636 } 4637 4638 ASSERT(seg->s_state == RSM_STATE_MAP_QUIESCE); 4639 4640 /* Setup protections for remap */ 4641 maxprot = PROT_USER; 4642 if (seg->s_mode & RSM_PERM_READ) { 4643 maxprot |= PROT_READ; 4644 } 4645 if (seg->s_mode & RSM_PERM_WRITE) { 4646 maxprot |= PROT_WRITE; 4647 } 4648 4649 if (sharedp->rsmsi_state != RSMSI_STATE_MAPPED) { 4650 /* error in rsmpi connect or map during resume */ 4651 4652 /* remap to trash page */ 4653 ASSERT(seg->s_ckl != NULL); 4654 4655 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4656 e = devmap_umem_remap(hdl->c_dhp, rsm_dip, 4657 remap_cookie, hdl->c_off, hdl->c_len, 4658 maxprot, 0, NULL); 4659 4660 DBG_PRINTF((category, RSM_ERR, 4661 "rsmseg_resume:remap=%d\n", e)); 4662 } 4663 4664 seg->s_handle.in = NULL; 4665 seg->s_state = RSM_STATE_DISCONNECT; 4666 4667 sharedp->rsmsi_refcnt--; 4668 4669 sharedp->rsmsi_mapcnt--; 4670 seg->s_mapinfo = NULL; 4671 4672 if (sharedp->rsmsi_refcnt == 0) { 4673 ASSERT(sharedp->rsmsi_mapcnt == 0); 4674 rsmsharelock_release(seg); 4675 4676 /* clean up the shared data structure */ 4677 mutex_destroy(&sharedp->rsmsi_lock); 4678 cv_destroy(&sharedp->rsmsi_cv); 4679 kmem_free((void *)(sharedp), 4680 sizeof (rsm_import_share_t)); 4681 4682 } else { 4683 rsmsharelock_release(seg); 4684 } 4685 /* 4686 * The following needs to be done after any 4687 * rsmsharelock calls which use seg->s_share. 4688 */ 4689 seg->s_share = NULL; 4690 4691 /* signal any waiting segment */ 4692 cv_broadcast(&seg->s_cv); 4693 4694 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4695 "rsmseg_resume done:seg=%x,err=%d\n", 4696 seg->s_key, retc)); 4697 return (retc); 4698 4699 } 4700 4701 seg->s_handle.in = sharedp->rsmsi_handle; 4702 4703 if (seg->s_node == my_nodeid) { /* loopback */ 4704 ASSERT(seg->s_mapinfo == NULL); 4705 4706 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4707 e = devmap_umem_remap(hdl->c_dhp, 4708 rsm_dip, seg->s_cookie, 4709 hdl->c_off, hdl->c_len, 4710 maxprot, 0, NULL); 4711 4712 DBG_PRINTF((category, RSM_ERR, 4713 "rsmseg_resume:remap=%d\n", e)); 4714 } 4715 } else { /* remote exporter */ 4716 /* remap to the new rsmpi maps */ 4717 seg->s_mapinfo = sharedp->rsmsi_mapinfo; 4718 4719 for (hdl = seg->s_ckl; hdl != NULL; hdl = hdl->c_next) { 4720 p = rsm_get_mapinfo(seg, hdl->c_off, hdl->c_len, 4721 &dev_offset, &maplen); 4722 e = devmap_devmem_remap(hdl->c_dhp, 4723 p->dip, p->dev_register, dev_offset, 4724 maplen, maxprot, 0, NULL); 4725 4726 DBG_PRINTF((category, RSM_ERR, 4727 "rsmseg_resume:remap=%d\n", e)); 4728 } 4729 } 4730 4731 rsmsharelock_release(seg); 4732 4733 seg->s_state = RSM_STATE_ACTIVE; 4734 cv_broadcast(&seg->s_cv); 4735 4736 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_resume done\n")); 4737 4738 return (retc); 4739 } 4740 4741 static int 4742 rsmsegshare_resume(rsmseg_t *seg) 4743 { 4744 int e = RSM_SUCCESS; 4745 adapter_t *adapter; 4746 rsm_import_share_t *sharedp; 4747 DBG_DEFINE(category, 4748 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4749 4750 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume enter\n")); 4751 4752 ASSERT(rsmseglock_held(seg)); 4753 ASSERT(rsmsharelock_held(seg)); 4754 4755 sharedp = seg->s_share; 4756 4757 /* 4758 * If we are not in a xxxx_QUIESCE state that means shared 4759 * connect/mapping processing has 
already been done 4760 * so return success. 4761 */ 4762 if ((sharedp->rsmsi_state != RSMSI_STATE_CONN_QUIESCE) && 4763 (sharedp->rsmsi_state != RSMSI_STATE_MAP_QUIESCE)) { 4764 return (RSM_SUCCESS); 4765 } 4766 4767 adapter = seg->s_adapter; 4768 4769 if (sharedp->rsmsi_node != my_nodeid) { 4770 rsm_addr_t hwaddr; 4771 hwaddr = get_remote_hwaddr(adapter, sharedp->rsmsi_node); 4772 4773 e = adapter->rsmpi_ops->rsm_connect( 4774 adapter->rsmpi_handle, hwaddr, 4775 sharedp->rsmsi_segid, &sharedp->rsmsi_handle); 4776 4777 DBG_PRINTF((category, RSM_DEBUG, 4778 "rsmsegshare_resume:rsmpi connect seg=%x:err=%d\n", 4779 sharedp->rsmsi_segid, e)); 4780 4781 if (e != RSM_SUCCESS) { 4782 /* when do we send the NOT_IMPORTING message */ 4783 sharedp->rsmsi_handle = NULL; 4784 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 4785 /* signal any waiting segment */ 4786 cv_broadcast(&sharedp->rsmsi_cv); 4787 return (e); 4788 } 4789 } 4790 4791 if (sharedp->rsmsi_state == RSMSI_STATE_CONN_QUIESCE) { 4792 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 4793 /* signal any waiting segment */ 4794 cv_broadcast(&sharedp->rsmsi_cv); 4795 return (e); 4796 } 4797 4798 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE); 4799 4800 /* do the rsmpi map of the whole segment here */ 4801 if (sharedp->rsmsi_node != my_nodeid) { 4802 size_t mapped_len; 4803 rsm_mapinfo_t *p; 4804 4805 /* 4806 * We need to do rsmpi maps with <off, lens> identical to 4807 * the old mapinfo list because the segment mapping handles 4808 * dhp and such need the fragmentation of rsmpi maps to be 4809 * identical to what it was during the mmap of the segment 4810 */ 4811 p = sharedp->rsmsi_mapinfo; 4812 4813 while (p != NULL) { 4814 mapped_len = 0; 4815 4816 e = adapter->rsmpi_ops->rsm_map( 4817 sharedp->rsmsi_handle, p->start_offset, 4818 p->individual_len, &mapped_len, 4819 &p->dip, &p->dev_register, &p->dev_offset, 4820 NULL, NULL); 4821 4822 if (e != 0) { 4823 DBG_PRINTF((category, RSM_ERR, 4824 "rsmsegshare_resume: rsmpi map err=%d\n", 4825 e)); 4826 break; 4827 } 4828 4829 if (mapped_len != p->individual_len) { 4830 DBG_PRINTF((category, RSM_ERR, 4831 "rsmsegshare_resume: rsmpi maplen " 4832 "< reqlen=%lx\n", mapped_len)); 4833 e = RSMERR_BAD_LENGTH; 4834 break; 4835 } 4836 4837 p = p->next; 4838 4839 } 4840 4841 4842 if (e != RSM_SUCCESS) { /* rsmpi map failed */ 4843 int err; 4844 /* Check if this is the first rsm_map */ 4845 if (p != sharedp->rsmsi_mapinfo) { 4846 /* 4847 * A single rsm_unmap undoes multiple rsm_maps.
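*
* That is, if the loop above stopped anywhere past the first
* mapinfo element, at least one rsm_map succeeded, and the one
* rsm_unmap call below tears all of those maps down before the
* disconnect.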
4848 */ 4849 (void) seg->s_adapter->rsmpi_ops-> 4850 rsm_unmap(sharedp->rsmsi_handle); 4851 } 4852 4853 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 4854 sharedp->rsmsi_mapinfo = NULL; 4855 4856 err = adapter->rsmpi_ops-> 4857 rsm_disconnect(sharedp->rsmsi_handle); 4858 4859 DBG_PRINTF((category, RSM_DEBUG, 4860 "rsmsegshare_resume:disconn seg=%x:err=%d\n", 4861 sharedp->rsmsi_segid, err)); 4862 4863 sharedp->rsmsi_handle = NULL; 4864 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 4865 4866 /* signal the waiting segments */ 4867 cv_broadcast(&sharedp->rsmsi_cv); 4868 DBG_PRINTF((category, RSM_DEBUG, 4869 "rsmsegshare_resume done: rsmpi map err\n")); 4870 return (e); 4871 } 4872 } 4873 4874 sharedp->rsmsi_state = RSMSI_STATE_MAPPED; 4875 4876 /* signal any waiting segment */ 4877 cv_broadcast(&sharedp->rsmsi_cv); 4878 4879 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmsegshare_resume done\n")); 4880 4881 return (e); 4882 } 4883 4884 /* 4885 * this is the routine that gets called by recv_taskq which is the 4886 * thread that processes messages that are flow-controlled. 4887 */ 4888 static void 4889 rsm_intr_proc_deferred(void *arg) 4890 { 4891 path_t *path = (path_t *)arg; 4892 rsmipc_request_t *msg; 4893 rsmipc_msghdr_t *msghdr; 4894 rsm_node_id_t src_node; 4895 msgbuf_elem_t *head; 4896 int e; 4897 DBG_DEFINE(category, 4898 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4899 4900 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4901 "rsm_intr_proc_deferred enter\n")); 4902 4903 mutex_enter(&path->mutex); 4904 4905 /* use the head of the msgbuf_queue */ 4906 head = rsmka_gethead_msgbuf(path); 4907 4908 mutex_exit(&path->mutex); 4909 4910 msg = (rsmipc_request_t *)&(head->msg); 4911 msghdr = (rsmipc_msghdr_t *)msg; 4912 4913 src_node = msghdr->rsmipc_src; 4914 4915 /* 4916 * messages that need to send a reply should check the message version 4917 * before processing the message. And all messages that need to 4918 * send a reply should be processed here by the worker thread. 
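*
* Of the types handled below, only RSMIPC_MSG_SEGCONNECT expects a
* reply, which is why it is the only case with an explicit version
* check and a RSMERR_BAD_DRIVER_VERSION reply; mismatched requests
* without a reply were already dropped in rsm_intr_callback.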
4919 */ 4920 switch (msghdr->rsmipc_type) { 4921 case RSMIPC_MSG_SEGCONNECT: 4922 if (msghdr->rsmipc_version != RSM_VERSION) { 4923 rsmipc_reply_t reply; 4924 reply.rsmipc_status = RSMERR_BAD_DRIVER_VERSION; 4925 reply.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPLY; 4926 reply.rsmipc_hdr.rsmipc_cookie = msghdr->rsmipc_cookie; 4927 (void) rsmipc_send(msghdr->rsmipc_src, NULL, &reply); 4928 } else { 4929 rsm_intr_segconnect(src_node, msg); 4930 } 4931 break; 4932 case RSMIPC_MSG_DISCONNECT: 4933 rsm_force_unload(src_node, msg->rsmipc_key, DISCONNECT); 4934 break; 4935 case RSMIPC_MSG_SUSPEND: 4936 importer_suspend(src_node); 4937 break; 4938 case RSMIPC_MSG_SUSPEND_DONE: 4939 rsm_suspend_complete(src_node, 0); 4940 break; 4941 case RSMIPC_MSG_RESUME: 4942 importer_resume(src_node); 4943 break; 4944 default: 4945 ASSERT(0); 4946 } 4947 4948 mutex_enter(&path->mutex); 4949 4950 rsmka_dequeue_msgbuf(path); 4951 4952 /* incr procmsg_cnt can be at most RSMIPC_MAX_MESSAGES */ 4953 if (path->procmsg_cnt < RSMIPC_MAX_MESSAGES) 4954 path->procmsg_cnt++; 4955 4956 ASSERT(path->procmsg_cnt <= RSMIPC_MAX_MESSAGES); 4957 4958 /* No need to send credits if path is going down */ 4959 if ((path->state == RSMKA_PATH_ACTIVE) && 4960 (path->procmsg_cnt >= RSMIPC_LOTSFREE_MSGBUFS)) { 4961 /* 4962 * send credits and reset procmsg_cnt if success otherwise 4963 * credits will be sent after processing the next message 4964 */ 4965 e = rsmipc_send_controlmsg(path, RSMIPC_MSG_CREDIT); 4966 if (e == 0) 4967 path->procmsg_cnt = 0; 4968 else 4969 DBG_PRINTF((category, RSM_ERR, 4970 "rsm_intr_proc_deferred:send credits err=%d\n", e)); 4971 } 4972 4973 /* 4974 * decrement the path refcnt since we incremented it in 4975 * rsm_intr_callback_dispatch 4976 */ 4977 PATH_RELE_NOLOCK(path); 4978 4979 mutex_exit(&path->mutex); 4980 4981 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4982 "rsm_intr_proc_deferred done\n")); 4983 } 4984 4985 /* 4986 * Flow-controlled messages are enqueued and dispatched onto a taskq here 4987 */ 4988 static void 4989 rsm_intr_callback_dispatch(void *data, rsm_addr_t src_hwaddr, 4990 rsm_intr_hand_arg_t arg) 4991 { 4992 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 4993 path_t *path; 4994 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data; 4995 DBG_DEFINE(category, 4996 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 4997 4998 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 4999 "rsm_intr_callback_dispatch enter\n")); 5000 ASSERT(data && hdlr_argp); 5001 5002 /* look up the path - incr the path refcnt */ 5003 path = rsm_find_path(hdlr_argp->adapter_name, 5004 hdlr_argp->adapter_instance, src_hwaddr); 5005 5006 /* the path has been removed - drop this message */ 5007 if (path == NULL) { 5008 DBG_PRINTF((category, RSM_DEBUG, 5009 "rsm_intr_callback_dispatch done: msg dropped\n")); 5010 return; 5011 } 5012 /* the path is not active - don't accept new messages */ 5013 if (path->state != RSMKA_PATH_ACTIVE) { 5014 PATH_RELE_NOLOCK(path); 5015 mutex_exit(&path->mutex); 5016 DBG_PRINTF((category, RSM_DEBUG, 5017 "rsm_intr_callback_dispatch done: msg dropped" 5018 " path=%lx !ACTIVE\n", path)); 5019 return; 5020 } 5021 5022 /* 5023 * Check if this message was sent to an older incarnation 5024 * of the path/sendq. 
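*
* For example (hypothetical numbers): if the path bounced and
* local_incn moved from 5 to 6 while a message stamped with incn 5
* was still in flight, the test below quietly drops that stale
* message.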
5025 */ 5026 if (path->local_incn != msghdr->rsmipc_incn) { 5027 /* decrement the refcnt */ 5028 PATH_RELE_NOLOCK(path); 5029 mutex_exit(&path->mutex); 5030 DBG_PRINTF((category, RSM_DEBUG, 5031 "rsm_intr_callback_dispatch done: old incn %lld\n", 5032 msghdr->rsmipc_incn)); 5033 return; 5034 } 5035 5036 /* copy and enqueue msg on the path's msgbuf queue */ 5037 rsmka_enqueue_msgbuf(path, data); 5038 5039 /* 5040 * schedule task to process messages - ignore retval from 5041 * taskq_dispatch because the sender cannot send more than 5042 * what the receiver can handle. 5043 */ 5044 (void) taskq_dispatch(path->recv_taskq, 5045 rsm_intr_proc_deferred, path, KM_NOSLEEP); 5046 5047 mutex_exit(&path->mutex); 5048 5049 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5050 "rsm_intr_callback_dispatch done\n")); 5051 } 5052 5053 /* 5054 * This procedure is called from rsm_srv_func when a remote node creates 5055 * a send queue. This event is used as a hint that an earlier failed 5056 * attempt to create a send queue to that remote node may now succeed and 5057 * should be retried. Indication of an earlier failed attempt is provided 5058 * by the RSMKA_SQCREATE_PENDING flag. 5059 */ 5060 static void 5061 rsm_sqcreateop_callback(rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg) 5062 { 5063 srv_handler_arg_t *hdlr_argp = (srv_handler_arg_t *)arg; 5064 path_t *path; 5065 DBG_DEFINE(category, 5066 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5067 5068 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5069 "rsm_sqcreateop_callback enter\n")); 5070 5071 /* look up the path - incr the path refcnt */ 5072 path = rsm_find_path(hdlr_argp->adapter_name, 5073 hdlr_argp->adapter_instance, src_hwaddr); 5074 5075 if (path == NULL) { 5076 DBG_PRINTF((category, RSM_DEBUG, 5077 "rsm_sqcreateop_callback done: no path\n")); 5078 return; 5079 } 5080 5081 if ((path->state == RSMKA_PATH_UP) && 5082 (path->flags & RSMKA_SQCREATE_PENDING)) { 5083 /* 5084 * previous attempt to create sendq had failed, retry 5085 * it and move to RSMKA_PATH_ACTIVE state if successful. 5086 * the refcnt will be decremented in the do_deferred_work 5087 */ 5088 (void) rsmka_do_path_active(path, RSMKA_NO_SLEEP); 5089 } else { 5090 /* decrement the refcnt */ 5091 PATH_RELE_NOLOCK(path); 5092 } 5093 mutex_exit(&path->mutex); 5094 5095 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5096 "rsm_sqcreateop_callback done\n")); 5097 } 5098 5099 static void 5100 rsm_intr_callback(void *data, rsm_addr_t src_hwaddr, rsm_intr_hand_arg_t arg) 5101 { 5102 rsmipc_msghdr_t *msghdr = (rsmipc_msghdr_t *)data; 5103 rsmipc_request_t *msg = (rsmipc_request_t *)data; 5104 rsmipc_controlmsg_t *ctrlmsg = (rsmipc_controlmsg_t *)data; 5105 rsm_node_id_t src_node; 5106 DBG_DEFINE(category, 5107 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5108 5109 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback enter:" 5110 "src=%d, type=%d\n", msghdr->rsmipc_src, 5111 msghdr->rsmipc_type)); 5112 5113 /* 5114 * Check for the version number in the msg header. If it is not 5115 * RSM_VERSION, drop the message. In the future, we need to manage 5116 * incompatible version numbers in some way 5117 */ 5118 if (msghdr->rsmipc_version != RSM_VERSION) { 5119 DBG_PRINTF((category, RSM_ERR, "wrong KA version\n")); 5120 /* 5121 * Drop requests that don't have a reply right here. 5122 * Requests with a reply will send a BAD_VERSION reply 5123 * when they get processed by the worker thread.
5124 */ 5125 if (msghdr->rsmipc_type != RSMIPC_MSG_SEGCONNECT) { 5126 return; 5127 } 5128 5129 } 5130 5131 src_node = msghdr->rsmipc_src; 5132 5133 switch (msghdr->rsmipc_type) { 5134 case RSMIPC_MSG_SEGCONNECT: 5135 case RSMIPC_MSG_DISCONNECT: 5136 case RSMIPC_MSG_SUSPEND: 5137 case RSMIPC_MSG_SUSPEND_DONE: 5138 case RSMIPC_MSG_RESUME: 5139 /* 5140 * These message types are handled by a worker thread using 5141 * the flow-control algorithm. 5142 * Any message processing that does one or more of the 5143 * following should be handled in a worker thread. 5144 * - allocates resources and might sleep 5145 * - makes RSMPI calls down to the interconnect driver 5146 * this by defn include requests with reply. 5147 * - takes a long duration of time 5148 */ 5149 rsm_intr_callback_dispatch(data, src_hwaddr, arg); 5150 break; 5151 case RSMIPC_MSG_NOTIMPORTING: 5152 importer_list_rm(src_node, msg->rsmipc_key, 5153 msg->rsmipc_segment_cookie); 5154 break; 5155 case RSMIPC_MSG_SQREADY: 5156 rsm_proc_sqready(data, src_hwaddr, arg); 5157 break; 5158 case RSMIPC_MSG_SQREADY_ACK: 5159 rsm_proc_sqready_ack(data, src_hwaddr, arg); 5160 break; 5161 case RSMIPC_MSG_CREDIT: 5162 rsm_add_credits(ctrlmsg, src_hwaddr, arg); 5163 break; 5164 case RSMIPC_MSG_REPLY: 5165 rsm_intr_reply(msghdr); 5166 break; 5167 case RSMIPC_MSG_BELL: 5168 rsm_intr_event(msg); 5169 break; 5170 case RSMIPC_MSG_IMPORTING: 5171 importer_list_add(src_node, msg->rsmipc_key, 5172 msg->rsmipc_adapter_hwaddr, 5173 msg->rsmipc_segment_cookie); 5174 break; 5175 case RSMIPC_MSG_REPUBLISH: 5176 importer_update(src_node, msg->rsmipc_key, msg->rsmipc_perm); 5177 break; 5178 default: 5179 DBG_PRINTF((category, RSM_DEBUG, 5180 "rsm_intr_callback: bad msg %lx type %d data %lx\n", 5181 (size_t)msg, (int)(msghdr->rsmipc_type), (size_t)data)); 5182 } 5183 5184 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_intr_callback done\n")); 5185 5186 } 5187 5188 rsm_intr_hand_ret_t rsm_srv_func(rsm_controller_object_t *chd, 5189 rsm_intr_q_op_t opcode, rsm_addr_t src, 5190 void *data, size_t size, rsm_intr_hand_arg_t arg) 5191 { 5192 DBG_DEFINE(category, 5193 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5194 5195 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func enter\n")); 5196 5197 switch (opcode) { 5198 case RSM_INTR_Q_OP_CREATE: 5199 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_CREATE\n")); 5200 rsm_sqcreateop_callback(src, arg); 5201 break; 5202 case RSM_INTR_Q_OP_DESTROY: 5203 DBG_PRINTF((category, RSM_DEBUG, "rsm_srv_func:OP_DESTROY\n")); 5204 break; 5205 case RSM_INTR_Q_OP_RECEIVE: 5206 rsm_intr_callback(data, src, arg); 5207 break; 5208 default: 5209 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5210 "rsm_srv_func: unknown opcode = %x\n", opcode)); 5211 } 5212 5213 chd = chd; 5214 size = size; 5215 5216 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_srv_func done\n")); 5217 5218 return (RSM_INTR_HAND_CLAIMED); 5219 } 5220 5221 /* *************************** IPC slots ************************* */ 5222 static rsmipc_slot_t * 5223 rsmipc_alloc() 5224 { 5225 int i; 5226 rsmipc_slot_t *slot; 5227 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5228 5229 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc enter\n")); 5230 5231 /* try to find a free slot, if not wait */ 5232 mutex_enter(&rsm_ipc.lock); 5233 5234 while (rsm_ipc.count == 0) { 5235 rsm_ipc.wanted = 1; 5236 cv_wait(&rsm_ipc.cv, &rsm_ipc.lock); 5237 } 5238 5239 /* An empty slot is available, find it */ 5240 slot = &rsm_ipc.slots[0]; 5241 for (i = 0; i < RSMIPC_SZ; i++, slot++) { 
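/* claim the first slot that is still marked RSMIPC_FREE */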
5242 if (RSMIPC_GET(slot, RSMIPC_FREE)) { 5243 RSMIPC_CLEAR(slot, RSMIPC_FREE); 5244 break; 5245 } 5246 } 5247 5248 ASSERT(i < RSMIPC_SZ); 5249 rsm_ipc.count--; /* one less is available */ 5250 rsm_ipc.sequence++; /* new sequence */ 5251 5252 slot->rsmipc_cookie.ic.sequence = (uint_t)rsm_ipc.sequence; 5253 slot->rsmipc_cookie.ic.index = (uint_t)i; 5254 5255 mutex_exit(&rsm_ipc.lock); 5256 5257 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_alloc done\n")); 5258 5259 return (slot); 5260 } 5261 5262 static void 5263 rsmipc_free(rsmipc_slot_t *slot) 5264 { 5265 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL); 5266 5267 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free enter\n")); 5268 5269 ASSERT(MUTEX_HELD(&slot->rsmipc_lock)); 5270 ASSERT(&rsm_ipc.slots[slot->rsmipc_cookie.ic.index] == slot); 5271 5272 mutex_enter(&rsm_ipc.lock); 5273 5274 RSMIPC_SET(slot, RSMIPC_FREE); 5275 5276 slot->rsmipc_cookie.ic.sequence = 0; 5277 5278 mutex_exit(&slot->rsmipc_lock); 5279 rsm_ipc.count++; 5280 ASSERT(rsm_ipc.count <= RSMIPC_SZ); 5281 if (rsm_ipc.wanted) { 5282 rsm_ipc.wanted = 0; 5283 cv_broadcast(&rsm_ipc.cv); 5284 } 5285 5286 mutex_exit(&rsm_ipc.lock); 5287 5288 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_free done\n")); 5289 } 5290 5291 static int 5292 rsmipc_send(rsm_node_id_t dest, rsmipc_request_t *req, rsmipc_reply_t *reply) 5293 { 5294 int e = 0; 5295 int credit_check = 0; 5296 int retry_cnt = 0; 5297 int min_retry_cnt = 10; 5298 clock_t ticks; 5299 rsm_send_t is; 5300 rsmipc_slot_t *rslot; 5301 adapter_t *adapter; 5302 path_t *path; 5303 sendq_token_t *sendq_token; 5304 sendq_token_t *used_sendq_token = NULL; 5305 rsm_send_q_handle_t ipc_handle; 5306 DBG_DEFINE(category, 5307 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5308 5309 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send enter:dest=%d", 5310 dest)); 5311 5312 /* 5313 * Check if this is a local case 5314 */ 5315 if (dest == my_nodeid) { 5316 switch (req->rsmipc_hdr.rsmipc_type) { 5317 case RSMIPC_MSG_SEGCONNECT: 5318 reply->rsmipc_status = (short)rsmsegacl_validate( 5319 req, dest, reply); 5320 break; 5321 case RSMIPC_MSG_BELL: 5322 req->rsmipc_hdr.rsmipc_src = dest; 5323 rsm_intr_event(req); 5324 break; 5325 case RSMIPC_MSG_IMPORTING: 5326 importer_list_add(dest, req->rsmipc_key, 5327 req->rsmipc_adapter_hwaddr, 5328 req->rsmipc_segment_cookie); 5329 break; 5330 case RSMIPC_MSG_NOTIMPORTING: 5331 importer_list_rm(dest, req->rsmipc_key, 5332 req->rsmipc_segment_cookie); 5333 break; 5334 case RSMIPC_MSG_REPUBLISH: 5335 importer_update(dest, req->rsmipc_key, 5336 req->rsmipc_perm); 5337 break; 5338 case RSMIPC_MSG_SUSPEND: 5339 importer_suspend(dest); 5340 break; 5341 case RSMIPC_MSG_SUSPEND_DONE: 5342 rsm_suspend_complete(dest, 0); 5343 break; 5344 case RSMIPC_MSG_RESUME: 5345 importer_resume(dest); 5346 break; 5347 default: 5348 ASSERT(0); 5349 } 5350 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5351 "rsmipc_send done\n")); 5352 return (0); 5353 } 5354 5355 if (dest >= MAX_NODES) { 5356 DBG_PRINTF((category, RSM_ERR, 5357 "rsm: rsmipc_send bad node number %x\n", dest)); 5358 return (RSMERR_REMOTE_NODE_UNREACHABLE); 5359 } 5360 5361 /* 5362 * Oh boy! we are going remote. 
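*
* An outline of the remote path below (a restatement of the
* existing logic, not new behavior):
*
*	again:
*		back off every min_retry_cnt retries;
*		get a sendq token for dest (fail if none);
*		if flow controlled, wait for msgbuf_avail and
*		    reserve a msgbuf;
*		rsm_send the request and/or the reply;
*		on failure release the msgbuf, bump the error
*		    count and goto again;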
5363 */ 5364 5365 /* 5366 * identify if we need to have credits to send this message 5367 * - only selected requests are flow controlled 5368 */ 5369 if (req != NULL) { 5370 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5371 "rsmipc_send:request type=%d\n", 5372 req->rsmipc_hdr.rsmipc_type)); 5373 5374 switch (req->rsmipc_hdr.rsmipc_type) { 5375 case RSMIPC_MSG_SEGCONNECT: 5376 case RSMIPC_MSG_DISCONNECT: 5377 case RSMIPC_MSG_IMPORTING: 5378 case RSMIPC_MSG_SUSPEND: 5379 case RSMIPC_MSG_SUSPEND_DONE: 5380 case RSMIPC_MSG_RESUME: 5381 credit_check = 1; 5382 break; 5383 default: 5384 credit_check = 0; 5385 } 5386 } 5387 5388 again: 5389 if (retry_cnt++ == min_retry_cnt) { 5390 /* backoff before further retries for 10ms */ 5391 delay(drv_usectohz(10000)); 5392 retry_cnt = 0; /* reset retry_cnt */ 5393 } 5394 sendq_token = rsmka_get_sendq_token(dest, used_sendq_token); 5395 if (sendq_token == NULL) { 5396 DBG_PRINTF((category, RSM_ERR, 5397 "rsm: rsmipc_send no device to reach node %d\n", dest)); 5398 return (RSMERR_REMOTE_NODE_UNREACHABLE); 5399 } 5400 5401 if ((sendq_token == used_sendq_token) && 5402 ((e == RSMERR_CONN_ABORTED) || (e == RSMERR_TIMEOUT) || 5403 (e == RSMERR_COMM_ERR_MAYBE_DELIVERED))) { 5404 rele_sendq_token(sendq_token); 5405 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send done=%d\n", e)); 5406 return (RSMERR_CONN_ABORTED); 5407 } else 5408 used_sendq_token = sendq_token; 5409 5410 /* lint -save -e413 */ 5411 path = SQ_TOKEN_TO_PATH(sendq_token); 5412 adapter = path->local_adapter; 5413 /* lint -restore */ 5414 ipc_handle = sendq_token->rsmpi_sendq_handle; 5415 5416 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5417 "rsmipc_send: path=%lx sendq_hdl=%lx\n", path, ipc_handle)); 5418 5419 if (reply == NULL) { 5420 /* Send request without ack */ 5421 /* 5422 * Set the rsmipc_version number in the msghdr for KA 5423 * communication versioning 5424 */ 5425 req->rsmipc_hdr.rsmipc_version = RSM_VERSION; 5426 req->rsmipc_hdr.rsmipc_src = my_nodeid; 5427 /* 5428 * remote endpoints incn should match the value in our 5429 * path's remote_incn field. No need to grab any lock 5430 * since we have refcnted the path in rsmka_get_sendq_token 5431 */ 5432 req->rsmipc_hdr.rsmipc_incn = path->remote_incn; 5433 5434 is.is_data = (void *)req; 5435 is.is_size = sizeof (*req); 5436 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5437 is.is_wait = 0; 5438 5439 if (credit_check) { 5440 mutex_enter(&path->mutex); 5441 /* 5442 * wait till we recv credits or path goes down. If path 5443 * goes down rsm_send will fail and we handle the error 5444 * then 5445 */ 5446 while ((sendq_token->msgbuf_avail == 0) && 5447 (path->state == RSMKA_PATH_ACTIVE)) { 5448 e = cv_wait_sig(&sendq_token->sendq_cv, 5449 &path->mutex); 5450 if (e == 0) { 5451 mutex_exit(&path->mutex); 5452 no_reply_cnt++; 5453 rele_sendq_token(sendq_token); 5454 DBG_PRINTF((category, RSM_DEBUG, 5455 "rsmipc_send done: " 5456 "cv_wait INTERRUPTED")); 5457 return (RSMERR_INTERRUPTED); 5458 } 5459 } 5460 5461 /* 5462 * path is not active retry on another path. 
5463 */ 5464 if (path->state != RSMKA_PATH_ACTIVE) { 5465 mutex_exit(&path->mutex); 5466 rele_sendq_token(sendq_token); 5467 e = RSMERR_CONN_ABORTED; 5468 DBG_PRINTF((category, RSM_ERR, 5469 "rsm: rsmipc_send: path !ACTIVE")); 5470 goto again; 5471 } 5472 5473 ASSERT(sendq_token->msgbuf_avail > 0); 5474 5475 /* 5476 * reserve a msgbuf 5477 */ 5478 sendq_token->msgbuf_avail--; 5479 5480 mutex_exit(&path->mutex); 5481 5482 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5483 NULL); 5484 5485 if (e != RSM_SUCCESS) { 5486 mutex_enter(&path->mutex); 5487 /* 5488 * release the reserved msgbuf since 5489 * the send failed 5490 */ 5491 sendq_token->msgbuf_avail++; 5492 cv_broadcast(&sendq_token->sendq_cv); 5493 mutex_exit(&path->mutex); 5494 } 5495 } else 5496 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5497 NULL); 5498 5499 no_reply_cnt++; 5500 rele_sendq_token(sendq_token); 5501 if (e != RSM_SUCCESS) { 5502 DBG_PRINTF((category, RSM_ERR, 5503 "rsm: rsmipc_send no reply send" 5504 " err = %d no reply count = %d\n", 5505 e, no_reply_cnt)); 5506 ASSERT(e != RSMERR_QUEUE_FENCE_UP && 5507 e != RSMERR_BAD_BARRIER_HNDL); 5508 atomic_add_64(&rsm_ipcsend_errcnt, 1); 5509 goto again; 5510 } else { 5511 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5512 "rsmipc_send done\n")); 5513 return (e); 5514 } 5515 5516 } 5517 5518 if (req == NULL) { 5519 /* Send reply - No flow control is done for reply */ 5520 /* 5521 * Set the version in the msg header for KA communication 5522 * versioning 5523 */ 5524 reply->rsmipc_hdr.rsmipc_version = RSM_VERSION; 5525 reply->rsmipc_hdr.rsmipc_src = my_nodeid; 5526 /* incn number is not used for reply msgs currently */ 5527 reply->rsmipc_hdr.rsmipc_incn = path->remote_incn; 5528 5529 is.is_data = (void *)reply; 5530 is.is_size = sizeof (*reply); 5531 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5532 is.is_wait = 0; 5533 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL); 5534 rele_sendq_token(sendq_token); 5535 if (e != RSM_SUCCESS) { 5536 DBG_PRINTF((category, RSM_ERR, 5537 "rsm: rsmipc_send reply send" 5538 " err = %d\n", e)); 5539 atomic_add_64(&rsm_ipcsend_errcnt, 1); 5540 goto again; 5541 } else { 5542 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5543 "rsmipc_send done\n")); 5544 return (e); 5545 } 5546 } 5547 5548 /* Reply needed */ 5549 rslot = rsmipc_alloc(); /* allocate a new ipc slot */ 5550 5551 mutex_enter(&rslot->rsmipc_lock); 5552 5553 rslot->rsmipc_data = (void *)reply; 5554 RSMIPC_SET(rslot, RSMIPC_PENDING); 5555 5556 while (RSMIPC_GET(rslot, RSMIPC_PENDING)) { 5557 /* 5558 * Set the rsmipc_version number in the msghdr for KA 5559 * communication versioning 5560 */ 5561 req->rsmipc_hdr.rsmipc_version = RSM_VERSION; 5562 req->rsmipc_hdr.rsmipc_src = my_nodeid; 5563 req->rsmipc_hdr.rsmipc_cookie = rslot->rsmipc_cookie; 5564 /* 5565 * remote endpoints incn should match the value in our 5566 * path's remote_incn field. No need to grab any lock 5567 * since we have refcnted the path in rsmka_get_sendq_token 5568 */ 5569 req->rsmipc_hdr.rsmipc_incn = path->remote_incn; 5570 5571 is.is_data = (void *)req; 5572 is.is_size = sizeof (*req); 5573 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5574 is.is_wait = 0; 5575 if (credit_check) { 5576 5577 mutex_enter(&path->mutex); 5578 /* 5579 * wait till we recv credits or path goes down. If path 5580 * goes down rsm_send will fail and we handle the error 5581 * then. 
5582 */ 5583 while ((sendq_token->msgbuf_avail == 0) && 5584 (path->state == RSMKA_PATH_ACTIVE)) { 5585 e = cv_wait_sig(&sendq_token->sendq_cv, 5586 &path->mutex); 5587 if (e == 0) { 5588 mutex_exit(&path->mutex); 5589 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5590 rsmipc_free(rslot); 5591 rele_sendq_token(sendq_token); 5592 DBG_PRINTF((category, RSM_DEBUG, 5593 "rsmipc_send done: " 5594 "cv_wait INTERRUPTED")); 5595 return (RSMERR_INTERRUPTED); 5596 } 5597 } 5598 5599 /* 5600 * path is not active; retry on another path. 5601 */ 5602 if (path->state != RSMKA_PATH_ACTIVE) { 5603 mutex_exit(&path->mutex); 5604 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5605 rsmipc_free(rslot); 5606 rele_sendq_token(sendq_token); 5607 e = RSMERR_CONN_ABORTED; 5608 DBG_PRINTF((category, RSM_ERR, 5609 "rsm: rsmipc_send: path !ACTIVE")); 5610 goto again; 5611 } 5612 5613 ASSERT(sendq_token->msgbuf_avail > 0); 5614 5615 /* 5616 * reserve a msgbuf 5617 */ 5618 sendq_token->msgbuf_avail--; 5619 5620 mutex_exit(&path->mutex); 5621 5622 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5623 NULL); 5624 5625 if (e != RSM_SUCCESS) { 5626 mutex_enter(&path->mutex); 5627 /* 5628 * release the reserved msgbuf since 5629 * the send failed 5630 */ 5631 sendq_token->msgbuf_avail++; 5632 cv_broadcast(&sendq_token->sendq_cv); 5633 mutex_exit(&path->mutex); 5634 } 5635 } else 5636 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, 5637 NULL); 5638 5639 if (e != RSM_SUCCESS) { 5640 DBG_PRINTF((category, RSM_ERR, 5641 "rsm: rsmipc_send rsmpi send err = %d\n", e)); 5642 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5643 rsmipc_free(rslot); 5644 rele_sendq_token(sendq_token); 5645 atomic_add_64(&rsm_ipcsend_errcnt, 1); 5646 goto again; 5647 } 5648 5649 /* wait for a reply signal, a SIGINT, or 5 sec. timeout */ 5650 (void) drv_getparm(LBOLT, &ticks); 5651 ticks += drv_usectohz(5000000); 5652 e = cv_timedwait_sig(&rslot->rsmipc_cv, &rslot->rsmipc_lock, 5653 ticks); 5654 if (e < 0) { 5655 /* timed out - retry */ 5656 e = RSMERR_TIMEOUT; 5657 } else if (e == 0) { 5658 /* signalled - return error */ 5659 e = RSMERR_INTERRUPTED; 5660 break; 5661 } else { 5662 e = RSM_SUCCESS; 5663 } 5664 } 5665 5666 RSMIPC_CLEAR(rslot, RSMIPC_PENDING); 5667 rsmipc_free(rslot); 5668 rele_sendq_token(sendq_token); 5669 5670 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmipc_send done=%d\n", e)); 5671 return (e); 5672 } 5673 5674 static int 5675 rsm_send_notimporting(rsm_node_id_t dest, rsm_memseg_id_t segid, void *cookie) 5676 { 5677 rsmipc_request_t request; 5678 5679 /* 5680 * inform the exporter to delete this importer 5681 */ 5682 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING; 5683 request.rsmipc_key = segid; 5684 request.rsmipc_segment_cookie = cookie; 5685 return (rsmipc_send(dest, &request, RSM_NO_REPLY)); 5686 } 5687 5688 static void 5689 rsm_send_republish(rsm_memseg_id_t segid, rsmapi_access_entry_t *acl, 5690 int acl_len, rsm_permission_t default_permission) 5691 { 5692 int i; 5693 importing_token_t *token; 5694 rsmipc_request_t request; 5695 republish_token_t *republish_list = NULL; 5696 republish_token_t *rp; 5697 rsm_permission_t permission; 5698 int index; 5699 5700 /* 5701 * send the new access mode to all the nodes that have imported 5702 * this segment. 5703 * If the new acl does not have a node that was present in 5704 * the old acl, an access permission of 0 is sent.
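*
* For example (hypothetical values): if node 3 imported under the
* old acl but the new acl only contains <node 5, RSM_PERM_RDWR>,
* the loop below finds no entry for node 3 and sends it the
* caller's default_permission (0 in the case described above).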
5705 */ 5706 5707 index = rsmhash(segid); 5708 5709 /* 5710 * create a list of node/permissions to send the republish message 5711 */ 5712 mutex_enter(&importer_list.lock); 5713 5714 token = importer_list.bucket[index]; 5715 while (token != NULL) { 5716 if (segid == token->key) { 5717 permission = default_permission; 5718 5719 for (i = 0; i < acl_len; i++) { 5720 if (token->importing_node == acl[i].ae_node) { 5721 permission = acl[i].ae_permission; 5722 break; 5723 } 5724 } 5725 rp = kmem_zalloc(sizeof (republish_token_t), KM_SLEEP); 5726 5727 rp->key = segid; 5728 rp->importing_node = token->importing_node; 5729 rp->permission = permission; 5730 rp->next = republish_list; 5731 republish_list = rp; 5732 } 5733 token = token->next; 5734 } 5735 5736 mutex_exit(&importer_list.lock); 5737 5738 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_REPUBLISH; 5739 request.rsmipc_key = segid; 5740 5741 while (republish_list != NULL) { 5742 request.rsmipc_perm = republish_list->permission; 5743 (void) rsmipc_send(republish_list->importing_node, 5744 &request, RSM_NO_REPLY); 5745 rp = republish_list; 5746 republish_list = republish_list->next; 5747 kmem_free(rp, sizeof (republish_token_t)); 5748 } 5749 } 5750 5751 static void 5752 rsm_send_suspend() 5753 { 5754 int i, e; 5755 rsmipc_request_t request; 5756 list_element_t *tokp; 5757 list_element_t *head = NULL; 5758 importing_token_t *token; 5759 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 5760 "rsm_send_suspend enter\n")); 5761 5762 /* 5763 * create a list of nodes to send the suspend message to 5764 * 5765 * Currently the whole importer list is scanned and we obtain 5766 * all the nodes - this basically gets all nodes that at least 5767 * import one segment from the local node. 5768 * 5769 * no need to grab the rsm_suspend_list lock here since we are 5770 * single threaded when suspend is called. 5771 */ 5772 5773 mutex_enter(&importer_list.lock); 5774 for (i = 0; i < rsm_hash_size; i++) { 5775 5776 token = importer_list.bucket[i]; 5777 5778 while (token != NULL) { 5779 5780 tokp = head; 5781 5782 /* 5783 * make sure that the token's node 5784 * is not already on the suspend list 5785 */ 5786 while (tokp != NULL) { 5787 if (tokp->nodeid == token->importing_node) { 5788 break; 5789 } 5790 tokp = tokp->next; 5791 } 5792 5793 if (tokp == NULL) { /* not in suspend list */ 5794 tokp = kmem_zalloc(sizeof (list_element_t), 5795 KM_SLEEP); 5796 tokp->nodeid = token->importing_node; 5797 tokp->next = head; 5798 head = tokp; 5799 } 5800 5801 token = token->next; 5802 } 5803 } 5804 mutex_exit(&importer_list.lock); 5805 5806 if (head == NULL) { /* no importers so go ahead and quiesce segments */ 5807 exporter_quiesce(); 5808 return; 5809 } 5810 5811 mutex_enter(&rsm_suspend_list.list_lock); 5812 ASSERT(rsm_suspend_list.list_head == NULL); 5813 /* 5814 * update the suspend list right away so that if a node dies the 5815 * pathmanager can set the NODE dead flag 5816 */ 5817 rsm_suspend_list.list_head = head; 5818 mutex_exit(&rsm_suspend_list.list_lock); 5819 5820 tokp = head; 5821 5822 while (tokp != NULL) { 5823 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SUSPEND; 5824 e = rsmipc_send(tokp->nodeid, &request, RSM_NO_REPLY); 5825 /* 5826 * Error in rsmipc_send currently happens due to inaccessibility 5827 * of the remote node.
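*
* Consequently only the nodes whose SUSPEND send succeeded get
* marked RSM_SUSPEND_ACKPENDING below; an unreachable node is not
* waited on, so rsm_suspend_complete can still declare the suspend
* done.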
5828 */ 5829 if (e == RSM_SUCCESS) { /* send succeeded - wait for the ack */ 5830 tokp->flags |= RSM_SUSPEND_ACKPENDING; 5831 } 5832 5833 tokp = tokp->next; 5834 } 5835 5836 DBG_PRINTF((RSM_KERNEL_AGENT | RSM_EXPORT, RSM_DEBUG_VERBOSE, 5837 "rsm_send_suspend done\n")); 5838 5839 } 5840 5841 static void 5842 rsm_send_resume() 5843 { 5844 rsmipc_request_t request; 5845 list_element_t *elem, *head; 5846 5847 /* 5848 * save the suspend list so that we know where to send 5849 * the resume messages and make the suspend list head 5850 * NULL. 5851 */ 5852 mutex_enter(&rsm_suspend_list.list_lock); 5853 head = rsm_suspend_list.list_head; 5854 rsm_suspend_list.list_head = NULL; 5855 mutex_exit(&rsm_suspend_list.list_lock); 5856 5857 while (head != NULL) { 5858 elem = head; 5859 head = head->next; 5860 5861 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_RESUME; 5862 5863 (void) rsmipc_send(elem->nodeid, &request, RSM_NO_REPLY); 5864 5865 kmem_free((void *)elem, sizeof (list_element_t)); 5866 5867 } 5868 5869 } 5870 5871 /* 5872 * This function takes a path and sends a message using the sendq 5873 * corresponding to it. The RSMIPC_MSG_SQREADY, RSMIPC_MSG_SQREADY_ACK 5874 * and RSMIPC_MSG_CREDIT are sent using this function. 5875 */ 5876 int 5877 rsmipc_send_controlmsg(path_t *path, int msgtype) 5878 { 5879 int e; 5880 int retry_cnt = 0; 5881 int min_retry_cnt = 10; 5882 clock_t timeout; 5883 adapter_t *adapter; 5884 rsm_send_t is; 5885 rsm_send_q_handle_t ipc_handle; 5886 rsmipc_controlmsg_t msg; 5887 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_FLOWCONTROL); 5888 5889 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5890 "rsmipc_send_controlmsg enter\n")); 5891 5892 ASSERT(MUTEX_HELD(&path->mutex)); 5893 5894 adapter = path->local_adapter; 5895 5896 DBG_PRINTF((category, RSM_DEBUG, "rsmipc_send_controlmsg:path=%lx " 5897 "msgtype=%d %lx:%llx->%lx:%llx procmsg=%d\n", path, msgtype, 5898 my_nodeid, adapter->hwaddr, path->remote_node, 5899 path->remote_hwaddr, path->procmsg_cnt)); 5900 5901 if (path->state != RSMKA_PATH_ACTIVE) { 5902 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5903 "rsmipc_send_controlmsg done: !
RSMKA_PATH_ACTIVE")); 5904 return (1); 5905 } 5906 5907 ipc_handle = path->sendq_token.rsmpi_sendq_handle; 5908 5909 msg.rsmipc_hdr.rsmipc_version = RSM_VERSION; 5910 msg.rsmipc_hdr.rsmipc_src = my_nodeid; 5911 msg.rsmipc_hdr.rsmipc_type = msgtype; 5912 msg.rsmipc_hdr.rsmipc_incn = path->remote_incn; 5913 5914 if (msgtype == RSMIPC_MSG_CREDIT) 5915 msg.rsmipc_credits = path->procmsg_cnt; 5916 5917 msg.rsmipc_local_incn = path->local_incn; 5918 5919 msg.rsmipc_adapter_hwaddr = adapter->hwaddr; 5920 /* incr the sendq, path refcnt */ 5921 PATH_HOLD_NOLOCK(path); 5922 SENDQ_TOKEN_HOLD(path); 5923 5924 do { 5925 /* drop the path lock before doing the rsm_send */ 5926 mutex_exit(&path->mutex); 5927 5928 is.is_data = (void *)&msg; 5929 is.is_size = sizeof (msg); 5930 is.is_flags = RSM_INTR_SEND_DELIVER | RSM_INTR_SEND_SLEEP; 5931 is.is_wait = 0; 5932 5933 e = adapter->rsmpi_ops->rsm_send(ipc_handle, &is, NULL); 5934 5935 ASSERT(e != RSMERR_QUEUE_FENCE_UP && 5936 e != RSMERR_BAD_BARRIER_HNDL); 5937 5938 mutex_enter(&path->mutex); 5939 5940 if (e == RSM_SUCCESS) { 5941 break; 5942 } 5943 /* error counter for statistics */ 5944 atomic_add_64(&rsm_ctrlmsg_errcnt, 1); 5945 5946 DBG_PRINTF((category, RSM_ERR, 5947 "rsmipc_send_controlmsg:rsm_send error=%d", e)); 5948 5949 if (++retry_cnt == min_retry_cnt) { /* backoff before retry */ 5950 timeout = ddi_get_lbolt() + drv_usectohz(10000); 5951 (void) cv_timedwait(&path->sendq_token.sendq_cv, 5952 &path->mutex, timeout); 5953 retry_cnt = 0; 5954 } 5955 } while (path->state == RSMKA_PATH_ACTIVE); 5956 5957 /* decrement the sendq,path refcnt that we incr before rsm_send */ 5958 SENDQ_TOKEN_RELE(path); 5959 PATH_RELE_NOLOCK(path); 5960 5961 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 5962 "rsmipc_send_controlmsg done=%d", e)); 5963 return (e); 5964 } 5965 5966 /* 5967 * Called from rsm_force_unload and path_importer_disconnect. The memory 5968 * mapping for the imported segment is removed and the segment is 5969 * disconnected at the interconnect layer if disconnect_flag is TRUE. 5970 * rsm_force_unload will get disconnect_flag TRUE from rsm_intr_callback 5971 * and FALSE from rsm_rebind. 5972 * 5973 * When subsequent accesses cause page faulting, the dummy page is mapped 5974 * to resolve the fault, and the mapping generation number is incremented 5975 * so that the application can be notified on a close barrier operation. 5976 * 5977 * It is important to note that the caller of rsmseg_unload is responsible for 5978 * acquiring the segment lock before making a call to rsmseg_unload. This is 5979 * required to make the caller and rsmseg_unload thread safe. The segment lock 5980 * will be released by the rsmseg_unload function. 5981 */ 5982 void 5983 rsmseg_unload(rsmseg_t *im_seg) 5984 { 5985 rsmcookie_t *hdl; 5986 void *shared_cookie; 5987 rsmipc_request_t request; 5988 uint_t maxprot; 5989 5990 DBG_DEFINE(category, 5991 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_INTR_CALLBACK); 5992 5993 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload enter\n")); 5994 5995 ASSERT(im_seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 5996 5997 /* wait until segment leaves the mapping state */ 5998 while (im_seg->s_state == RSM_STATE_MAPPING) 5999 cv_wait(&im_seg->s_cv, &im_seg->s_lock); 6000 /* 6001 * An unload is only necessary if the segment is connected. However, 6002 * if the segment was on the import list in state RSM_STATE_CONNECTING 6003 * then a connection was in progress. Change to RSM_STATE_NEW 6004 * here to cause an early exit from the connection process. 
6005 */ 6006 if (im_seg->s_state == RSM_STATE_NEW) { 6007 rsmseglock_release(im_seg); 6008 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6009 "rsmseg_unload done: RSM_STATE_NEW\n")); 6010 return; 6011 } else if (im_seg->s_state == RSM_STATE_CONNECTING) { 6012 im_seg->s_state = RSM_STATE_ABORT_CONNECT; 6013 rsmsharelock_acquire(im_seg); 6014 im_seg->s_share->rsmsi_state = RSMSI_STATE_ABORT_CONNECT; 6015 rsmsharelock_release(im_seg); 6016 rsmseglock_release(im_seg); 6017 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6018 "rsmseg_unload done: RSM_STATE_CONNECTING\n")); 6019 return; 6020 } 6021 6022 if (im_seg->s_flags & RSM_FORCE_DISCONNECT) { 6023 if (im_seg->s_ckl != NULL) { 6024 int e; 6025 /* Setup protections for remap */ 6026 maxprot = PROT_USER; 6027 if (im_seg->s_mode & RSM_PERM_READ) { 6028 maxprot |= PROT_READ; 6029 } 6030 if (im_seg->s_mode & RSM_PERM_WRITE) { 6031 maxprot |= PROT_WRITE; 6032 } 6033 hdl = im_seg->s_ckl; 6034 for (; hdl != NULL; hdl = hdl->c_next) { 6035 e = devmap_umem_remap(hdl->c_dhp, rsm_dip, 6036 remap_cookie, 6037 hdl->c_off, hdl->c_len, 6038 maxprot, 0, NULL); 6039 6040 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6041 "remap returns %d\n", e)); 6042 } 6043 } 6044 6045 (void) rsm_closeconnection(im_seg, &shared_cookie); 6046 6047 if (shared_cookie != NULL) { 6048 /* 6049 * inform the exporting node so this import 6050 * can be deleted from the list of importers. 6051 */ 6052 request.rsmipc_hdr.rsmipc_type = 6053 RSMIPC_MSG_NOTIMPORTING; 6054 request.rsmipc_key = im_seg->s_segid; 6055 request.rsmipc_segment_cookie = shared_cookie; 6056 rsmseglock_release(im_seg); 6057 (void) rsmipc_send(im_seg->s_node, &request, 6058 RSM_NO_REPLY); 6059 } else { 6060 rsmseglock_release(im_seg); 6061 } 6062 } 6063 else 6064 rsmseglock_release(im_seg); 6065 6066 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmseg_unload done\n")); 6067 6068 } 6069 6070 /* ****************************** Importer Calls ************************ */ 6071 6072 static int 6073 rsm_access(uid_t owner, gid_t group, int perm, int mode, const struct cred *cr) 6074 { 6075 int shifts = 0; 6076 6077 if (crgetuid(cr) != owner) { 6078 shifts += 3; 6079 if (!groupmember(group, cr)) 6080 shifts += 3; 6081 } 6082 6083 mode &= ~(perm << shifts); 6084 6085 if (mode == 0) 6086 return (0); 6087 6088 return (secpolicy_rsm_access(cr, owner, mode)); 6089 } 6090 6091 6092 static int 6093 rsm_connect(rsmseg_t *seg, rsm_ioctlmsg_t *msg, cred_t *cred, 6094 intptr_t dataptr, int mode) 6095 { 6096 int e; 6097 int recheck_state = 0; 6098 void *shared_cookie; 6099 rsmipc_request_t request; 6100 rsmipc_reply_t reply; 6101 rsm_permission_t access; 6102 adapter_t *adapter; 6103 rsm_addr_t addr = 0; 6104 rsm_import_share_t *sharedp; 6105 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6106 6107 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect enter\n")); 6108 6109 adapter = rsm_getadapter(msg, mode); 6110 if (adapter == NULL) { 6111 DBG_PRINTF((category, RSM_ERR, 6112 "rsm_connect done:ENODEV adapter=NULL\n")); 6113 return (RSMERR_CTLR_NOT_PRESENT); 6114 } 6115 6116 if ((adapter == &loopback_adapter) && (msg->nodeid != my_nodeid)) { 6117 rsmka_release_adapter(adapter); 6118 DBG_PRINTF((category, RSM_ERR, 6119 "rsm_connect done:ENODEV loopback\n")); 6120 return (RSMERR_CTLR_NOT_PRESENT); 6121 } 6122 6123 6124 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6125 ASSERT(seg->s_state == RSM_STATE_NEW); 6126 6127 /* 6128 * Translate perm to access 6129 */ 6130 if (msg->perm & ~RSM_PERM_RDWR) { 6131 rsmka_release_adapter(adapter); 
6132 DBG_PRINTF((category, RSM_ERR, 6133 "rsm_connect done:EINVAL invalid perms\n")); 6134 return (RSMERR_BAD_PERMS); 6135 } 6136 access = 0; 6137 if (msg->perm & RSM_PERM_READ) 6138 access |= RSM_ACCESS_READ; 6139 if (msg->perm & RSM_PERM_WRITE) 6140 access |= RSM_ACCESS_WRITE; 6141 6142 seg->s_node = msg->nodeid; 6143 6144 /* 6145 * Adding to the import list locks the segment; release the segment 6146 * lock so we can get the reply for the send. 6147 */ 6148 e = rsmimport_add(seg, msg->key); 6149 if (e) { 6150 rsmka_release_adapter(adapter); 6151 DBG_PRINTF((category, RSM_ERR, 6152 "rsm_connect done:rsmimport_add failed %d\n", e)); 6153 return (e); 6154 } 6155 seg->s_state = RSM_STATE_CONNECTING; 6156 6157 /* 6158 * Set the s_adapter field here so as to have a valid comparison of 6159 * the adapter and the s_adapter value during rsmshare_get. For 6160 * any error, set s_adapter to NULL before doing a release_adapter 6161 */ 6162 seg->s_adapter = adapter; 6163 6164 rsmseglock_release(seg); 6165 6166 /* 6167 * get the pointer to the shared data structure; the 6168 * shared data is locked and refcount has been incremented 6169 */ 6170 sharedp = rsmshare_get(msg->key, msg->nodeid, adapter, seg); 6171 6172 ASSERT(rsmsharelock_held(seg)); 6173 6174 do { 6175 /* flag indicates whether we need to recheck the state */ 6176 recheck_state = 0; 6177 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6178 "rsm_connect:RSMSI_STATE=%d\n", sharedp->rsmsi_state)); 6179 switch (sharedp->rsmsi_state) { 6180 case RSMSI_STATE_NEW: 6181 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING; 6182 break; 6183 case RSMSI_STATE_CONNECTING: 6184 /* FALLTHRU */ 6185 case RSMSI_STATE_CONN_QUIESCE: 6186 /* FALLTHRU */ 6187 case RSMSI_STATE_MAP_QUIESCE: 6188 /* wait for the state to change */ 6189 while ((sharedp->rsmsi_state == 6190 RSMSI_STATE_CONNECTING) || 6191 (sharedp->rsmsi_state == 6192 RSMSI_STATE_CONN_QUIESCE) || 6193 (sharedp->rsmsi_state == 6194 RSMSI_STATE_MAP_QUIESCE)) { 6195 if (cv_wait_sig(&sharedp->rsmsi_cv, 6196 &sharedp->rsmsi_lock) == 0) { 6197 /* signalled - clean up and return */ 6198 rsmsharelock_release(seg); 6199 rsmimport_rm(seg); 6200 seg->s_adapter = NULL; 6201 rsmka_release_adapter(adapter); 6202 seg->s_state = RSM_STATE_NEW; 6203 DBG_PRINTF((category, RSM_ERR, 6204 "rsm_connect done: INTERRUPTED\n")); 6205 return (RSMERR_INTERRUPTED); 6206 } 6207 } 6208 /* 6209 * the state changed, loop back and check what it is 6210 */ 6211 recheck_state = 1; 6212 break; 6213 case RSMSI_STATE_ABORT_CONNECT: 6214 /* exit the loop and clean up further down */ 6215 break; 6216 case RSMSI_STATE_CONNECTED: 6217 /* already connected, good - fall through */ 6218 case RSMSI_STATE_MAPPED: 6219 /* already mapped, wow - fall through */ 6220 /* access validation etc is done further down */ 6221 break; 6222 case RSMSI_STATE_DISCONNECTED: 6223 /* disconnected - so reconnect now */ 6224 sharedp->rsmsi_state = RSMSI_STATE_CONNECTING; 6225 break; 6226 default: 6227 ASSERT(0); /* Invalid State */ 6228 } 6229 } while (recheck_state); 6230 6231 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6232 /* we are the first to connect */ 6233 rsmsharelock_release(seg); 6234 6235 if (msg->nodeid != my_nodeid) { 6236 addr = get_remote_hwaddr(adapter, msg->nodeid); 6237 6238 if ((int64_t)addr < 0) { 6239 rsmsharelock_acquire(seg); 6240 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6241 RSMSI_STATE_NEW); 6242 rsmsharelock_release(seg); 6243 rsmimport_rm(seg); 6244 seg->s_adapter = NULL; 6245 rsmka_release_adapter(adapter); 6246 seg->s_state = 
RSM_STATE_NEW; 6247 DBG_PRINTF((category, RSM_ERR, 6248 "rsm_connect done: hwaddr<0\n")); 6249 return (RSMERR_INTERNAL_ERROR); 6250 } 6251 } else { 6252 addr = adapter->hwaddr; 6253 } 6254 6255 /* 6256 * send request to node [src, dest, key, msgid] and get back 6257 * [status, msgid, cookie] 6258 */ 6259 request.rsmipc_key = msg->key; 6260 /* 6261 * we need the s_mode of the exporter so pass 6262 * RSM_ACCESS_TRUSTED 6263 */ 6264 request.rsmipc_perm = RSM_ACCESS_TRUSTED; 6265 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_SEGCONNECT; 6266 request.rsmipc_adapter_hwaddr = addr; 6267 request.rsmipc_segment_cookie = sharedp; 6268 6269 e = (int)rsmipc_send(msg->nodeid, &request, &reply); 6270 if (e) { 6271 rsmsharelock_acquire(seg); 6272 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6273 RSMSI_STATE_NEW); 6274 rsmsharelock_release(seg); 6275 rsmimport_rm(seg); 6276 seg->s_adapter = NULL; 6277 rsmka_release_adapter(adapter); 6278 seg->s_state = RSM_STATE_NEW; 6279 DBG_PRINTF((category, RSM_ERR, 6280 "rsm_connect done:rsmipc_send failed %d\n", e)); 6281 return (e); 6282 } 6283 6284 if (reply.rsmipc_status != RSM_SUCCESS) { 6285 rsmsharelock_acquire(seg); 6286 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, 6287 RSMSI_STATE_NEW); 6288 rsmsharelock_release(seg); 6289 rsmimport_rm(seg); 6290 seg->s_adapter = NULL; 6291 rsmka_release_adapter(adapter); 6292 seg->s_state = RSM_STATE_NEW; 6293 DBG_PRINTF((category, RSM_ERR, 6294 "rsm_connect done:rsmipc_send reply err %d\n", 6295 reply.rsmipc_status)); 6296 return (reply.rsmipc_status); 6297 } 6298 6299 rsmsharelock_acquire(seg); 6300 /* store the information recvd into the shared data struct */ 6301 sharedp->rsmsi_mode = reply.rsmipc_mode; 6302 sharedp->rsmsi_uid = reply.rsmipc_uid; 6303 sharedp->rsmsi_gid = reply.rsmipc_gid; 6304 sharedp->rsmsi_seglen = reply.rsmipc_seglen; 6305 sharedp->rsmsi_cookie = sharedp; 6306 } 6307 6308 rsmsharelock_release(seg); 6309 6310 /* 6311 * Get the segment lock and check for a force disconnect 6312 * from the export side which would have changed the state 6313 * back to RSM_STATE_NEW. Once the segment lock is acquired a 6314 * force disconnect will be held off until the connection 6315 * has completed. 
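 *
 * Note the lock ordering used from here on: the segment lock is taken
 * before the share lock, and both are dropped before rsmimport_rm()
 * is called on the abort and error paths below.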
6316 */ 6317 rsmseglock_acquire(seg); 6318 rsmsharelock_acquire(seg); 6319 ASSERT(seg->s_state == RSM_STATE_CONNECTING || 6320 seg->s_state == RSM_STATE_ABORT_CONNECT); 6321 6322 shared_cookie = sharedp->rsmsi_cookie; 6323 6324 if ((seg->s_state == RSM_STATE_ABORT_CONNECT) || 6325 (sharedp->rsmsi_state == RSMSI_STATE_ABORT_CONNECT)) { 6326 seg->s_state = RSM_STATE_NEW; 6327 seg->s_adapter = NULL; 6328 rsmsharelock_release(seg); 6329 rsmseglock_release(seg); 6330 rsmimport_rm(seg); 6331 rsmka_release_adapter(adapter); 6332 6333 rsmsharelock_acquire(seg); 6334 if (!(sharedp->rsmsi_flags & RSMSI_FLAGS_ABORTDONE)) { 6335 /* 6336 * set a flag indicating abort handling has been 6337 * done 6338 */ 6339 sharedp->rsmsi_flags |= RSMSI_FLAGS_ABORTDONE; 6340 rsmsharelock_release(seg); 6341 /* send a message to exporter - only once */ 6342 (void) rsm_send_notimporting(msg->nodeid, 6343 msg->key, shared_cookie); 6344 rsmsharelock_acquire(seg); 6345 /* 6346 * wake up any waiting importers and inform that 6347 * connection has been aborted 6348 */ 6349 cv_broadcast(&sharedp->rsmsi_cv); 6350 } 6351 rsmsharelock_release(seg); 6352 6353 DBG_PRINTF((category, RSM_ERR, 6354 "rsm_connect done: RSM_STATE_ABORT_CONNECT\n")); 6355 return (RSMERR_INTERRUPTED); 6356 } 6357 6358 6359 /* 6360 * We need to verify that this process has access 6361 */ 6362 e = rsm_access(sharedp->rsmsi_uid, sharedp->rsmsi_gid, 6363 access & sharedp->rsmsi_mode, 6364 (int)(msg->perm & RSM_PERM_RDWR), cred); 6365 if (e) { 6366 rsmsharelock_release(seg); 6367 seg->s_state = RSM_STATE_NEW; 6368 seg->s_adapter = NULL; 6369 rsmseglock_release(seg); 6370 rsmimport_rm(seg); 6371 rsmka_release_adapter(adapter); 6372 /* 6373 * No need to lock the segment; it has been removed 6374 * from the hash table 6375 */ 6376 rsmsharelock_acquire(seg); 6377 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6378 rsmsharelock_release(seg); 6379 /* this is the first importer */ 6380 6381 (void) rsm_send_notimporting(msg->nodeid, msg->key, 6382 shared_cookie); 6383 rsmsharelock_acquire(seg); 6384 sharedp->rsmsi_state = RSMSI_STATE_NEW; 6385 cv_broadcast(&sharedp->rsmsi_cv); 6386 } 6387 rsmsharelock_release(seg); 6388 6389 DBG_PRINTF((category, RSM_ERR, 6390 "rsm_connect done: ipcaccess failed\n")); 6391 return (RSMERR_PERM_DENIED); 6392 } 6393 6394 /* update state and cookie */ 6395 seg->s_segid = sharedp->rsmsi_segid; 6396 seg->s_len = sharedp->rsmsi_seglen; 6397 seg->s_mode = access & sharedp->rsmsi_mode; 6398 seg->s_pid = ddi_get_pid(); 6399 seg->s_mapinfo = NULL; 6400 6401 if (seg->s_node != my_nodeid) { 6402 if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTING) { 6403 e = adapter->rsmpi_ops->rsm_connect( 6404 adapter->rsmpi_handle, 6405 addr, seg->s_segid, &sharedp->rsmsi_handle); 6406 6407 if (e != RSM_SUCCESS) { 6408 seg->s_state = RSM_STATE_NEW; 6409 seg->s_adapter = NULL; 6410 rsmsharelock_release(seg); 6411 rsmseglock_release(seg); 6412 rsmimport_rm(seg); 6413 rsmka_release_adapter(adapter); 6414 /* 6415 * inform the exporter to delete this importer 6416 */ 6417 (void) rsm_send_notimporting(msg->nodeid, 6418 msg->key, shared_cookie); 6419 6420 /* 6421 * Now inform any waiting importers to 6422 * retry connect.
This needs to be done 6423 * after sending notimporting so that 6424 * the notimporting is sent before a waiting 6425 * importer sends a segconnect while retrying 6426 * 6427 * No need to lock the segment; it has been removed 6428 * from the hash table 6429 */ 6430 6431 rsmsharelock_acquire(seg); 6432 sharedp->rsmsi_state = RSMSI_STATE_NEW; 6433 cv_broadcast(&sharedp->rsmsi_cv); 6434 rsmsharelock_release(seg); 6435 6436 DBG_PRINTF((category, RSM_ERR, 6437 "rsm_connect error %d\n", e)); 6438 if (e == RSMERR_SEG_NOT_PUBLISHED_TO_RSM_ADDR) 6439 return ( 6440 RSMERR_SEG_NOT_PUBLISHED_TO_NODE); 6441 else if ((e == RSMERR_RSM_ADDR_UNREACHABLE) || 6442 (e == RSMERR_UNKNOWN_RSM_ADDR)) 6443 return (RSMERR_REMOTE_NODE_UNREACHABLE); 6444 else 6445 return (e); 6446 } 6447 6448 } 6449 seg->s_handle.in = sharedp->rsmsi_handle; 6450 6451 } 6452 6453 seg->s_state = RSM_STATE_CONNECT; 6454 6455 6456 seg->s_flags &= ~RSM_IMPORT_DUMMY; /* clear dummy flag */ 6457 if (bar_va) { 6458 /* increment generation number on barrier page */ 6459 atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1); 6460 /* return user off into barrier page where status will be */ 6461 msg->off = (int)seg->s_hdr.rsmrc_num; 6462 msg->gnum = bar_va[msg->off]; /* gnum race */ 6463 } else { 6464 msg->off = 0; 6465 msg->gnum = 0; /* gnum race */ 6466 } 6467 6468 msg->len = (int)sharedp->rsmsi_seglen; 6469 msg->rnum = seg->s_minor; 6470 rsmsharecv_signal(seg, RSMSI_STATE_CONNECTING, RSMSI_STATE_CONNECTED); 6471 rsmsharelock_release(seg); 6472 rsmseglock_release(seg); 6473 6474 /* Return back to user the segment size & perm in case it's needed */ 6475 6476 #ifdef _MULTI_DATAMODEL 6477 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 6478 rsm_ioctlmsg32_t msg32; 6479 6480 if (msg->len > UINT_MAX) 6481 msg32.len = RSM_MAXSZ_PAGE_ALIGNED; 6482 else 6483 msg32.len = msg->len; 6484 msg32.off = msg->off; 6485 msg32.perm = msg->perm; 6486 msg32.gnum = msg->gnum; 6487 msg32.rnum = msg->rnum; 6488 6489 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6490 "rsm_connect done\n")); 6491 6492 if (ddi_copyout((caddr_t)&msg32, (caddr_t)dataptr, 6493 sizeof (msg32), mode)) 6494 return (RSMERR_BAD_ADDR); 6495 else 6496 return (RSM_SUCCESS); 6497 } 6498 #endif 6499 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_connect done\n")); 6500 6501 if (ddi_copyout((caddr_t)msg, (caddr_t)dataptr, sizeof (*msg), 6502 mode)) 6503 return (RSMERR_BAD_ADDR); 6504 else 6505 return (RSM_SUCCESS); 6506 } 6507 6508 static int 6509 rsm_unmap(rsmseg_t *seg) 6510 { 6511 int err; 6512 adapter_t *adapter; 6513 rsm_import_share_t *sharedp; 6514 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6515 6516 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6517 "rsm_unmap enter %u\n", seg->s_segid)); 6518 6519 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6520 6521 /* assert seg is locked */ 6522 ASSERT(rsmseglock_held(seg)); 6523 ASSERT(seg->s_state != RSM_STATE_MAPPING); 6524 6525 if ((seg->s_state != RSM_STATE_ACTIVE) && 6526 (seg->s_state != RSM_STATE_MAP_QUIESCE)) { 6527 /* segment unmap has already been done */ 6528 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n")); 6529 return (RSM_SUCCESS); 6530 } 6531 6532 sharedp = seg->s_share; 6533 6534 rsmsharelock_acquire(seg); 6535 6536 /* 6537 * - shared data struct is in MAPPED or MAP_QUIESCE state 6538 */ 6539 6540 ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED || 6541 sharedp->rsmsi_state == RSMSI_STATE_MAP_QUIESCE); 6542 6543 /* 6544 * Unmap pages - previously rsm_memseg_import_unmap was called only if 6545 * the segment cookie
list was NULL; but it is always NULL when 6546 * called from rsmmap_unmap and won't be NULL when called for 6547 * a force disconnect - so the check for NULL cookie list was removed 6548 */ 6549 6550 ASSERT(sharedp->rsmsi_mapcnt > 0); 6551 6552 sharedp->rsmsi_mapcnt--; 6553 6554 if (sharedp->rsmsi_mapcnt == 0) { 6555 if (sharedp->rsmsi_state == RSMSI_STATE_MAPPED) { 6556 /* unmap the shared RSMPI mapping */ 6557 adapter = seg->s_adapter; 6558 if (seg->s_node != my_nodeid) { 6559 ASSERT(sharedp->rsmsi_handle != NULL); 6560 err = adapter->rsmpi_ops-> 6561 rsm_unmap(sharedp->rsmsi_handle); 6562 DBG_PRINTF((category, RSM_DEBUG, 6563 "rsm_unmap: rsmpi unmap %d\n", err)); 6564 rsm_free_mapinfo(sharedp->rsmsi_mapinfo); 6565 sharedp->rsmsi_mapinfo = NULL; 6566 } 6567 sharedp->rsmsi_state = RSMSI_STATE_CONNECTED; 6568 } else { /* MAP_QUIESCE --munmap()--> CONN_QUIESCE */ 6569 sharedp->rsmsi_state = RSMSI_STATE_CONN_QUIESCE; 6570 } 6571 } 6572 6573 rsmsharelock_release(seg); 6574 6575 /* 6576 * The s_cookie field is used to store the cookie returned from the 6577 * ddi_umem_lock when binding the pages for an export segment. This 6578 * is the primary use of the s_cookie field and does not normally 6579 * pertain to any importing segment except in the loopback case. 6580 * For the loopback case, the import segment and export segment are 6581 * on the same node, the s_cookie field of the segment structure for 6582 * the importer is initialized to the s_cookie field in the exported 6583 * segment during the map operation and is used during the call to 6584 * devmap_umem_setup for the import mapping. 6585 * Thus, during unmap, we simply need to set s_cookie to NULL to 6586 * indicate that the mapping no longer exists. 6587 */ 6588 seg->s_cookie = NULL; 6589 6590 seg->s_mapinfo = NULL; 6591 6592 if (seg->s_state == RSM_STATE_ACTIVE) 6593 seg->s_state = RSM_STATE_CONNECT; 6594 else 6595 seg->s_state = RSM_STATE_CONN_QUIESCE; 6596 6597 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_unmap done\n")); 6598 6599 return (RSM_SUCCESS); 6600 } 6601 6602 /* 6603 * cookie returned here if not null indicates that it is 6604 * the last importer and it can be used in the RSMIPC_NOT_IMPORTING 6605 * message. 6606 */ 6607 static int 6608 rsm_closeconnection(rsmseg_t *seg, void **cookie) 6609 { 6610 int e; 6611 adapter_t *adapter; 6612 rsm_import_share_t *sharedp; 6613 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6614 6615 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6616 "rsm_closeconnection enter\n")); 6617 6618 *cookie = (void *)NULL; 6619 6620 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6621 6622 /* assert seg is locked */ 6623 ASSERT(rsmseglock_held(seg)); 6624 6625 if (seg->s_state == RSM_STATE_DISCONNECT) { 6626 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6627 "rsm_closeconnection done: already disconnected\n")); 6628 return (RSM_SUCCESS); 6629 } 6630 6631 /* wait for all putv/getv ops to get done */ 6632 while (seg->s_rdmacnt > 0) { 6633 cv_wait(&seg->s_cv, &seg->s_lock); 6634 } 6635 6636 (void) rsm_unmap(seg); 6637 6638 ASSERT(seg->s_state == RSM_STATE_CONNECT || 6639 seg->s_state == RSM_STATE_CONN_QUIESCE); 6640 6641 adapter = seg->s_adapter; 6642 sharedp = seg->s_share; 6643 6644 ASSERT(sharedp != NULL); 6645 6646 rsmsharelock_acquire(seg); 6647 6648 /* 6649 * Disconnect on adapter 6650 * 6651 * The current algorithm is stateless, I don't have to contact 6652 * server when I go away. He only gives me permissions. Of course, 6653 * the adapters will talk to terminate the connect. 
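 * Only the last importer of the shared connection (rsmsi_refcnt == 1
 * below), and only when the exporter is remote (rsmsi_node !=
 * my_nodeid), issues the RSMPI-level disconnect; earlier importers
 * just drop their reference.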
6654 * 6655 * disconnect is needed only if we are CONNECTED not in CONN_QUIESCE 6656 */ 6657 if ((sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) && 6658 (sharedp->rsmsi_node != my_nodeid)) { 6659 6660 if (sharedp->rsmsi_refcnt == 1) { 6661 /* this is the last importer */ 6662 ASSERT(sharedp->rsmsi_mapcnt == 0); 6663 6664 e = adapter->rsmpi_ops-> 6665 rsm_disconnect(sharedp->rsmsi_handle); 6666 if (e != RSM_SUCCESS) { 6667 DBG_PRINTF((category, RSM_DEBUG, 6668 "rsm:disconnect failed seg=%x:err=%d\n", 6669 seg->s_key, e)); 6670 } 6671 } 6672 } 6673 6674 seg->s_handle.in = NULL; 6675 6676 sharedp->rsmsi_refcnt--; 6677 6678 if (sharedp->rsmsi_refcnt == 0) { 6679 *cookie = (void *)sharedp->rsmsi_cookie; 6680 sharedp->rsmsi_state = RSMSI_STATE_DISCONNECTED; 6681 sharedp->rsmsi_handle = NULL; 6682 rsmsharelock_release(seg); 6683 6684 /* clean up the shared data structure */ 6685 mutex_destroy(&sharedp->rsmsi_lock); 6686 cv_destroy(&sharedp->rsmsi_cv); 6687 kmem_free((void *)(sharedp), sizeof (rsm_import_share_t)); 6688 6689 } else { 6690 rsmsharelock_release(seg); 6691 } 6692 6693 /* increment generation number on barrier page */ 6694 if (bar_va) { 6695 atomic_add_16(bar_va + seg->s_hdr.rsmrc_num, 1); 6696 } 6697 6698 /* 6699 * The following needs to be done after any 6700 * rsmsharelock calls which use seg->s_share. 6701 */ 6702 seg->s_share = NULL; 6703 6704 seg->s_state = RSM_STATE_DISCONNECT; 6705 /* signal anyone waiting in the CONN_QUIESCE state */ 6706 cv_broadcast(&seg->s_cv); 6707 6708 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6709 "rsm_closeconnection done\n")); 6710 6711 return (RSM_SUCCESS); 6712 } 6713 6714 int 6715 rsm_disconnect(rsmseg_t *seg) 6716 { 6717 rsmipc_request_t request; 6718 void *shared_cookie; 6719 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT); 6720 6721 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect enter\n")); 6722 6723 ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT); 6724 6725 /* assert seg isn't locked */ 6726 ASSERT(!rsmseglock_held(seg)); 6727 6728 6729 /* Remove segment from imported list */ 6730 rsmimport_rm(seg); 6731 6732 /* acquire the segment */ 6733 rsmseglock_acquire(seg); 6734 6735 /* wait until segment leaves the mapping state */ 6736 while (seg->s_state == RSM_STATE_MAPPING) 6737 cv_wait(&seg->s_cv, &seg->s_lock); 6738 6739 if (seg->s_state == RSM_STATE_DISCONNECT) { 6740 seg->s_state = RSM_STATE_NEW; 6741 rsmseglock_release(seg); 6742 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6743 "rsm_disconnect done: already disconnected\n")); 6744 return (RSM_SUCCESS); 6745 } 6746 6747 (void) rsm_closeconnection(seg, &shared_cookie); 6748 6749 /* update state */ 6750 seg->s_state = RSM_STATE_NEW; 6751 6752 if (shared_cookie != NULL) { 6753 /* 6754 * This is the last importer so inform the exporting node 6755 * so this import can be deleted from the list of importers. 
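 * (rsm_closeconnection returns a non-NULL cookie only in this
 * last-importer case; it is passed back in the RSMIPC_MSG_NOTIMPORTING
 * request so the exporter can match this import in its importer list.)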
6756 */ 6757 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_NOTIMPORTING; 6758 request.rsmipc_key = seg->s_segid; 6759 request.rsmipc_segment_cookie = shared_cookie; 6760 rsmseglock_release(seg); 6761 (void) rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 6762 } else { 6763 rsmseglock_release(seg); 6764 } 6765 6766 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_disconnect done\n")); 6767 6768 return (DDI_SUCCESS); 6769 } 6770 6771 /*ARGSUSED*/ 6772 static int 6773 rsm_chpoll(dev_t dev, short events, int anyyet, short *reventsp, 6774 struct pollhead **phpp) 6775 { 6776 minor_t rnum; 6777 rsmresource_t *res; 6778 rsmseg_t *seg; 6779 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI); 6780 6781 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll enter\n")); 6782 6783 /* find minor, no lock */ 6784 rnum = getminor(dev); 6785 res = rsmresource_lookup(rnum, RSM_NOLOCK); 6786 6787 /* poll is supported only for export/import segments */ 6788 if ((res == NULL) || (res == RSMRC_RESERVED) || 6789 (res->rsmrc_type == RSM_RESOURCE_BAR)) { 6790 return (ENXIO); 6791 } 6792 6793 *reventsp = 0; 6794 6795 /* 6796 * An exported segment must be in state RSM_STATE_EXPORT; an 6797 * imported segment must be in state RSM_STATE_ACTIVE. 6798 */ 6799 seg = (rsmseg_t *)res; 6800 6801 if (seg->s_pollevent) { 6802 *reventsp = POLLRDNORM; 6803 } else if (!anyyet) { 6804 /* cannot take segment lock here */ 6805 *phpp = &seg->s_poll; 6806 seg->s_pollflag |= RSM_SEGMENT_POLL; 6807 } 6808 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_chpoll done\n")); 6809 return (0); 6810 } 6811 6812 6813 6814 /* ************************* IOCTL Commands ********************* */ 6815 6816 static rsmseg_t * 6817 rsmresource_seg(rsmresource_t *res, minor_t rnum, cred_t *credp, 6818 rsm_resource_type_t type) 6819 { 6820 /* get segment from resource handle */ 6821 rsmseg_t *seg; 6822 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 6823 6824 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg enter\n")); 6825 6826 6827 if (res != RSMRC_RESERVED) { 6828 seg = (rsmseg_t *)res; 6829 } else { 6830 /* Allocate segment now and bind it */ 6831 seg = rsmseg_alloc(rnum, credp); 6832 6833 /* 6834 * if DR pre-processing is going on or DR is in progress 6835 * then the new export segments should be in the NEW_QSCD state 6836 */ 6837 if (type == RSM_RESOURCE_EXPORT_SEGMENT) { 6838 mutex_enter(&rsm_drv_data.drv_lock); 6839 if ((rsm_drv_data.drv_state == 6840 RSM_DRV_PREDEL_STARTED) || 6841 (rsm_drv_data.drv_state == 6842 RSM_DRV_PREDEL_COMPLETED) || 6843 (rsm_drv_data.drv_state == 6844 RSM_DRV_DR_IN_PROGRESS)) { 6845 seg->s_state = RSM_STATE_NEW_QUIESCED; 6846 } 6847 mutex_exit(&rsm_drv_data.drv_lock); 6848 } 6849 6850 rsmresource_insert(rnum, (rsmresource_t *)seg, type); 6851 } 6852 6853 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmresource_seg done\n")); 6854 6855 return (seg); 6856 } 6857 6858 static int 6859 rsmexport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6860 int mode, cred_t *credp) 6861 { 6862 int error; 6863 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 6864 6865 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl enter\n")); 6866 6867 arg = arg; 6868 credp = credp; 6869 6870 ASSERT(seg != NULL); 6871 6872 switch (cmd) { 6873 case RSM_IOCTL_BIND: 6874 error = rsm_bind(seg, msg, arg, mode); 6875 break; 6876 case RSM_IOCTL_REBIND: 6877 error = rsm_rebind(seg, msg); 6878 break; 6879 case RSM_IOCTL_UNBIND: 6880 error = ENOTSUP; 6881 break; 6882 case 
RSM_IOCTL_PUBLISH: 6883 error = rsm_publish(seg, msg, arg, mode); 6884 break; 6885 case RSM_IOCTL_REPUBLISH: 6886 error = rsm_republish(seg, msg, mode); 6887 break; 6888 case RSM_IOCTL_UNPUBLISH: 6889 error = rsm_unpublish(seg, 1); 6890 break; 6891 default: 6892 error = EINVAL; 6893 break; 6894 } 6895 6896 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmexport_ioctl done: %d\n", 6897 error)); 6898 6899 return (error); 6900 } 6901 static int 6902 rsmimport_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6903 int mode, cred_t *credp) 6904 { 6905 int error; 6906 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6907 6908 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl enter\n")); 6909 6910 ASSERT(seg); 6911 6912 switch (cmd) { 6913 case RSM_IOCTL_CONNECT: 6914 error = rsm_connect(seg, msg, credp, arg, mode); 6915 break; 6916 default: 6917 error = EINVAL; 6918 break; 6919 } 6920 6921 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmimport_ioctl done: %d\n", 6922 error)); 6923 return (error); 6924 } 6925 6926 static int 6927 rsmbar_ioctl(rsmseg_t *seg, rsm_ioctlmsg_t *msg, int cmd, intptr_t arg, 6928 int mode) 6929 { 6930 int e; 6931 adapter_t *adapter; 6932 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 6933 6934 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmbar_ioctl enter\n")); 6935 6936 6937 if ((seg->s_flags & RSM_IMPORT_DUMMY) != 0) { 6938 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6939 "rsmbar_ioctl done: RSM_IMPORT_DUMMY\n")); 6940 return (RSMERR_CONN_ABORTED); 6941 } else if (seg->s_node == my_nodeid) { 6942 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6943 "rsmbar_ioctl done: loopback\n")); 6944 return (RSM_SUCCESS); 6945 } 6946 6947 adapter = seg->s_adapter; 6948 6949 switch (cmd) { 6950 case RSM_IOCTL_BAR_CHECK: 6951 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6952 "rsmbar_ioctl done: RSM_BAR_CHECK %d\n", bar_va)); 6953 return (bar_va ? RSM_SUCCESS : EINVAL); 6954 case RSM_IOCTL_BAR_OPEN: 6955 e = adapter->rsmpi_ops-> 6956 rsm_open_barrier_ctrl(adapter->rsmpi_handle, &msg->bar); 6957 break; 6958 case RSM_IOCTL_BAR_ORDER: 6959 e = adapter->rsmpi_ops->rsm_order_barrier(&msg->bar); 6960 break; 6961 case RSM_IOCTL_BAR_CLOSE: 6962 e = adapter->rsmpi_ops->rsm_close_barrier(&msg->bar); 6963 break; 6964 default: 6965 e = EINVAL; 6966 break; 6967 } 6968 6969 if (e == RSM_SUCCESS) { 6970 #ifdef _MULTI_DATAMODEL 6971 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 6972 rsm_ioctlmsg32_t msg32; 6973 int i; 6974 6975 for (i = 0; i < 4; i++) { 6976 msg32.bar.comp[i].u64 = msg->bar.comp[i].u64; 6977 } 6978 6979 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6980 "rsmbar_ioctl done\n")); 6981 if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg, 6982 sizeof (msg32), mode)) 6983 return (RSMERR_BAD_ADDR); 6984 else 6985 return (RSM_SUCCESS); 6986 } 6987 #endif 6988 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6989 "rsmbar_ioctl done\n")); 6990 if (ddi_copyout((caddr_t)&msg->bar, (caddr_t)arg, 6991 sizeof (*msg), mode)) 6992 return (RSMERR_BAD_ADDR); 6993 else 6994 return (RSM_SUCCESS); 6995 } 6996 6997 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 6998 "rsmbar_ioctl done: error=%d\n", e)); 6999 7000 return (e); 7001 } 7002 7003 /* 7004 * Ring the doorbell of the export segment to which this segment is 7005 * connected. 
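 * The doorbell travels as an RSMIPC_MSG_BELL message; on the receiving
 * node it is expected to be surfaced to waiting applications as a
 * POLLRDNORM event (see rsm_chpoll and the s_pollevent handling above).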
7006 */ 7007 static int 7008 exportbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 7009 { 7010 int e = 0; 7011 rsmipc_request_t request; 7012 7013 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7014 7015 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "exportbell_ioctl enter\n")); 7016 7017 request.rsmipc_key = seg->s_segid; 7018 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7019 request.rsmipc_segment_cookie = NULL; 7020 e = rsmipc_send(seg->s_node, &request, RSM_NO_REPLY); 7021 7022 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7023 "exportbell_ioctl done: %d\n", e)); 7024 7025 return (e); 7026 } 7027 7028 /* 7029 * Ring the doorbells of all segments importing this segment 7030 */ 7031 static int 7032 importbell_ioctl(rsmseg_t *seg, int cmd /*ARGSUSED*/) 7033 { 7034 importing_token_t *token = NULL; 7035 rsmipc_request_t request; 7036 int index; 7037 7038 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_EXPORT | RSM_IOCTL); 7039 7040 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "importbell_ioctl enter\n")); 7041 7042 ASSERT(seg->s_state != RSM_STATE_NEW && 7043 seg->s_state != RSM_STATE_NEW_QUIESCED); 7044 7045 request.rsmipc_key = seg->s_segid; 7046 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7047 7048 index = rsmhash(seg->s_segid); 7049 7050 token = importer_list.bucket[index]; 7051 7052 while (token != NULL) { 7053 if (seg->s_key == token->key) { 7054 request.rsmipc_segment_cookie = 7055 token->import_segment_cookie; 7056 (void) rsmipc_send(token->importing_node, 7057 &request, RSM_NO_REPLY); 7058 } 7059 token = token->next; 7060 } 7061 7062 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7063 "importbell_ioctl done\n")); 7064 return (RSM_SUCCESS); 7065 } 7066 7067 static int 7068 rsm_consumeevent_copyin(caddr_t arg, rsm_consume_event_msg_t *msgp, 7069 rsm_poll_event_t **eventspp, int mode) 7070 { 7071 rsm_poll_event_t *evlist = NULL; 7072 size_t evlistsz; 7073 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7074 7075 #ifdef _MULTI_DATAMODEL 7076 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7077 int i; 7078 rsm_consume_event_msg32_t cemsg32 = {0}; 7079 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7080 rsm_poll_event32_t *evlist32; 7081 size_t evlistsz32; 7082 7083 /* copyin the ioctl message */ 7084 if (ddi_copyin(arg, (caddr_t)&cemsg32, 7085 sizeof (rsm_consume_event_msg32_t), mode)) { 7086 DBG_PRINTF((category, RSM_ERR, 7087 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7088 return (RSMERR_BAD_ADDR); 7089 } 7090 msgp->seglist = (caddr_t)(uintptr_t)cemsg32.seglist; 7091 msgp->numents = (int)cemsg32.numents; 7092 7093 evlistsz32 = sizeof (rsm_poll_event32_t) * msgp->numents; 7094 /* 7095 * If numents is large alloc events list on heap otherwise 7096 * use the address of array that was passed in. 
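 * RSM_MAX_POLLFDS bounds the on-stack arrays; larger lists (validated
 * against max_segs) are kmem_zalloc'd here and freed again in
 * rsm_consumeevent_copyout once the results have been copied out.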
7097 */ 7098 if (msgp->numents > RSM_MAX_POLLFDS) { 7099 if (msgp->numents > max_segs) { /* validate numents */ 7100 DBG_PRINTF((category, RSM_ERR, 7101 "consumeevent_copyin: " 7102 "RSMERR_BAD_ARGS_ERRORS\n")); 7103 return (RSMERR_BAD_ARGS_ERRORS); 7104 } 7105 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7106 } else { 7107 evlist32 = event32; 7108 } 7109 7110 /* copyin the seglist into the rsm_poll_event32_t array */ 7111 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)evlist32, 7112 evlistsz32, mode)) { 7113 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7114 kmem_free(evlist32, evlistsz32); 7115 } 7116 DBG_PRINTF((category, RSM_ERR, 7117 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7118 return (RSMERR_BAD_ADDR); 7119 } 7120 7121 /* evlist and evlistsz are based on rsm_poll_event_t type */ 7122 evlistsz = sizeof (rsm_poll_event_t)* msgp->numents; 7123 7124 if (msgp->numents > RSM_MAX_POLLFDS) { 7125 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7126 *eventspp = evlist; 7127 } else { 7128 evlist = *eventspp; 7129 } 7130 /* 7131 * copy the rsm_poll_event32_t array to the rsm_poll_event_t 7132 * array 7133 */ 7134 for (i = 0; i < msgp->numents; i++) { 7135 evlist[i].rnum = evlist32[i].rnum; 7136 evlist[i].fdsidx = evlist32[i].fdsidx; 7137 evlist[i].revent = evlist32[i].revent; 7138 } 7139 /* free the temp 32-bit event list */ 7140 if ((msgp->numents > RSM_MAX_POLLFDS) && evlist32) { 7141 kmem_free(evlist32, evlistsz32); 7142 } 7143 7144 return (RSM_SUCCESS); 7145 } 7146 #endif 7147 /* copyin the ioctl message */ 7148 if (ddi_copyin(arg, (caddr_t)msgp, sizeof (rsm_consume_event_msg_t), 7149 mode)) { 7150 DBG_PRINTF((category, RSM_ERR, 7151 "consumeevent_copyin msgp: RSMERR_BAD_ADDR\n")); 7152 return (RSMERR_BAD_ADDR); 7153 } 7154 /* 7155 * If numents is large alloc events list on heap otherwise 7156 * use the address of array that was passed in. 
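 * In the small case evlist stays NULL and *eventspp keeps pointing at
 * the caller's stack array, so only the heap-allocated case needs the
 * kmem_free in the error path below.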
7157 */ 7158 if (msgp->numents > RSM_MAX_POLLFDS) { 7159 if (msgp->numents > max_segs) { /* validate numents */ 7160 DBG_PRINTF((category, RSM_ERR, 7161 "consumeevent_copyin: RSMERR_BAD_ARGS_ERRORS\n")); 7162 return (RSMERR_BAD_ARGS_ERRORS); 7163 } 7164 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7165 evlist = kmem_zalloc(evlistsz, KM_SLEEP); 7166 *eventspp = evlist; 7167 } 7168 7169 /* copyin the seglist */ 7170 if (ddi_copyin((caddr_t)msgp->seglist, (caddr_t)(*eventspp), 7171 sizeof (rsm_poll_event_t)*msgp->numents, mode)) { 7172 if (evlist) { 7173 kmem_free(evlist, evlistsz); 7174 *eventspp = NULL; 7175 } 7176 DBG_PRINTF((category, RSM_ERR, 7177 "consumeevent_copyin evlist: RSMERR_BAD_ADDR\n")); 7178 return (RSMERR_BAD_ADDR); 7179 } 7180 7181 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7182 "consumeevent_copyin done\n")); 7183 return (RSM_SUCCESS); 7184 } 7185 7186 static int 7187 rsm_consumeevent_copyout(rsm_consume_event_msg_t *msgp, 7188 rsm_poll_event_t *eventsp, int mode) 7189 { 7190 size_t evlistsz; 7191 int err = RSM_SUCCESS; 7192 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7193 7194 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7195 "consumeevent_copyout enter: numents(%d) eventsp(%p)\n", 7196 msgp->numents, eventsp)); 7197 7198 #ifdef _MULTI_DATAMODEL 7199 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7200 int i; 7201 rsm_poll_event32_t event32[RSM_MAX_POLLFDS]; 7202 rsm_poll_event32_t *evlist32; 7203 size_t evlistsz32; 7204 7205 evlistsz32 = sizeof (rsm_poll_event32_t)*msgp->numents; 7206 if (msgp->numents > RSM_MAX_POLLFDS) { 7207 evlist32 = kmem_zalloc(evlistsz32, KM_SLEEP); 7208 } else { 7209 evlist32 = event32; 7210 } 7211 7212 /* 7213 * copy the rsm_poll_event_t array to the rsm_poll_event32_t 7214 * array 7215 */ 7216 for (i = 0; i < msgp->numents; i++) { 7217 evlist32[i].rnum = eventsp[i].rnum; 7218 evlist32[i].fdsidx = eventsp[i].fdsidx; 7219 evlist32[i].revent = eventsp[i].revent; 7220 } 7221 7222 if (ddi_copyout((caddr_t)evlist32, (caddr_t)msgp->seglist, 7223 evlistsz32, mode)) { 7224 err = RSMERR_BAD_ADDR; 7225 } 7226 7227 if (msgp->numents > RSM_MAX_POLLFDS) { 7228 if (evlist32) { /* free the temp 32-bit event list */ 7229 kmem_free(evlist32, evlistsz32); 7230 } 7231 /* 7232 * eventsp and evlistsz are based on rsm_poll_event_t 7233 * type 7234 */ 7235 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7236 /* event list on the heap and needs to be freed here */ 7237 if (eventsp) { 7238 kmem_free(eventsp, evlistsz); 7239 } 7240 } 7241 7242 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7243 "consumeevent_copyout done: err=%d\n", err)); 7244 return (err); 7245 } 7246 #endif 7247 evlistsz = sizeof (rsm_poll_event_t)*msgp->numents; 7248 7249 if (ddi_copyout((caddr_t)eventsp, (caddr_t)msgp->seglist, evlistsz, 7250 mode)) { 7251 err = RSMERR_BAD_ADDR; 7252 } 7253 7254 if ((msgp->numents > RSM_MAX_POLLFDS) && eventsp) { 7255 /* event list on the heap and needs to be freed here */ 7256 kmem_free(eventsp, evlistsz); 7257 } 7258 7259 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7260 "consumeevent_copyout done: err=%d\n", err)); 7261 return (err); 7262 } 7263 7264 static int 7265 rsm_consumeevent_ioctl(caddr_t arg, int mode) 7266 { 7267 int rc; 7268 int i; 7269 minor_t rnum; 7270 rsm_consume_event_msg_t msg = {0}; 7271 rsmseg_t *seg; 7272 rsm_poll_event_t *event_list; 7273 rsm_poll_event_t events[RSM_MAX_POLLFDS]; 7274 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IOCTL); 7275 7276 event_list = events; 7277 7278 if ((rc = rsm_consumeevent_copyin(arg, &msg, &event_list, 
mode)) != 7279 RSM_SUCCESS) { 7280 return (rc); 7281 } 7282 7283 for (i = 0; i < msg.numents; i++) { 7284 rnum = event_list[i].rnum; 7285 event_list[i].revent = 0; 7286 /* get the segment structure */ 7287 seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK); 7288 if (seg) { 7289 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7290 "consumeevent_ioctl: rnum(%d) seg(%p)\n", rnum, 7291 seg)); 7292 if (seg->s_pollevent) { 7293 /* consume the event */ 7294 atomic_add_32(&seg->s_pollevent, -1); 7295 event_list[i].revent = POLLRDNORM; 7296 } 7297 rsmseglock_release(seg); 7298 } 7299 } 7300 7301 if ((rc = rsm_consumeevent_copyout(&msg, event_list, mode)) != 7302 RSM_SUCCESS) { 7303 return (rc); 7304 } 7305 7306 return (RSM_SUCCESS); 7307 } 7308 7309 static int 7310 iovec_copyin(caddr_t user_vec, rsmka_iovec_t *iovec, int count, int mode) 7311 { 7312 int size; 7313 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7314 7315 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin enter\n")); 7316 7317 #ifdef _MULTI_DATAMODEL 7318 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7319 rsmka_iovec32_t *iovec32, *iovec32_base; 7320 int i; 7321 7322 size = count * sizeof (rsmka_iovec32_t); 7323 iovec32_base = iovec32 = kmem_zalloc(size, KM_SLEEP); 7324 if (ddi_copyin((caddr_t)user_vec, 7325 (caddr_t)iovec32, size, mode)) { 7326 kmem_free(iovec32, size); 7327 DBG_PRINTF((category, RSM_DEBUG, 7328 "iovec_copyin: returning RSMERR_BAD_ADDR\n")); 7329 return (RSMERR_BAD_ADDR); 7330 } 7331 7332 for (i = 0; i < count; i++, iovec++, iovec32++) { 7333 iovec->io_type = (int)iovec32->io_type; 7334 if (iovec->io_type == RSM_HANDLE_TYPE) 7335 iovec->local.segid = (rsm_memseg_id_t) 7336 iovec32->local; 7337 else 7338 iovec->local.vaddr = 7339 (caddr_t)(uintptr_t)iovec32->local; 7340 iovec->local_offset = (size_t)iovec32->local_offset; 7341 iovec->remote_offset = (size_t)iovec32->remote_offset; 7342 iovec->transfer_len = (size_t)iovec32->transfer_len; 7343 7344 } 7345 kmem_free(iovec32_base, size); 7346 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7347 "iovec_copyin done\n")); 7348 return (DDI_SUCCESS); 7349 } 7350 #endif 7351 7352 size = count * sizeof (rsmka_iovec_t); 7353 if (ddi_copyin((caddr_t)user_vec, (caddr_t)iovec, size, mode)) { 7354 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7355 "iovec_copyin done: RSMERR_BAD_ADDR\n")); 7356 return (RSMERR_BAD_ADDR); 7357 } 7358 7359 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "iovec_copyin done\n")); 7360 7361 return (DDI_SUCCESS); 7362 } 7363 7364 7365 static int 7366 sgio_copyin(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7367 { 7368 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7369 7370 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin enter\n")); 7371 7372 #ifdef _MULTI_DATAMODEL 7373 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7374 rsmka_scat_gath32_t sg_io32; 7375 7376 if (ddi_copyin(arg, (caddr_t)&sg_io32, sizeof (sg_io32), 7377 mode)) { 7378 DBG_PRINTF((category, RSM_DEBUG, 7379 "sgio_copyin done: returning EFAULT\n")); 7380 return (RSMERR_BAD_ADDR); 7381 } 7382 sg_io->local_nodeid = (rsm_node_id_t)sg_io32.local_nodeid; 7383 sg_io->io_request_count = (size_t)sg_io32.io_request_count; 7384 sg_io->io_residual_count = (size_t)sg_io32.io_residual_count; 7385 sg_io->flags = (size_t)sg_io32.flags; 7386 sg_io->remote_handle = (rsm_memseg_import_handle_t) 7387 (uintptr_t)sg_io32.remote_handle; 7388 sg_io->iovec = (rsmka_iovec_t *)(uintptr_t)sg_io32.iovec; 7389 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7390 "sgio_copyin done\n")); 7391 
return (DDI_SUCCESS); 7392 } 7393 #endif 7394 if (ddi_copyin(arg, (caddr_t)sg_io, sizeof (rsmka_scat_gath_t), 7395 mode)) { 7396 DBG_PRINTF((category, RSM_DEBUG, 7397 "sgio_copyin done: returning EFAULT\n")); 7398 return (RSMERR_BAD_ADDR); 7399 } 7400 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_copyin done\n")); 7401 return (DDI_SUCCESS); 7402 } 7403 7404 static int 7405 sgio_resid_copyout(caddr_t arg, rsmka_scat_gath_t *sg_io, int mode) 7406 { 7407 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7408 7409 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7410 "sgio_resid_copyout enter\n")); 7411 7412 #ifdef _MULTI_DATAMODEL 7413 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7414 rsmka_scat_gath32_t sg_io32; 7415 7416 sg_io32.io_residual_count = sg_io->io_residual_count; 7417 sg_io32.flags = sg_io->flags; 7418 7419 if (ddi_copyout((caddr_t)&sg_io32.io_residual_count, 7420 (caddr_t)&((rsmka_scat_gath32_t *)arg)->io_residual_count, 7421 sizeof (uint32_t), mode)) { 7422 7423 DBG_PRINTF((category, RSM_ERR, 7424 "sgio_resid_copyout error: rescnt\n")); 7425 return (RSMERR_BAD_ADDR); 7426 } 7427 7428 if (ddi_copyout((caddr_t)&sg_io32.flags, 7429 (caddr_t)&((rsmka_scat_gath32_t *)arg)->flags, 7430 sizeof (uint32_t), mode)) { 7431 7432 DBG_PRINTF((category, RSM_ERR, 7433 "sgio_resid_copyout error: flags\n")); 7434 return (RSMERR_BAD_ADDR); 7435 } 7436 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7437 "sgio_resid_copyout done\n")); 7438 return (DDI_SUCCESS); 7439 } 7440 #endif 7441 if (ddi_copyout((caddr_t)&sg_io->io_residual_count, 7442 (caddr_t)&((rsmka_scat_gath_t *)arg)->io_residual_count, 7443 sizeof (ulong_t), mode)) { 7444 7445 DBG_PRINTF((category, RSM_ERR, 7446 "sgio_resid_copyout error:rescnt\n")); 7447 return (RSMERR_BAD_ADDR); 7448 } 7449 7450 if (ddi_copyout((caddr_t)&sg_io->flags, 7451 (caddr_t)&((rsmka_scat_gath_t *)arg)->flags, 7452 sizeof (uint_t), mode)) { 7453 7454 DBG_PRINTF((category, RSM_ERR, 7455 "sgio_resid_copyout error:flags\n")); 7456 return (RSMERR_BAD_ADDR); 7457 } 7458 7459 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "sgio_resid_copyout done\n")); 7460 return (DDI_SUCCESS); 7461 } 7462 7463 7464 static int 7465 rsm_iovec_ioctl(dev_t dev, caddr_t arg, int cmd, int mode, cred_t *credp) 7466 { 7467 rsmka_scat_gath_t sg_io; 7468 rsmka_iovec_t ka_iovec_arr[RSM_MAX_IOVLEN]; 7469 rsmka_iovec_t *ka_iovec; 7470 rsmka_iovec_t *ka_iovec_start; 7471 rsmpi_scat_gath_t rsmpi_sg_io; 7472 rsmpi_iovec_t iovec_arr[RSM_MAX_IOVLEN]; 7473 rsmpi_iovec_t *iovec; 7474 rsmpi_iovec_t *iovec_start = NULL; 7475 rsmapi_access_entry_t *acl; 7476 rsmresource_t *res; 7477 minor_t rnum; 7478 rsmseg_t *im_seg, *ex_seg; 7479 int e; 7480 int error = 0; 7481 uint_t i; 7482 uint_t iov_proc = 0; /* num of iovecs processed */ 7483 size_t size = 0; 7484 size_t ka_size; 7485 7486 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_IMPORT | RSM_IOCTL); 7487 7488 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_iovec_ioctl enter\n")); 7489 7490 credp = credp; 7491 7492 /* 7493 * Copyin the scatter/gather structure and build new structure 7494 * for rsmpi. 
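 *
 * A minimal sketch of what a caller hands in, using the field names
 * consumed by the copyin routines above (the values are illustrative
 * only, and local_segid/xfer_len are placeholders, not names from
 * this file):
 *
 *	rsmka_iovec_t iov;
 *	rsmka_scat_gath_t sg;
 *
 *	iov.io_type = RSM_HANDLE_TYPE;	-- local side named by segment id
 *	iov.local.segid = local_segid;
 *	iov.local_offset = 0;
 *	iov.remote_offset = 0;
 *	iov.transfer_len = xfer_len;
 *
 *	sg.io_request_count = 1;	-- at most RSM_MAX_SGIOREQS
 *	sg.iovec = &iov;		-- array of io_request_count entries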
7495 */ 7496 e = sgio_copyin(arg, &sg_io, mode); 7497 if (e != DDI_SUCCESS) { 7498 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7499 "rsm_iovec_ioctl done: sgio_copyin %d\n", e)); 7500 return (e); 7501 } 7502 7503 if (sg_io.io_request_count > RSM_MAX_SGIOREQS) { 7504 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7505 "rsm_iovec_ioctl done: request_count(%d) too large\n", 7506 sg_io.io_request_count)); 7507 return (RSMERR_BAD_SGIO); 7508 } 7509 7510 rsmpi_sg_io.io_request_count = sg_io.io_request_count; 7511 rsmpi_sg_io.io_residual_count = sg_io.io_request_count; 7512 rsmpi_sg_io.io_segflg = 0; 7513 7514 /* Allocate memory and copyin io vector array */ 7515 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7516 ka_size = sg_io.io_request_count * sizeof (rsmka_iovec_t); 7517 ka_iovec_start = ka_iovec = kmem_zalloc(ka_size, KM_SLEEP); 7518 } else { 7519 ka_iovec_start = ka_iovec = ka_iovec_arr; 7520 } 7521 e = iovec_copyin((caddr_t)sg_io.iovec, ka_iovec, 7522 sg_io.io_request_count, mode); 7523 if (e != DDI_SUCCESS) { 7524 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7525 kmem_free(ka_iovec, ka_size); 7526 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7527 "rsm_iovec_ioctl done: iovec_copyin %d\n", e)); 7528 return (e); 7529 } 7530 7531 /* get the import segment descriptor */ 7532 rnum = getminor(dev); 7533 res = rsmresource_lookup(rnum, RSM_LOCK); 7534 7535 /* 7536 * The following sequence of locking may (or MAY NOT) cause a 7537 * deadlock but this is currently not addressed here since the 7538 * implementation will be changed to incorporate the use of 7539 * reference counting for both the import and the export segments. 7540 */ 7541 7542 /* rsmseglock_acquire(im_seg) done in rsmresource_lookup */ 7543 7544 im_seg = (rsmseg_t *)res; 7545 7546 if (im_seg == NULL) { 7547 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7548 kmem_free(ka_iovec, ka_size); 7549 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7550 "rsm_iovec_ioctl done: rsmresource_lookup failed\n")); 7551 return (EINVAL); 7552 } 7553 /* putv/getv is supported only on import segments */ 7554 if (im_seg->s_type != RSM_RESOURCE_IMPORT_SEGMENT) { 7555 rsmseglock_release(im_seg); 7556 if (sg_io.io_request_count > RSM_MAX_IOVLEN) 7557 kmem_free(ka_iovec, ka_size); 7558 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7559 "rsm_iovec_ioctl done: not an import segment\n")); 7560 return (EINVAL); 7561 } 7562 7563 /* 7564 * wait for a remote DR to complete, i.e. for segments to get UNQUIESCED, 7565 * as well as wait for a local DR to complete.
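 * The wait below uses cv_wait_sig, so it is interruptible: a signal
 * aborts the ioctl with RSMERR_INTERRUPTED before any transfer starts.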
7566 */ 7567 while ((im_seg->s_state == RSM_STATE_CONN_QUIESCE) || 7568 (im_seg->s_state == RSM_STATE_MAP_QUIESCE) || 7569 (im_seg->s_flags & RSM_DR_INPROGRESS)) { 7570 if (cv_wait_sig(&im_seg->s_cv, &im_seg->s_lock) == 0) { 7571 DBG_PRINTF((category, RSM_DEBUG, 7572 "rsm_iovec_ioctl done: cv_wait INTR")); 7573 rsmseglock_release(im_seg); 7574 return (RSMERR_INTERRUPTED); 7575 } 7576 } 7577 7578 if ((im_seg->s_state != RSM_STATE_CONNECT) && 7579 (im_seg->s_state != RSM_STATE_ACTIVE)) { 7580 7581 ASSERT(im_seg->s_state == RSM_STATE_DISCONNECT || 7582 im_seg->s_state == RSM_STATE_NEW); 7583 7584 DBG_PRINTF((category, RSM_DEBUG, 7585 "rsm_iovec_ioctl done: im_seg not conn/map")); 7586 rsmseglock_release(im_seg); 7587 e = RSMERR_BAD_SGIO; 7588 goto out; 7589 } 7590 7591 im_seg->s_rdmacnt++; 7592 rsmseglock_release(im_seg); 7593 7594 /* 7595 * Allocate and set up the io vector for rsmpi 7596 */ 7597 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7598 size = sg_io.io_request_count * sizeof (rsmpi_iovec_t); 7599 iovec_start = iovec = kmem_zalloc(size, KM_SLEEP); 7600 } else { 7601 iovec_start = iovec = iovec_arr; 7602 } 7603 7604 rsmpi_sg_io.iovec = iovec; 7605 for (iov_proc = 0; iov_proc < sg_io.io_request_count; iov_proc++) { 7606 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7607 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7608 7609 if (ex_seg == NULL) { 7610 e = RSMERR_BAD_SGIO; 7611 break; 7612 } 7613 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7614 7615 acl = ex_seg->s_acl; 7616 if (acl[0].ae_permission == 0) { 7617 struct buf *xbuf; 7618 dev_t sdev = 0; 7619 7620 xbuf = ddi_umem_iosetup(ex_seg->s_cookie, 7621 0, ex_seg->s_len, B_WRITE, 7622 sdev, 0, NULL, DDI_UMEM_SLEEP); 7623 7624 ASSERT(xbuf != NULL); 7625 7626 iovec->local_mem.ms_type = RSM_MEM_BUF; 7627 iovec->local_mem.ms_memory.bp = xbuf; 7628 } else { 7629 iovec->local_mem.ms_type = RSM_MEM_HANDLE; 7630 iovec->local_mem.ms_memory.handle = 7631 ex_seg->s_handle.out; 7632 } 7633 ex_seg->s_rdmacnt++; /* refcnt the handle */ 7634 rsmseglock_release(ex_seg); 7635 } else { 7636 iovec->local_mem.ms_type = RSM_MEM_VADDR; 7637 iovec->local_mem.ms_memory.vr.vaddr = 7638 ka_iovec->local.vaddr; 7639 } 7640 7641 iovec->local_offset = ka_iovec->local_offset; 7642 iovec->remote_handle = im_seg->s_handle.in; 7643 iovec->remote_offset = ka_iovec->remote_offset; 7644 iovec->transfer_length = ka_iovec->transfer_len; 7645 iovec++; 7646 ka_iovec++; 7647 } 7648 7649 if (iov_proc < sg_io.io_request_count) { 7650 /* error while processing handle */ 7651 rsmseglock_acquire(im_seg); 7652 im_seg->s_rdmacnt--; /* decrement the refcnt for importseg */ 7653 if (im_seg->s_rdmacnt == 0) { 7654 cv_broadcast(&im_seg->s_cv); 7655 } 7656 rsmseglock_release(im_seg); 7657 goto out; 7658 } 7659 7660 /* call rsmpi */ 7661 if (cmd == RSM_IOCTL_PUTV) 7662 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_putv( 7663 im_seg->s_adapter->rsmpi_handle, 7664 &rsmpi_sg_io); 7665 else if (cmd == RSM_IOCTL_GETV) 7666 e = im_seg->s_adapter->rsmpi_ops->rsm_memseg_import_getv( 7667 im_seg->s_adapter->rsmpi_handle, 7668 &rsmpi_sg_io); 7669 else { 7670 e = EINVAL; 7671 DBG_PRINTF((category, RSM_DEBUG, 7672 "iovec_ioctl: bad command = %x\n", cmd)); 7673 } 7674 7675 7676 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7677 "rsm_iovec_ioctl RSMPI oper done %d\n", e)); 7678 7679 sg_io.io_residual_count = rsmpi_sg_io.io_residual_count; 7680 7681 /* 7682 * Check for implicit signal post flag and do the signal 7683 * post if needed 7684 */ 7685 if (sg_io.flags & RSM_IMPLICIT_SIGPOST && 7686 e 
== RSM_SUCCESS) { 7687 rsmipc_request_t request; 7688 7689 request.rsmipc_key = im_seg->s_segid; 7690 request.rsmipc_hdr.rsmipc_type = RSMIPC_MSG_BELL; 7691 request.rsmipc_segment_cookie = NULL; 7692 e = rsmipc_send(im_seg->s_node, &request, RSM_NO_REPLY); 7693 /* 7694 * Reset the implicit signal post flag to 0 to indicate 7695 * that the signal post has been done and need not be 7696 * done in the RSMAPI library 7697 */ 7698 sg_io.flags &= ~RSM_IMPLICIT_SIGPOST; 7699 } 7700 7701 rsmseglock_acquire(im_seg); 7702 im_seg->s_rdmacnt--; 7703 if (im_seg->s_rdmacnt == 0) { 7704 cv_broadcast(&im_seg->s_cv); 7705 } 7706 rsmseglock_release(im_seg); 7707 error = sgio_resid_copyout(arg, &sg_io, mode); 7708 out: 7709 iovec = iovec_start; 7710 ka_iovec = ka_iovec_start; 7711 for (i = 0; i < iov_proc; i++) { 7712 if (ka_iovec->io_type == RSM_HANDLE_TYPE) { 7713 ex_seg = rsmexport_lookup(ka_iovec->local.segid); 7714 7715 ASSERT(ex_seg != NULL); 7716 ASSERT(ex_seg->s_state == RSM_STATE_EXPORT); 7717 7718 ex_seg->s_rdmacnt--; /* unrefcnt the handle */ 7719 if (ex_seg->s_rdmacnt == 0) { 7720 cv_broadcast(&ex_seg->s_cv); 7721 } 7722 rsmseglock_release(ex_seg); 7723 } 7724 7725 ASSERT(iovec != NULL); /* true if iov_proc > 0 */ 7726 7727 /* 7728 * At present there is no dependency on the existence of xbufs 7729 * created by ddi_umem_iosetup for each of the iovecs. So we 7730 * can free these xbufs here. 7731 */ 7732 if (iovec->local_mem.ms_type == RSM_MEM_BUF) { 7733 freerbuf(iovec->local_mem.ms_memory.bp); 7734 } 7735 7736 iovec++; 7737 ka_iovec++; 7738 } 7739 7740 if (sg_io.io_request_count > RSM_MAX_IOVLEN) { 7741 if (iovec_start) 7742 kmem_free(iovec_start, size); 7743 kmem_free(ka_iovec_start, ka_size); 7744 } 7745 7746 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7747 "rsm_iovec_ioctl done %d\n", e)); 7748 /* if RSMPI call fails return that else return copyout's retval */ 7749 return ((e != RSM_SUCCESS) ? 
e : error); 7750 7751 } 7752 7753 7754 static int 7755 rsmaddr_ioctl(int cmd, rsm_ioctlmsg_t *msg, int mode) 7756 { 7757 adapter_t *adapter; 7758 rsm_addr_t addr; 7759 rsm_node_id_t node; 7760 int rval = DDI_SUCCESS; 7761 DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL); 7762 7763 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmaddr_ioctl enter\n")); 7764 7765 adapter = rsm_getadapter(msg, mode); 7766 if (adapter == NULL) { 7767 DBG_PRINTF((category, RSM_DEBUG, 7768 "rsmaddr_ioctl done: adapter not found\n")); 7769 return (RSMERR_CTLR_NOT_PRESENT); 7770 } 7771 7772 switch (cmd) { 7773 case RSM_IOCTL_MAP_TO_ADDR: /* nodeid to hwaddr mapping */ 7774 /* returns the hwaddr in msg->hwaddr */ 7775 if (msg->nodeid == my_nodeid) { 7776 msg->hwaddr = adapter->hwaddr; 7777 } else { 7778 addr = get_remote_hwaddr(adapter, msg->nodeid); 7779 if ((int64_t)addr < 0) { 7780 rval = RSMERR_INTERNAL_ERROR; 7781 } else { 7782 msg->hwaddr = addr; 7783 } 7784 } 7785 break; 7786 case RSM_IOCTL_MAP_TO_NODEID: /* hwaddr to nodeid mapping */ 7787 /* returns the nodeid in msg->nodeid */ 7788 if (msg->hwaddr == adapter->hwaddr) { 7789 msg->nodeid = my_nodeid; 7790 } else { 7791 node = get_remote_nodeid(adapter, msg->hwaddr); 7792 if ((int)node < 0) { 7793 rval = RSMERR_INTERNAL_ERROR; 7794 } else { 7795 msg->nodeid = (rsm_node_id_t)node; 7796 } 7797 } 7798 break; 7799 default: 7800 rval = EINVAL; 7801 break; 7802 } 7803 7804 rsmka_release_adapter(adapter); 7805 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7806 "rsmaddr_ioctl done: %d\n", rval)); 7807 return (rval); 7808 } 7809 7810 static int 7811 rsm_ddi_copyin(caddr_t arg, rsm_ioctlmsg_t *msg, int mode) 7812 { 7813 DBG_DEFINE(category, 7814 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7815 7816 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin enter\n")); 7817 7818 #ifdef _MULTI_DATAMODEL 7819 7820 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 7821 rsm_ioctlmsg32_t msg32; 7822 int i; 7823 7824 if (ddi_copyin(arg, (caddr_t)&msg32, sizeof (msg32), mode)) { 7825 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7826 "rsm_ddi_copyin done: EFAULT\n")); 7827 return (RSMERR_BAD_ADDR); 7828 } 7829 msg->len = msg32.len; 7830 msg->vaddr = (caddr_t)(uintptr_t)msg32.vaddr; 7831 msg->arg = (caddr_t)(uintptr_t)msg32.arg; 7832 msg->key = msg32.key; 7833 msg->acl_len = msg32.acl_len; 7834 msg->acl = (rsmapi_access_entry_t *)(uintptr_t)msg32.acl; 7835 msg->cnum = msg32.cnum; 7836 msg->cname = (caddr_t)(uintptr_t)msg32.cname; 7837 msg->cname_len = msg32.cname_len; 7838 msg->nodeid = msg32.nodeid; 7839 msg->hwaddr = msg32.hwaddr; 7840 msg->perm = msg32.perm; 7841 for (i = 0; i < 4; i++) { 7842 msg->bar.comp[i].u64 = msg32.bar.comp[i].u64; 7843 } 7844 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7845 "rsm_ddi_copyin done\n")); 7846 return (RSM_SUCCESS); 7847 } 7848 #endif 7849 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ddi_copyin done\n")); 7850 if (ddi_copyin(arg, (caddr_t)msg, sizeof (*msg), mode)) 7851 return (RSMERR_BAD_ADDR); 7852 else 7853 return (RSM_SUCCESS); 7854 } 7855 7856 static int 7857 rsmattr_ddi_copyout(adapter_t *adapter, caddr_t arg, int mode) 7858 { 7859 rsmka_int_controller_attr_t rsm_cattr; 7860 DBG_DEFINE(category, 7861 RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL | RSM_DDI); 7862 7863 DBG_PRINTF((category, RSM_DEBUG_VERBOSE, 7864 "rsmattr_ddi_copyout enter\n")); 7865 /* 7866 * need to copy appropriate data from rsm_controller_attr_t 7867 * to rsmka_int_controller_attr_t 7868 */ 7869 #ifdef _MULTI_DATAMODEL 7870 if ((mode & DATAMODEL_MASK) == 
/*ARGSUSED*/
static int
rsm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
    rsmseg_t *seg;
    rsmresource_t *res;
    minor_t rnum;
    rsm_ioctlmsg_t msg = {0};
    int error;
    adapter_t *adapter;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_IOCTL);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl enter\n"));

    if (cmd == RSM_IOCTL_CONSUMEEVENT) {
        error = rsm_consumeevent_ioctl((caddr_t)arg, mode);
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_ioctl RSM_IOCTL_CONSUMEEVENT done: %d\n", error));
        return (error);
    }

    /* topology cmd does not use the arg common to other cmds */
    if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_TOPOLOGY) {
        error = rsmka_topology_ioctl((caddr_t)arg, cmd, mode);
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_ioctl done: %d\n", error));
        return (error);
    }

    if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_IOVEC) {
        error = rsm_iovec_ioctl(dev, (caddr_t)arg, cmd, mode, credp);
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_ioctl done: %d\n", error));
        return (error);
    }

    /*
     * try to load arguments
     */
    if (cmd != RSM_IOCTL_RING_BELL &&
        rsm_ddi_copyin((caddr_t)arg, &msg, mode)) {
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_ioctl done: EFAULT\n"));
        return (RSMERR_BAD_ADDR);
    }

    if (cmd == RSM_IOCTL_ATTR) {
        adapter = rsm_getadapter(&msg, mode);
        if (adapter == NULL) {
            DBG_PRINTF((category, RSM_DEBUG,
                "rsm_ioctl done: ENODEV\n"));
            return (RSMERR_CTLR_NOT_PRESENT);
        }
        error = rsmattr_ddi_copyout(adapter, msg.arg, mode);
        rsmka_release_adapter(adapter);
        DBG_PRINTF((category, RSM_DEBUG,
            "rsm_ioctl: after copyout %d\n", error));
        return (error);
    }

    if (cmd == RSM_IOCTL_BAR_INFO) {
        /* Return library off,len of barrier page */
        msg.off = barrier_offset;
        msg.len = (int)barrier_size;
#ifdef _MULTI_DATAMODEL
        if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
            rsm_ioctlmsg32_t msg32;

            if (msg.len > UINT_MAX)
                msg32.len = RSM_MAXSZ_PAGE_ALIGNED;
            else
                msg32.len = (int32_t)msg.len;
            msg32.off = (int32_t)msg.off;
            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "rsm_ioctl done\n"));
            if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
                sizeof (msg32), mode))
                return (RSMERR_BAD_ADDR);
            else
                return (RSM_SUCCESS);
        }
#endif
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_ioctl done\n"));
        if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
            sizeof (msg), mode))
            return (RSMERR_BAD_ADDR);
        else
            return (RSM_SUCCESS);
    }

    if (RSM_IOCTL_CMDGRP(cmd) == RSM_IOCTL_MAP_ADDR) {
        /* map the nodeid or hwaddr */
        error = rsmaddr_ioctl(cmd, &msg, mode);
        if (error == RSM_SUCCESS) {
#ifdef _MULTI_DATAMODEL
            if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
                rsm_ioctlmsg32_t msg32;

                msg32.hwaddr = (uint64_t)msg.hwaddr;
                msg32.nodeid = (uint32_t)msg.nodeid;

                DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                    "rsm_ioctl done\n"));
                if (ddi_copyout((caddr_t)&msg32, (caddr_t)arg,
                    sizeof (msg32), mode))
                    return (RSMERR_BAD_ADDR);
                else
                    return (RSM_SUCCESS);
            }
#endif
            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "rsm_ioctl done\n"));
            if (ddi_copyout((caddr_t)&msg, (caddr_t)arg,
                sizeof (msg), mode))
                return (RSMERR_BAD_ADDR);
            else
                return (RSM_SUCCESS);
        }
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_ioctl done: %d\n", error));
        return (error);
    }

    /* Find the resource and look it up */
    rnum = getminor(dev);
    res = rsmresource_lookup(rnum, RSM_NOLOCK);
    ASSERT(res != NULL);

    /*
     * Find command group
     */
    switch (RSM_IOCTL_CMDGRP(cmd)) {
    case RSM_IOCTL_EXPORT_SEG:
        /*
         * Export list is searched during publish, loopback and
         * remote lookup call.
         */
        seg = rsmresource_seg(res, rnum, credp,
            RSM_RESOURCE_EXPORT_SEGMENT);
        if (seg->s_type == RSM_RESOURCE_EXPORT_SEGMENT) {
            error = rsmexport_ioctl(seg, &msg, cmd, arg, mode,
                credp);
        } else { /* export ioctl on an import/barrier resource */
            error = RSMERR_BAD_SEG_HNDL;
        }
        break;
    case RSM_IOCTL_IMPORT_SEG:
        /* Import list is searched during remote unmap call. */
        seg = rsmresource_seg(res, rnum, credp,
            RSM_RESOURCE_IMPORT_SEGMENT);
        if (seg->s_type == RSM_RESOURCE_IMPORT_SEGMENT) {
            error = rsmimport_ioctl(seg, &msg, cmd, arg, mode,
                credp);
        } else { /* import ioctl on an export/barrier resource */
            error = RSMERR_BAD_SEG_HNDL;
        }
        break;
    case RSM_IOCTL_BAR:
        if (res != RSMRC_RESERVED &&
            res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT) {
            error = rsmbar_ioctl((rsmseg_t *)res, &msg, cmd, arg,
                mode);
        } else { /* invalid res value */
            error = RSMERR_BAD_SEG_HNDL;
        }
        break;
    case RSM_IOCTL_BELL:
        if (res != RSMRC_RESERVED) {
            if (res->rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT)
                error = exportbell_ioctl((rsmseg_t *)res, cmd);
            else if (res->rsmrc_type == RSM_RESOURCE_EXPORT_SEGMENT)
                error = importbell_ioctl((rsmseg_t *)res, cmd);
            else /* RSM_RESOURCE_BAR */
                error = RSMERR_BAD_SEG_HNDL;
        } else { /* invalid res value */
            error = RSMERR_BAD_SEG_HNDL;
        }
        break;
    default:
        error = EINVAL;
    }

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_ioctl done: %d\n",
        error));
    return (error);
}


/* **************************** Segment Mapping Operations ********* */
static rsm_mapinfo_t *
rsm_get_mapinfo(rsmseg_t *seg, off_t off, size_t len, off_t *dev_offset,
    size_t *map_len)
{
    rsm_mapinfo_t *p;

    /*
     * Find the correct mapinfo structure to use during the mapping
     * from the seg->s_mapinfo list.
     * The seg->s_mapinfo list contains, in reverse order, the mappings
     * as returned by the RSMPI rsm_map. In rsm_devmap, we need to
     * access the correct entry within this list for the mapping
     * requested.
     *
     * The algorithm for selecting a list entry is as follows:
     *
     * When the start_offset of an entry is <= off, we have found the
     * entry we were looking for. Adjust the dev_offset and map_len
     * (which needs to be PAGESIZE aligned).
     */
    p = seg->s_mapinfo;
    for (; p; p = p->next) {
        if (p->start_offset <= off) {
            *dev_offset = p->dev_offset + off - p->start_offset;
            *map_len = (len > p->individual_len) ?
                p->individual_len : ptob(btopr(len));
            return (p);
        }
    }

    return (NULL);
}
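/*
 * Worked example for rsm_get_mapinfo (illustrative numbers, assuming
 * PAGESIZE is 0x1000): suppose rsm_map returned the segment in two pieces
 * and the list - built in reverse order - is
 *
 *   { start_offset = 0x4000, dev_offset = 0xa000, individual_len = 0x4000 }
 *     -> { start_offset = 0, dev_offset = 0x2000, individual_len = 0x4000 }
 *
 * A request for off = 0x5000, len = 0x1000 matches the first entry
 * (0x4000 <= 0x5000) and yields
 *
 *   *dev_offset = 0xa000 + 0x5000 - 0x4000 = 0xb000
 *   *map_len    = ptob(btopr(0x1000)) = 0x1000
 *
 * A request for off = 0x1000 walks past the first entry and matches the
 * second, yielding *dev_offset = 0x2000 + 0x1000 = 0x3000.
 */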
static void
rsm_free_mapinfo(rsm_mapinfo_t *mapinfo)
{
    rsm_mapinfo_t *p;

    while (mapinfo != NULL) {
        p = mapinfo;
        mapinfo = mapinfo->next;
        kmem_free(p, sizeof (*p));
    }
}

static int
rsmmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
    size_t len, void **pvtp)
{
    rsmcookie_t *p;
    rsmresource_t *res;
    rsmseg_t *seg;
    minor_t rnum;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map enter\n"));

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsmmap_map: dhp = %x\n", dhp));

    flags = flags;

    rnum = getminor(dev);
    res = (rsmresource_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
    ASSERT(res != NULL);

    seg = (rsmseg_t *)res;

    rsmseglock_acquire(seg);

    ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

    /*
     * Allocate structure and add cookie to segment list
     */
    p = kmem_alloc(sizeof (*p), KM_SLEEP);

    p->c_dhp = dhp;
    p->c_off = off;
    p->c_len = len;
    p->c_next = seg->s_ckl;
    seg->s_ckl = p;

    *pvtp = (void *)seg;

    rsmseglock_release(seg);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_map done\n"));
    return (DDI_SUCCESS);
}

/*
 * Page fault handling is done here. The prerequisite mapping setup
 * has been done in rsm_devmap with calls to devmap_devmem_setup or
 * devmap_umem_setup.
 */
static int
rsmmap_access(devmap_cookie_t dhp, void *pvt, offset_t offset, size_t len,
    uint_t type, uint_t rw)
{
    int e;
    rsmseg_t *seg = (rsmseg_t *)pvt;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access enter\n"));

    rsmseglock_acquire(seg);

    ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

    while (seg->s_state == RSM_STATE_MAP_QUIESCE) {
        if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
            DBG_PRINTF((category, RSM_DEBUG,
                "rsmmap_access done: cv_wait INTR"));
            rsmseglock_release(seg);
            return (RSMERR_INTERRUPTED);
        }
    }

    ASSERT(seg->s_state == RSM_STATE_DISCONNECT ||
        seg->s_state == RSM_STATE_ACTIVE);

    if (seg->s_state == RSM_STATE_DISCONNECT)
        seg->s_flags |= RSM_IMPORT_DUMMY;

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsmmap_access: dhp = %x\n", dhp));

    rsmseglock_release(seg);

    if (e = devmap_load(dhp, offset, len, type, rw)) {
        DBG_PRINTF((category, RSM_ERR, "devmap_load failed\n"));
    }

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_access done\n"));

    return (e);
}

static int
rsmmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
    void **newpvt)
{
    rsmseg_t *seg = (rsmseg_t *)oldpvt;
    rsmcookie_t *p, *old;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup enter\n"));

    /*
     * Same as map: create an entry to hold the cookie and add it to
     * the connect segment list. The oldpvt is a pointer to the
     * segment; return the segment pointer in newpvt.
     */
    rsmseglock_acquire(seg);

    ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

    /*
     * Find old cookie
     */
    for (old = seg->s_ckl; old != NULL; old = old->c_next) {
        if (old->c_dhp == dhp) {
            break;
        }
    }
    if (old == NULL) {
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsmmap_dup done: EINVAL\n"));
        rsmseglock_release(seg);
        return (EINVAL);
    }

    p = kmem_alloc(sizeof (*p), KM_SLEEP);

    p->c_dhp = new_dhp;
    p->c_off = old->c_off;
    p->c_len = old->c_len;
    p->c_next = seg->s_ckl;
    seg->s_ckl = p;

    *newpvt = (void *)seg;

    rsmseglock_release(seg);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_dup done\n"));

    return (DDI_SUCCESS);
}

static void
rsmmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
    devmap_cookie_t new_dhp1, void **pvtp1,
    devmap_cookie_t new_dhp2, void **pvtp2)
{
    /*
     * Remove pvtp structure from segment list.
     */
    rsmseg_t *seg = (rsmseg_t *)pvtp;
    int freeflag;

    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap enter\n"));

    off = off; len = len;
    pvtp1 = pvtp1; pvtp2 = pvtp2;

    rsmseglock_acquire(seg);

    ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsmmap_unmap: dhp = %x\n", dhp));
    /*
     * We can go ahead and remove the dhps even if we are in
     * the MAPPING state because the dhps being removed here
     * belong to a different mmap and we are holding the segment
     * lock.
     */
    if (new_dhp1 == NULL && new_dhp2 == NULL) {
        /* find and remove dhp handle */
        rsmcookie_t *tmp, **back = &seg->s_ckl;

        while (*back != NULL) {
            tmp = *back;
            if (tmp->c_dhp == dhp) {
                *back = tmp->c_next;
                kmem_free(tmp, sizeof (*tmp));
                break;
            }
            back = &tmp->c_next;
        }
    } else {
        DBG_PRINTF((category, RSM_DEBUG_LVL2,
            "rsmmap_unmap: partial unmap "
            "new_dhp1 %lx, new_dhp2 %lx\n",
            (size_t)new_dhp1, (size_t)new_dhp2));
    }

    /*
     * rsmmap_unmap is called for each mapping cookie on the list.
     * When the list becomes empty and we are not in the MAPPING
     * state then unmap in the rsmpi driver.
     */
    if ((seg->s_ckl == NULL) && (seg->s_state != RSM_STATE_MAPPING))
        (void) rsm_unmap(seg);

    if (seg->s_state == RSM_STATE_END && seg->s_ckl == NULL) {
        freeflag = 1;
    } else {
        freeflag = 0;
    }

    rsmseglock_release(seg);

    if (freeflag) {
        /* Free the segment structure */
        rsmseg_free(seg);
    }
    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsmmap_unmap done\n"));
}

static struct devmap_callback_ctl rsmmap_ops = {
    DEVMAP_OPS_REV,	/* devmap_ops version number */
    rsmmap_map,		/* devmap_ops map routine */
    rsmmap_access,	/* devmap_ops access routine */
    rsmmap_dup,		/* devmap_ops dup routine */
    rsmmap_unmap,	/* devmap_ops unmap routine */
};
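/*
 * Rough lifecycle of the callbacks registered above, for a single mmap of
 * an import segment (a summary sketch; an mmap that spans several RSMPI
 * pieces gets one rsmmap_map/rsmmap_unmap pair per devmap cookie):
 *
 *   mmap(2)            -> rsm_segmap -> devmap_setup -> rsm_devmap
 *                         -> rsmmap_map     (cookie added to seg->s_ckl)
 *   page fault         -> rsmmap_access    (devmap_load)
 *   fork(2)            -> rsmmap_dup       (cookie duplicated for child)
 *   munmap(2)/exit(2)  -> rsmmap_unmap     (cookie removed; when the list
 *                                           empties, rsm_unmap is called)
 */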
static int
rsm_devmap(dev_t dev, devmap_cookie_t dhc, offset_t off, size_t len,
    size_t *maplen, uint_t model /*ARGSUSED*/)
{
    struct devmap_callback_ctl *callbackops = &rsmmap_ops;
    int err;
    uint_t maxprot;
    minor_t rnum;
    rsmseg_t *seg;
    off_t dev_offset;
    size_t cur_len;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_devmap enter\n"));

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsm_devmap: off = %lx, len = %lx\n", off, len));
    rnum = getminor(dev);
    seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_NOLOCK);
    ASSERT(seg != NULL);

    if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
        if ((off == barrier_offset) &&
            (len == barrier_size)) {

            ASSERT(bar_va != NULL && bar_cookie != NULL);

            /*
             * The offset argument in devmap_umem_setup represents
             * the offset within the kernel memory defined by the
             * cookie. We use this offset as barrier_offset.
             */
            err = devmap_umem_setup(dhc, rsm_dip, NULL, bar_cookie,
                barrier_offset, len, PROT_USER|PROT_READ,
                DEVMAP_DEFAULTS, 0);

            if (err != 0) {
                DBG_PRINTF((category, RSM_ERR,
                    "rsm_devmap done: %d\n", err));
                return (RSMERR_MAP_FAILED);
            }
            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "rsm_devmap done: %d\n", err));

            *maplen = barrier_size;

            return (err);
        } else {
            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "rsm_devmap done: bad barrier off/len\n"));
            return (RSMERR_MAP_FAILED);
        }
    }

    ASSERT(seg->s_hdr.rsmrc_type == RSM_RESOURCE_IMPORT_SEGMENT);
    ASSERT(seg->s_state == RSM_STATE_MAPPING);

    /*
     * Make sure we still have permission for the map operation.
     */
    maxprot = PROT_USER;
    if (seg->s_mode & RSM_PERM_READ) {
        maxprot |= PROT_READ;
    }

    if (seg->s_mode & RSM_PERM_WRITE) {
        maxprot |= PROT_WRITE;
    }

    /*
     * For each devmap call, rsmmap_map is called. This maintains driver
     * private information for the mapping. Thus, if there are multiple
     * devmap calls there will be multiple rsmmap_map calls and for each
     * call, the mapping information will be stored.
     * In case of an error during the processing of the devmap call, error
     * will be returned. This error return causes the caller of rsm_devmap
     * to undo all the mappings by calling rsmmap_unmap for each one.
     * rsmmap_unmap will free up the private information for the requested
     * mapping.
     */
    if (seg->s_node != my_nodeid) {
        rsm_mapinfo_t *p;

        p = rsm_get_mapinfo(seg, off, len, &dev_offset, &cur_len);
        if (p == NULL) {
            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "rsm_devmap: incorrect mapping info\n"));
            return (RSMERR_MAP_FAILED);
        }
        err = devmap_devmem_setup(dhc, p->dip,
            callbackops, p->dev_register,
            dev_offset, cur_len, maxprot,
            DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS, 0);

        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_devmap: dip=%lx,dreg=%lu,doff=%lx,"
            "off=%lx,len=%lx\n",
            p->dip, p->dev_register, dev_offset, off, cur_len));

        if (err != 0) {
            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "rsm_devmap: devmap_devmem_setup failed %d\n",
                err));
            return (RSMERR_MAP_FAILED);
        }
        /* cur_len is always an integral multiple of PAGESIZE */
        ASSERT((cur_len & (PAGESIZE-1)) == 0);
        *maplen = cur_len;
        return (err);

    } else {
        err = devmap_umem_setup(dhc, rsm_dip, callbackops,
            seg->s_cookie, off, len, maxprot,
            DEVMAP_ALLOW_REMAP|DEVMAP_DEFAULTS, 0);
        if (err != 0) {
            DBG_PRINTF((category, RSM_DEBUG,
                "rsm_devmap: devmap_umem_setup failed %d\n",
                err));
            return (RSMERR_MAP_FAILED);
        }
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_devmap: loopback done\n"));

        *maplen = ptob(btopr(len));

        return (err);
    }
}
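/*
 * A note on the *maplen value returned by the loopback arm above: devmap
 * requires a page-granular length, and ptob(btopr(len)) is simply len
 * rounded up to the next PAGESIZE multiple - btopr() rounds a byte count
 * up to whole pages and ptob() converts pages back to bytes. For example,
 * assuming an 8K PAGESIZE (0x2000):
 *
 *   len = 0x2000  ->  btopr(len) = 1  ->  ptob(1) = 0x2000   (unchanged)
 *   len = 0x2001  ->  btopr(len) = 2  ->  ptob(2) = 0x4000   (rounded up)
 */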
/*
 * We can use the devmap framework for mapping device memory to user space by
 * specifying this routine in the rsm_cb_ops structure. The kernel mmap
 * processing calls this entry point and devmap_setup is called within this
 * function, which eventually calls rsm_devmap.
 */
static int
rsm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
{
    int error = 0;
    int old_state;
    minor_t rnum;
    rsmseg_t *seg, *eseg;
    adapter_t *adapter;
    rsm_import_share_t *sharedp;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_DDI);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "rsm_segmap enter\n"));

    /*
     * find segment
     */
    rnum = getminor(dev);
    seg = (rsmseg_t *)rsmresource_lookup(rnum, RSM_LOCK);

    if (seg == NULL) {
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_segmap done: invalid segment\n"));
        return (EINVAL);
    }

    /*
     * the user is trying to map a resource that has not been
     * defined yet. The library uses this to map in the
     * barrier page.
     */
    if (seg->s_hdr.rsmrc_type == RSM_RESOURCE_BAR) {
        rsmseglock_release(seg);

        /*
         * The mapping for the barrier page is identified
         * by the special offset barrier_offset
         */

        if ((off == (off_t)barrier_offset) &&
            (len == (off_t)barrier_size)) {
            if (bar_cookie == NULL || bar_va == NULL) {
                DBG_PRINTF((category, RSM_DEBUG,
                    "rsm_segmap: bar cookie/va is NULL\n"));
                return (EINVAL);
            }

            error = devmap_setup(dev, (offset_t)off, as, addrp,
                (size_t)len, prot, maxprot, flags, cred);

            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "rsm_segmap done: %d\n", error));
            return (error);
        } else {
            DBG_PRINTF((category, RSM_DEBUG,
                "rsm_segmap: bad offset/length\n"));
            return (EINVAL);
        }
    }

    /* Make sure you can only map imported segments */
    if (seg->s_hdr.rsmrc_type != RSM_RESOURCE_IMPORT_SEGMENT) {
        rsmseglock_release(seg);
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_segmap done: not an import segment\n"));
        return (EINVAL);
    }
    /* check means library is broken */
    ASSERT(seg->s_hdr.rsmrc_num == rnum);

    /* wait for the segment to become unquiesced */
    while (seg->s_state == RSM_STATE_CONN_QUIESCE) {
        if (cv_wait_sig(&seg->s_cv, &seg->s_lock) == 0) {
            rsmseglock_release(seg);
            DBG_PRINTF((category, RSM_DEBUG,
                "rsm_segmap done: cv_wait INTR"));
            return (ENODEV);
        }
    }

    /* wait until the segment leaves the mapping state */
    while (seg->s_state == RSM_STATE_MAPPING)
        cv_wait(&seg->s_cv, &seg->s_lock);

    /*
     * We allow multiple maps of the same segment in the KA.
     * This works because we do an rsmpi map of the whole segment
     * during the first map, and all the device mapping information
     * needed in rsm_devmap is in the mapinfo list.
     */
    if ((seg->s_state != RSM_STATE_CONNECT) &&
        (seg->s_state != RSM_STATE_ACTIVE)) {
        rsmseglock_release(seg);
        DBG_PRINTF((category, RSM_DEBUG,
            "rsm_segmap done: segment not connected\n"));
        return (ENODEV);
    }

    /*
     * Make sure we are not mapping a larger segment than what's
     * exported
     */
    if ((size_t)off + ptob(btopr(len)) > seg->s_len) {
        rsmseglock_release(seg);
        DBG_PRINTF((category, RSM_DEBUG,
            "rsm_segmap done: off+len>seg size\n"));
        return (ENXIO);
    }

    /*
     * Make sure we still have permission for the map operation.
     */
    maxprot = PROT_USER;
    if (seg->s_mode & RSM_PERM_READ) {
        maxprot |= PROT_READ;
    }

    if (seg->s_mode & RSM_PERM_WRITE) {
        maxprot |= PROT_WRITE;
    }

    if ((prot & maxprot) != prot) {
        /* No permission */
        rsmseglock_release(seg);
        DBG_PRINTF((category, RSM_DEBUG,
            "rsm_segmap done: no permission\n"));
        return (EACCES);
    }

    old_state = seg->s_state;

    ASSERT(seg->s_share != NULL);

    rsmsharelock_acquire(seg);

    sharedp = seg->s_share;

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsm_segmap: RSMSI_STATE=%d\n", sharedp->rsmsi_state));

    if ((sharedp->rsmsi_state != RSMSI_STATE_CONNECTED) &&
        (sharedp->rsmsi_state != RSMSI_STATE_MAPPED)) {
        rsmsharelock_release(seg);
        rsmseglock_release(seg);
        DBG_PRINTF((category, RSM_DEBUG,
            "rsm_segmap done: RSMSI_STATE %d invalid\n",
            sharedp->rsmsi_state));
        return (ENODEV);
    }

    /*
     * Do the map - since we want importers to share mappings
     * we do the rsmpi map for the whole segment
     */
    if (seg->s_node != my_nodeid) {
        uint_t dev_register;
        off_t dev_offset;
        dev_info_t *dip;
        size_t tmp_len;
        size_t total_length_mapped = 0;
        size_t length_to_map = seg->s_len;
        off_t tmp_off = 0;
        rsm_mapinfo_t *p;

        /*
         * length_to_map = seg->s_len is always an integral
         * multiple of PAGESIZE. The length mapped in each entry in
         * the mapinfo list is a multiple of PAGESIZE - RSMPI map
         * ensures this.
         */

        adapter = seg->s_adapter;
        ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
            sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

        if (sharedp->rsmsi_state == RSMSI_STATE_CONNECTED) {
            error = 0;
            /* map the whole segment */
            while (total_length_mapped < seg->s_len) {
                tmp_len = 0;

                error = adapter->rsmpi_ops->rsm_map(
                    seg->s_handle.in, tmp_off,
                    length_to_map, &tmp_len,
                    &dip, &dev_register, &dev_offset,
                    NULL, NULL);

                if (error != 0)
                    break;

                /*
                 * Store the mapping info obtained from rsm_map
                 */
                p = kmem_alloc(sizeof (*p), KM_SLEEP);
                p->dev_register = dev_register;
                p->dev_offset = dev_offset;
                p->dip = dip;
                p->individual_len = tmp_len;
                p->start_offset = tmp_off;
                p->next = sharedp->rsmsi_mapinfo;
                sharedp->rsmsi_mapinfo = p;

                total_length_mapped += tmp_len;
                length_to_map -= tmp_len;
                tmp_off += tmp_len;
            }
            seg->s_mapinfo = sharedp->rsmsi_mapinfo;

            if (error != RSM_SUCCESS) {
                /* if any earlier rsm_map succeeded, undo it */
                if (sharedp->rsmsi_mapinfo != NULL) {
                    /*
                     * A single rsm_unmap undoes
                     * multiple rsm_maps.
                     */
                    (void) seg->s_adapter->rsmpi_ops->
                        rsm_unmap(sharedp->rsmsi_handle);
                    rsm_free_mapinfo(sharedp->
                        rsmsi_mapinfo);
                }
                sharedp->rsmsi_mapinfo = NULL;
                sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
                rsmsharelock_release(seg);
                rsmseglock_release(seg);
                DBG_PRINTF((category, RSM_DEBUG,
                    "rsm_segmap done: rsmpi map err %d\n",
                    error));
                ASSERT(error != RSMERR_BAD_LENGTH &&
                    error != RSMERR_BAD_MEM_ALIGNMENT &&
                    error != RSMERR_BAD_SEG_HNDL);
                if (error == RSMERR_UNSUPPORTED_OPERATION)
                    return (ENOTSUP);
                else if (error == RSMERR_INSUFFICIENT_RESOURCES)
                    return (EAGAIN);
                else if (error == RSMERR_CONN_ABORTED)
                    return (ENODEV);
                else
                    return (error);
            } else {
                sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
            }
        } else {
            seg->s_mapinfo = sharedp->rsmsi_mapinfo;
        }

        sharedp->rsmsi_mapcnt++;

        rsmsharelock_release(seg);

        /* move to an intermediate mapping state */
        seg->s_state = RSM_STATE_MAPPING;
        rsmseglock_release(seg);

        error = devmap_setup(dev, (offset_t)off, as, addrp,
            len, prot, maxprot, flags, cred);

        rsmseglock_acquire(seg);
        ASSERT(seg->s_state == RSM_STATE_MAPPING);

        if (error == DDI_SUCCESS) {
            seg->s_state = RSM_STATE_ACTIVE;
        } else {
            rsmsharelock_acquire(seg);

            ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

            sharedp->rsmsi_mapcnt--;
            if (sharedp->rsmsi_mapcnt == 0) {
                /* unmap the shared RSMPI mapping */
                ASSERT(sharedp->rsmsi_handle != NULL);
                (void) adapter->rsmpi_ops->
                    rsm_unmap(sharedp->rsmsi_handle);
                rsm_free_mapinfo(sharedp->rsmsi_mapinfo);
                sharedp->rsmsi_mapinfo = NULL;
                sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
            }

            rsmsharelock_release(seg);
            seg->s_state = old_state;
            DBG_PRINTF((category, RSM_ERR,
                "rsm: devmap_setup failed %d\n", error));
        }
        cv_broadcast(&seg->s_cv);
        rsmseglock_release(seg);
        DBG_PRINTF((category, RSM_DEBUG_LVL2, "rsm_segmap done: %d\n",
            error));
        return (error);
    } else {
        /*
         * For loopback, the export segment mapping cookie (s_cookie)
         * is also used as the s_cookie value for its import segments
         * during mapping.
         * Note that reference counting for s_cookie of the export
         * segment is not required due to the following:
         * We never have a case of the export segment being destroyed,
         * leaving the import segments with a stale value for the
         * s_cookie field, since a force disconnect is done prior to a
         * destroy of an export segment. The force disconnect causes
         * the s_cookie value to be reset to NULL. Also, for the
         * rsm_rebind operation, we change the s_cookie value of the
         * export segment as well as of all its local (loopback)
         * importers.
         */
        DBG_ADDCATEGORY(category, RSM_LOOPBACK);

        rsmsharelock_release(seg);
        /*
         * In order to maintain the lock ordering between the export
         * and import segment locks, we need to acquire the export
         * segment lock first and only then acquire the import
         * segment lock.
         * The above is necessary to avoid any deadlock scenarios
         * with rsm_rebind, which also acquires both the export
         * and import segment locks in the above mentioned order.
         * Based on code inspection, there seem to be no other
         * situations in which both the export and import segment
         * locks are acquired either in the same or opposite order
         * as mentioned above.
         * Thus, in order to conform to the above lock order, we
         * need to change the state of the import segment to
         * RSM_STATE_MAPPING and release its lock. Once this is
         * done, we can safely acquire the export segment lock
         * first, followed by the import segment lock, which is
         * the lock order mentioned above.
         */
        /* move to an intermediate mapping state */
        seg->s_state = RSM_STATE_MAPPING;
        rsmseglock_release(seg);

        eseg = rsmexport_lookup(seg->s_key);

        if (eseg == NULL) {
            rsmseglock_acquire(seg);
            /*
             * Revert to old_state and signal any waiters
             * The shared state is not changed
             */

            seg->s_state = old_state;
            cv_broadcast(&seg->s_cv);
            rsmseglock_release(seg);
            DBG_PRINTF((category, RSM_DEBUG,
                "rsm_segmap done: key %d not found\n", seg->s_key));
            return (ENODEV);
        }

        rsmsharelock_acquire(seg);
        ASSERT(sharedp->rsmsi_state == RSMSI_STATE_CONNECTED ||
            sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

        sharedp->rsmsi_mapcnt++;
        sharedp->rsmsi_state = RSMSI_STATE_MAPPED;
        rsmsharelock_release(seg);

        ASSERT(eseg->s_cookie != NULL);

        /*
         * It is not required or necessary to acquire the import
         * segment lock here to change the value of s_cookie since
         * no one will touch the import segment as long as it is
         * in the RSM_STATE_MAPPING state.
         */
        seg->s_cookie = eseg->s_cookie;

        rsmseglock_release(eseg);

        error = devmap_setup(dev, (offset_t)off, as, addrp, (size_t)len,
            prot, maxprot, flags, cred);

        rsmseglock_acquire(seg);
        ASSERT(seg->s_state == RSM_STATE_MAPPING);
        if (error == 0) {
            seg->s_state = RSM_STATE_ACTIVE;
        } else {
            rsmsharelock_acquire(seg);

            ASSERT(sharedp->rsmsi_state == RSMSI_STATE_MAPPED);

            sharedp->rsmsi_mapcnt--;
            if (sharedp->rsmsi_mapcnt == 0) {
                sharedp->rsmsi_mapinfo = NULL;
                sharedp->rsmsi_state = RSMSI_STATE_CONNECTED;
            }
            rsmsharelock_release(seg);
            seg->s_state = old_state;
            seg->s_cookie = NULL;
        }
        cv_broadcast(&seg->s_cv);
        rsmseglock_release(seg);
        DBG_PRINTF((category, RSM_DEBUG_LVL2,
            "rsm_segmap done: %d\n", error));
        return (error);
    }
}
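/*
 * Lock-order summary for the loopback arm of rsm_segmap above (a sketch of
 * the discipline described in the comments there, no new locking):
 *
 *   seg->s_state = RSM_STATE_MAPPING;  import lock held...
 *   rsmseglock_release(seg);           ...then dropped
 *   eseg = rsmexport_lookup(...);      export lock acquired first
 *   seg->s_cookie = eseg->s_cookie;    import seg safe to touch while it
 *                                      is in RSM_STATE_MAPPING
 *   rsmseglock_release(eseg);
 *   ...
 *   rsmseglock_acquire(seg);           import lock reacquired last
 *
 * i.e. the export lock is always taken before the import lock, the same
 * order rsm_rebind uses, so the two paths cannot deadlock each other.
 */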
int
rsmka_null_seg_create(
    rsm_controller_handle_t argcp,
    rsm_memseg_export_handle_t *handle,
    size_t size,
    uint_t flags,
    rsm_memory_local_t *memory,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
    return (RSM_SUCCESS);
}


int
rsmka_null_seg_destroy(
    rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
{
    return (RSM_SUCCESS);
}


int
rsmka_null_bind(
    rsm_memseg_export_handle_t argmemseg,
    off_t offset,
    rsm_memory_local_t *argmemory,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
    return (RSM_SUCCESS);
}


int
rsmka_null_unbind(
    rsm_memseg_export_handle_t argmemseg,
    off_t offset,
    size_t length /*ARGSUSED*/)
{
    return (DDI_SUCCESS);
}

int
rsmka_null_rebind(
    rsm_memseg_export_handle_t argmemseg,
    off_t offset,
    rsm_memory_local_t *memory,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
    return (RSM_SUCCESS);
}

int
rsmka_null_publish(
    rsm_memseg_export_handle_t argmemseg,
    rsm_access_entry_t access_list[],
    uint_t access_list_length,
    rsm_memseg_id_t segment_id,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
    return (RSM_SUCCESS);
}


int
rsmka_null_republish(
    rsm_memseg_export_handle_t memseg,
    rsm_access_entry_t access_list[],
    uint_t access_list_length,
    rsm_resource_callback_t callback,
    rsm_resource_callback_arg_t callback_arg /*ARGSUSED*/)
{
    return (RSM_SUCCESS);
}

int
rsmka_null_unpublish(
    rsm_memseg_export_handle_t argmemseg /*ARGSUSED*/)
{
    return (RSM_SUCCESS);
}


void
rsmka_init_loopback()
{
    rsm_ops_t *ops = &null_rsmpi_ops;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL | RSM_LOOPBACK);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsmka_init_loopback enter\n"));

    /* initialize null ops vector */
    ops->rsm_seg_create = rsmka_null_seg_create;
    ops->rsm_seg_destroy = rsmka_null_seg_destroy;
    ops->rsm_bind = rsmka_null_bind;
    ops->rsm_unbind = rsmka_null_unbind;
    ops->rsm_rebind = rsmka_null_rebind;
    ops->rsm_publish = rsmka_null_publish;
    ops->rsm_unpublish = rsmka_null_unpublish;
    ops->rsm_republish = rsmka_null_republish;

    /* initialize attributes for loopback adapter */
    loopback_attr.attr_name = loopback_str;
    loopback_attr.attr_page_size = 0x8; /* 8K */

    /* initialize loopback adapter */
    loopback_adapter.rsm_attr = loopback_attr;
    loopback_adapter.rsmpi_ops = &null_rsmpi_ops;
    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsmka_init_loopback done\n"));
}
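/*
 * With the vector initialized above, loopback operations dispatch through
 * the same indirection as real RSMPI adapters but collapse into successful
 * no-ops. A sketch of such a call (illustrative only, not compiled):
 */
#if 0	/* illustrative sketch only - not compiled */
static void
rsmka_loopback_dispatch_example(void)
{
    adapter_t *la = &loopback_adapter;

    /* resolves to rsmka_null_bind() above and just returns RSM_SUCCESS */
    (void) la->rsmpi_ops->rsm_bind(NULL, 0, NULL, NULL, NULL);
}
#endif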
/* ************** DR functions ********************************** */
static void
rsm_quiesce_exp_seg(rsmresource_t *resp)
{
    int recheck_state;
    rsmseg_t *segp = (rsmseg_t *)resp;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
    DBG_DEFINE_STR(function, "rsm_quiesce_exp_seg");

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "%s enter: key=%u\n", function, segp->s_key));

    rsmseglock_acquire(segp);
    do {
        recheck_state = 0;
        if ((segp->s_state == RSM_STATE_NEW_QUIESCED) ||
            (segp->s_state == RSM_STATE_BIND_QUIESCED) ||
            (segp->s_state == RSM_STATE_EXPORT_QUIESCING) ||
            (segp->s_state == RSM_STATE_EXPORT_QUIESCED)) {
            rsmseglock_release(segp);
            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "%s done: state=%d\n", function,
                segp->s_state));
            return;
        }

        if (segp->s_state == RSM_STATE_NEW) {
            segp->s_state = RSM_STATE_NEW_QUIESCED;
            rsmseglock_release(segp);
            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "%s done: state=%d\n", function,
                segp->s_state));
            return;
        }

        if (segp->s_state == RSM_STATE_BIND) {
            /* unbind */
            (void) rsm_unbind_pages(segp);
            segp->s_state = RSM_STATE_BIND_QUIESCED;
            rsmseglock_release(segp);
            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "%s done: state=%d\n", function,
                segp->s_state));
            return;
        }

        if (segp->s_state == RSM_STATE_EXPORT) {
            /*
             * wait for putv/getv to complete if the segp is
             * a local memory handle
             */
            while ((segp->s_state == RSM_STATE_EXPORT) &&
                (segp->s_rdmacnt != 0)) {
                cv_wait(&segp->s_cv, &segp->s_lock);
            }

            if (segp->s_state != RSM_STATE_EXPORT) {
                /*
                 * The state changed while we waited;
                 * recheck what it should become.
                 */
                recheck_state = 1;
                continue;
            }

            segp->s_state = RSM_STATE_EXPORT_QUIESCING;
            rsmseglock_release(segp);
            /*
             * send SUSPEND messages - currently it will be
             * done at the end
             */
            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "%s done: state=%d\n", function,
                segp->s_state));
            return;
        }
    } while (recheck_state);

    rsmseglock_release(segp);
}

static void
rsm_unquiesce_exp_seg(rsmresource_t *resp)
{
    int ret;
    rsmseg_t *segp = (rsmseg_t *)resp;
    rsmapi_access_entry_t *acl;
    rsm_access_entry_t *rsmpi_acl;
    int acl_len;
    int create_flags = 0;
    struct buf *xbuf;
    rsm_memory_local_t mem;
    adapter_t *adapter;
    dev_t sdev = 0;
    rsm_resource_callback_t callback_flag;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
    DBG_DEFINE_STR(function, "rsm_unquiesce_exp_seg");

    rsmseglock_acquire(segp);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "%s enter: key=%u, state=%d\n", function, segp->s_key,
        segp->s_state));

    if ((segp->s_state == RSM_STATE_NEW) ||
        (segp->s_state == RSM_STATE_BIND) ||
        (segp->s_state == RSM_STATE_EXPORT)) {
        rsmseglock_release(segp);
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: state=%d\n",
            function, segp->s_state));
        return;
    }

    if (segp->s_state == RSM_STATE_NEW_QUIESCED) {
        segp->s_state = RSM_STATE_NEW;
        cv_broadcast(&segp->s_cv);
        rsmseglock_release(segp);
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: state=%d\n",
            function, segp->s_state));
        return;
    }

    if (segp->s_state == RSM_STATE_BIND_QUIESCED) {
        /* bind the segment */
        ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
            segp->s_len, segp->s_proc);
        if (ret == RSM_SUCCESS) { /* bind successful */
            segp->s_state = RSM_STATE_BIND;
        } else { /* bind failed - resource unavailable */
            segp->s_state = RSM_STATE_NEW;
        }
        cv_broadcast(&segp->s_cv);
        rsmseglock_release(segp);
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "%s done: bind_qscd bind = %d\n", function, ret));
        return;
    }

    while (segp->s_state == RSM_STATE_EXPORT_QUIESCING) {
        /* wait for the segment to move to EXPORT_QUIESCED state */
        cv_wait(&segp->s_cv, &segp->s_lock);
    }

    if (segp->s_state == RSM_STATE_EXPORT_QUIESCED) {
        /* bind the segment */
        ret = rsm_bind_pages(&segp->s_cookie, segp->s_region.r_vaddr,
            segp->s_len, segp->s_proc);

        if (ret != RSM_SUCCESS) {
            /* bind failed - resource unavailable */
            acl_len = segp->s_acl_len;
            acl = segp->s_acl;
            rsmpi_acl = segp->s_acl_in;
            segp->s_acl_len = 0;
            segp->s_acl = NULL;
            segp->s_acl_in = NULL;
            rsmseglock_release(segp);

            rsmexport_rm(segp);
            rsmacl_free(acl, acl_len);
            rsmpiacl_free(rsmpi_acl, acl_len);

            rsmseglock_acquire(segp);
            segp->s_state = RSM_STATE_NEW;
            cv_broadcast(&segp->s_cv);
            rsmseglock_release(segp);
            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "%s done: exp_qscd bind failed = %d\n",
                function, ret));
            return;
        }
        /*
         * publish the segment:
         * if successful
         *	segp->s_state = RSM_STATE_EXPORT;
         * else failed
         *	segp->s_state = RSM_STATE_BIND;
         */

        /* check whether it is a local_memory_handle */
        if (segp->s_acl != (rsmapi_access_entry_t *)NULL) {
            if ((segp->s_acl[0].ae_node == my_nodeid) &&
                (segp->s_acl[0].ae_permission == 0)) {
                segp->s_state = RSM_STATE_EXPORT;
                cv_broadcast(&segp->s_cv);
                rsmseglock_release(segp);
                DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                    "%s done: exp_qscd\n", function));
                return;
            }
        }
        xbuf = ddi_umem_iosetup(segp->s_cookie, 0, segp->s_len, B_WRITE,
            sdev, 0, NULL, DDI_UMEM_SLEEP);
        ASSERT(xbuf != NULL);

        mem.ms_type = RSM_MEM_BUF;
        mem.ms_bp = xbuf;

        adapter = segp->s_adapter;

        if (segp->s_flags & RSMKA_ALLOW_UNBIND_REBIND) {
            create_flags = RSM_ALLOW_UNBIND_REBIND;
        }

        if (segp->s_flags & RSMKA_SET_RESOURCE_DONTWAIT) {
            callback_flag = RSM_RESOURCE_DONTWAIT;
        } else {
            callback_flag = RSM_RESOURCE_SLEEP;
        }

        ret = adapter->rsmpi_ops->rsm_seg_create(
            adapter->rsmpi_handle, &segp->s_handle.out,
            segp->s_len, create_flags, &mem,
            callback_flag, NULL);

        if (ret != RSM_SUCCESS) {
            acl_len = segp->s_acl_len;
            acl = segp->s_acl;
            rsmpi_acl = segp->s_acl_in;
            segp->s_acl_len = 0;
            segp->s_acl = NULL;
            segp->s_acl_in = NULL;
            rsmseglock_release(segp);

            rsmexport_rm(segp);
            rsmacl_free(acl, acl_len);
            rsmpiacl_free(rsmpi_acl, acl_len);

            rsmseglock_acquire(segp);
            segp->s_state = RSM_STATE_BIND;
            cv_broadcast(&segp->s_cv);
            rsmseglock_release(segp);
            DBG_PRINTF((category, RSM_ERR,
                "%s done: exp_qscd create failed = %d\n",
                function, ret));
            return;
        }

        ret = adapter->rsmpi_ops->rsm_publish(
            segp->s_handle.out, segp->s_acl_in, segp->s_acl_len,
            segp->s_segid, RSM_RESOURCE_DONTWAIT, NULL);

        if (ret != RSM_SUCCESS) {
            acl_len = segp->s_acl_len;
            acl = segp->s_acl;
            rsmpi_acl = segp->s_acl_in;
            segp->s_acl_len = 0;
            segp->s_acl = NULL;
            segp->s_acl_in = NULL;
            adapter->rsmpi_ops->rsm_seg_destroy(segp->s_handle.out);
            rsmseglock_release(segp);

            rsmexport_rm(segp);
            rsmacl_free(acl, acl_len);
            rsmpiacl_free(rsmpi_acl, acl_len);

            rsmseglock_acquire(segp);
            segp->s_state = RSM_STATE_BIND;
            cv_broadcast(&segp->s_cv);
            rsmseglock_release(segp);
            DBG_PRINTF((category, RSM_ERR,
                "%s done: exp_qscd publish failed = %d\n",
                function, ret));
            return;
        }

        segp->s_state = RSM_STATE_EXPORT;
        cv_broadcast(&segp->s_cv);
        rsmseglock_release(segp);
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done: exp_qscd\n",
            function));
        return;
    }

    rsmseglock_release(segp);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
}
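/*
 * State transitions performed by the export-segment quiesce/unquiesce pair
 * above (a summary of the code, one line per starting state):
 *
 *   quiesce:   NEW    -> NEW_QUIESCED
 *              BIND   -> BIND_QUIESCED       (pages unbound)
 *              EXPORT -> EXPORT_QUIESCING    (after in-flight rdma drains)
 *
 *   unquiesce: NEW_QUIESCED    -> NEW
 *              BIND_QUIESCED   -> BIND       (rebind; NEW on failure)
 *              EXPORT_QUIESCED -> EXPORT     (rebind, seg_create, publish;
 *                                             falls back to NEW or BIND on
 *                                             failure)
 */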
static void
rsm_quiesce_imp_seg(rsmresource_t *resp)
{
    rsmseg_t *segp = (rsmseg_t *)resp;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
    DBG_DEFINE_STR(function, "rsm_quiesce_imp_seg");

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "%s enter: key=%u\n", function, segp->s_key));

    rsmseglock_acquire(segp);
    segp->s_flags |= RSM_DR_INPROGRESS;

    while (segp->s_rdmacnt != 0) {
        /* wait for the RDMA to complete */
        cv_wait(&segp->s_cv, &segp->s_lock);
    }

    rsmseglock_release(segp);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
}

static void
rsm_unquiesce_imp_seg(rsmresource_t *resp)
{
    rsmseg_t *segp = (rsmseg_t *)resp;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
    DBG_DEFINE_STR(function, "rsm_unquiesce_imp_seg");

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "%s enter: key=%u\n", function, segp->s_key));

    rsmseglock_acquire(segp);

    segp->s_flags &= ~RSM_DR_INPROGRESS;
    /* wake up any waiting putv/getv ops */
    cv_broadcast(&segp->s_cv);

    rsmseglock_release(segp);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE, "%s done\n", function));
}

static void
rsm_process_exp_seg(rsmresource_t *resp, int event)
{
    if (event == RSM_DR_QUIESCE)
        rsm_quiesce_exp_seg(resp);
    else /* UNQUIESCE */
        rsm_unquiesce_exp_seg(resp);
}

static void
rsm_process_imp_seg(rsmresource_t *resp, int event)
{
    if (event == RSM_DR_QUIESCE)
        rsm_quiesce_imp_seg(resp);
    else /* UNQUIESCE */
        rsm_unquiesce_imp_seg(resp);
}

static void
rsm_dr_process_local_segments(int event)
{
    int i, j;
    rsmresource_blk_t *blk;
    rsmresource_t *p;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsm_dr_process_local_segments enter\n"));

    /* iterate through the resource structure */

    rw_enter(&rsm_resource.rsmrc_lock, RW_READER);

    for (i = 0; i < rsm_resource.rsmrc_len; i++) {
        blk = rsm_resource.rsmrc_root[i];
        if (blk != NULL) {
            for (j = 0; j < RSMRC_BLKSZ; j++) {
                p = blk->rsmrcblk_blks[j];
                if ((p != NULL) && (p != RSMRC_RESERVED)) {
                    /* valid resource */
                    if (p->rsmrc_type ==
                        RSM_RESOURCE_EXPORT_SEGMENT)
                        rsm_process_exp_seg(p, event);
                    else if (p->rsmrc_type ==
                        RSM_RESOURCE_IMPORT_SEGMENT)
                        rsm_process_imp_seg(p, event);
                }
            }
        }
    }

    rw_exit(&rsm_resource.rsmrc_lock);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsm_dr_process_local_segments done\n"));
}

/* *************** DR callback functions ************ */
static void
rsm_dr_callback_post_add(void *arg, pgcnt_t delta /* ARGSUSED */)
{
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);
    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsm_dr_callback_post_add is a no-op\n"));
    /* Noop */
}
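/*
 * The pre-del/post-del callbacks below drive a small state machine in
 * rsm_drv_data.drv_state. A sketch of the main flow for one memory-delete
 * operation (a summary of the code that follows):
 *
 *   OK --pre-del--> PREDEL_STARTED         (quiesce local segments, send
 *                                           suspend to importers)
 *      --all suspend_completes received--> PREDEL_COMPLETED
 *      --pre-del returns--> DR_IN_PROGRESS
 *
 *   DR_IN_PROGRESS --post-del, last memdel--> POSTDEL_IN_PROGRESS
 *                                           (unquiesce local segments,
 *                                           send resume) --> OK
 *
 * Additional pre-dels arriving while DR_IN_PROGRESS just bump
 * drv_memdel_cnt; the matching post-dels decrement it.
 */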
static int
rsm_dr_callback_pre_del(void *arg, pgcnt_t delta /* ARGSUSED */)
{
    int recheck_state = 0;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsm_dr_callback_pre_del enter\n"));

    mutex_enter(&rsm_drv_data.drv_lock);

    do {
        recheck_state = 0;
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_dr_callback_pre_del: state=%d\n",
            rsm_drv_data.drv_state));

        switch (rsm_drv_data.drv_state) {
        case RSM_DRV_NEW:
            /*
             * The state should never be RSM_DRV_NEW
             * since in this state the callbacks have not yet
             * been registered. So, ASSERT.
             */
            ASSERT(0);
            return (0);
        case RSM_DRV_REG_PROCESSING:
            /*
             * The driver is in the process of registering
             * with the DR framework. So, wait until the
             * registration process is complete.
             */
            recheck_state = 1;
            cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
            break;
        case RSM_DRV_UNREG_PROCESSING:
            /*
             * If the state is RSM_DRV_UNREG_PROCESSING, the
             * module is in the process of detaching and
             * unregistering the callbacks from the DR
             * framework. So, simply return.
             */
            mutex_exit(&rsm_drv_data.drv_lock);
            DBG_PRINTF((category, RSM_DEBUG,
                "rsm_dr_callback_pre_del: "
                "pre-del on NEW/UNREG\n"));
            return (0);
        case RSM_DRV_OK:
            rsm_drv_data.drv_state = RSM_DRV_PREDEL_STARTED;
            break;
        case RSM_DRV_PREDEL_STARTED:
            /* FALLTHRU */
        case RSM_DRV_PREDEL_COMPLETED:
            /* FALLTHRU */
        case RSM_DRV_POSTDEL_IN_PROGRESS:
            recheck_state = 1;
            cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
            break;
        case RSM_DRV_DR_IN_PROGRESS:
            rsm_drv_data.drv_memdel_cnt++;
            mutex_exit(&rsm_drv_data.drv_lock);
            DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                "rsm_dr_callback_pre_del done\n"));
            return (0);
            /* break; */
        default:
            ASSERT(0);
            break;
        }

    } while (recheck_state);

    rsm_drv_data.drv_memdel_cnt++;

    mutex_exit(&rsm_drv_data.drv_lock);

    /* Do all the quiescing stuff here */
    DBG_PRINTF((category, RSM_DEBUG,
        "rsm_dr_callback_pre_del: quiesce things now\n"));

    rsm_dr_process_local_segments(RSM_DR_QUIESCE);

    /*
     * now that all local segments have been quiesced lets inform
     * the importers
     */
    rsm_send_suspend();

    /*
     * In response to the suspend message the remote node(s) will process
     * the segments and send a suspend_complete message. Until all
     * the nodes send the suspend_complete message we wait in the
     * RSM_DRV_PREDEL_STARTED state. In the exporter_quiesce
     * function we transition to the RSM_DRV_PREDEL_COMPLETED state.
     */
    mutex_enter(&rsm_drv_data.drv_lock);

    while (rsm_drv_data.drv_state == RSM_DRV_PREDEL_STARTED) {
        cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
    }

    ASSERT(rsm_drv_data.drv_state == RSM_DRV_PREDEL_COMPLETED);

    rsm_drv_data.drv_state = RSM_DRV_DR_IN_PROGRESS;
    cv_broadcast(&rsm_drv_data.drv_cv);

    mutex_exit(&rsm_drv_data.drv_lock);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsm_dr_callback_pre_del done\n"));

    return (0);
}

static void
rsm_dr_callback_post_del(void *arg, pgcnt_t delta, int cancelled /* ARGSUSED */)
{
    int recheck_state = 0;
    DBG_DEFINE(category, RSM_KERNEL_AGENT | RSM_FUNC_ALL);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsm_dr_callback_post_del enter\n"));

    mutex_enter(&rsm_drv_data.drv_lock);

    do {
        recheck_state = 0;
        DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
            "rsm_dr_callback_post_del: state=%d\n",
            rsm_drv_data.drv_state));

        switch (rsm_drv_data.drv_state) {
        case RSM_DRV_NEW:
            /*
             * The driver state cannot be RSM_DRV_NEW
             * since in this state the callbacks have not
             * yet been registered.
             */
            ASSERT(0);
            return;
        case RSM_DRV_REG_PROCESSING:
            /*
             * The driver is in the process of registering with
             * the DR framework. Wait until the registration is
             * complete.
             */
            recheck_state = 1;
            cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
            break;
        case RSM_DRV_UNREG_PROCESSING:
            /*
             * RSM_DRV_UNREG_PROCESSING state means the module
             * is detaching and unregistering the callbacks
             * from the DR framework. So simply return.
             */
            /* FALLTHRU */
        case RSM_DRV_OK:
            /*
             * RSM_DRV_OK means we missed the pre-del
             * corresponding to this post-del because we had
             * not registered yet, so simply return.
             */
            mutex_exit(&rsm_drv_data.drv_lock);
            DBG_PRINTF((category, RSM_DEBUG,
                "rsm_dr_callback_post_del: "
                "post-del on OK/UNREG\n"));
            return;
            /* break; */
        case RSM_DRV_PREDEL_STARTED:
            /* FALLTHRU */
        case RSM_DRV_PREDEL_COMPLETED:
            /* FALLTHRU */
        case RSM_DRV_POSTDEL_IN_PROGRESS:
            recheck_state = 1;
            cv_wait(&rsm_drv_data.drv_cv, &rsm_drv_data.drv_lock);
            break;
        case RSM_DRV_DR_IN_PROGRESS:
            rsm_drv_data.drv_memdel_cnt--;
            if (rsm_drv_data.drv_memdel_cnt > 0) {
                mutex_exit(&rsm_drv_data.drv_lock);
                DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
                    "rsm_dr_callback_post_del done\n"));
                return;
            }
            rsm_drv_data.drv_state = RSM_DRV_POSTDEL_IN_PROGRESS;
            break;
        default:
            ASSERT(0);
            return;
            /* break; */
        }
    } while (recheck_state);

    mutex_exit(&rsm_drv_data.drv_lock);

    /* Do all the unquiescing stuff here */
    DBG_PRINTF((category, RSM_DEBUG,
        "rsm_dr_callback_post_del: unquiesce things now\n"));

    rsm_dr_process_local_segments(RSM_DR_UNQUIESCE);

    /*
     * now that all local segments have been unquiesced lets inform
     * the importers
     */
    rsm_send_resume();

    mutex_enter(&rsm_drv_data.drv_lock);

    rsm_drv_data.drv_state = RSM_DRV_OK;

    cv_broadcast(&rsm_drv_data.drv_cv);

    mutex_exit(&rsm_drv_data.drv_lock);

    DBG_PRINTF((category, RSM_DEBUG_VERBOSE,
        "rsm_dr_callback_post_del done\n"));
}